From 0088363860b6720090a1f2d8a0c020293d142f48 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 4 Dec 2023 08:21:23 +0200 Subject: [PATCH 01/19] Minor: Style modification in rmgdb tests --- arc/rmgdb_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/arc/rmgdb_test.py b/arc/rmgdb_test.py index 5a29777f33..3ff298b688 100644 --- a/arc/rmgdb_test.py +++ b/arc/rmgdb_test.py @@ -131,5 +131,6 @@ def test_clean_rmg_database_object(self): self.assertIsNone(self.rmgdb.kinetics) rmgdb.load_rmg_database(rmgdb=self.rmgdb) + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) From 746a7090a7cf9cd949fd54c0cb2780b4ffdc0662 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:31:46 +0300 Subject: [PATCH 02/19] Minor: Style modifications to AM driver --- arc/mapping/driver.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arc/mapping/driver.py b/arc/mapping/driver.py index e378eedd69..7bad785bb2 100644 --- a/arc/mapping/driver.py +++ b/arc/mapping/driver.py @@ -40,7 +40,7 @@ def map_reaction(rxn: 'ARCReaction', backend: str = 'ARC', db: Optional['RMGDatabase'] = None, - flip = False + flip: bool = False ) -> Optional[List[int]]: """ Map a reaction. @@ -49,6 +49,7 @@ def map_reaction(rxn: 'ARCReaction', rxn (ARCReaction): An ARCReaction object instance. backend (str, optional): Whether to use ``'QCElemental'`` or ``ARC``'s method as the backend. db (RMGDatabase, optional): The RMG database instance. + flip (bool, optional): Try mapping with a flipped reaction. Returns: Optional[List[int]]: @@ -56,7 +57,8 @@ def map_reaction(rxn: 'ARCReaction', corresponding entry values are running atom indices of the products. """ if flip: - logger.warning(f"The requested ARC reaction {rxn} could not be atom mapped using {backend}. Trying again with the flipped reaction.") + logger.warning(f"The requested ARC reaction {rxn} could not be atom mapped using {backend}. 
" + f"Trying again with the flipped reaction.") try: _map = flip_map(map_rxn(rxn.flip_reaction(), backend=backend, db=db)) except ValueError: @@ -71,7 +73,7 @@ def map_reaction(rxn: 'ARCReaction', return _map if _map is not None else map_reaction(rxn, backend=backend, db=db, flip=True) try: _map = map_rxn(rxn, backend=backend, db=db) - except ValueError as e: + except ValueError: return map_reaction(rxn, backend=backend, db=db, flip=True) return _map if _map is not None else map_reaction(rxn, backend=backend, db=db, flip=True) @@ -245,7 +247,7 @@ def map_rxn(rxn: 'ARCReaction', # step 2: assign_labels_to_products(rxn, p_label_dict) - #step 3: + # step 3: reactants, products = copy_species_list_for_mapping(rxn.r_species), copy_species_list_for_mapping(rxn.p_species) label_species_atoms(reactants), label_species_atoms(products) @@ -261,7 +263,7 @@ def map_rxn(rxn: 'ARCReaction', r_cuts, p_cuts = update_xyz(r_cuts), update_xyz(p_cuts) - #step 4: + # step 4: pairs_of_reactant_and_products = pairing_reactants_and_products_for_mapping(r_cuts, p_cuts) if len(p_cuts): logger.error(f"Could not find isomorphism for scissored species: {[cut.mol.smiles for cut in p_cuts]}") @@ -269,5 +271,5 @@ def map_rxn(rxn: 'ARCReaction', # step 5: maps = map_pairs(pairs_of_reactant_and_products) - #step 6: + # step 6: return glue_maps(maps, pairs_of_reactant_and_products) From 6ae5dcc285a4e04bf48cbd2f8f796df2fb5169ba Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 13 Sep 2023 14:13:00 +0300 Subject: [PATCH 03/19] Extracted two specialized functions out of map_two_species() --- arc/mapping/engine.py | 144 +++++++++++++++++++++++++++--------------- 1 file changed, 92 insertions(+), 52 deletions(-) diff --git a/arc/mapping/engine.py b/arc/mapping/engine.py index 45d97ce724..8776821f7b 100644 --- a/arc/mapping/engine.py +++ b/arc/mapping/engine.py @@ -267,7 +267,7 @@ def map_two_species(spc_1: Union[ARCSpecies, Species, Molecule], return {0: 0} return [0] - # A shortcut for 
homonuclear diatomic species. + # A shortcut for homo-nuclear diatomic species. if spc_1.number_of_atoms == spc_2.number_of_atoms == 2 \ and len(set([atom.element.symbol for atom in spc_1.mol.atoms])) == 1: if map_type == 'dict': @@ -288,65 +288,105 @@ def map_two_species(spc_1: Union[ARCSpecies, Species, Molecule], if backend.lower() not in ['qcelemental', 'arc']: raise ValueError(f'The backend method could be either "QCElemental" or "ARC", got {backend}.') - atom_map = None + atom_map = None if backend.lower() == 'arc': + atom_map = map_two_species_via_arc(spc_1=spc_1, + spc_2=spc_2, + map_type=map_type, + consider_chirality=consider_chirality, + ) + if atom_map is None and allow_backend_shift: + backend = 'QCElemental' + if backend.lower() == 'qcelemental': + atom_map = map_two_species_via_qcel(spc_1=spc_1, spc_2=spc_2, map_type=map_type) + + if inc_vals is not None: + atom_map = [value + inc_vals for value in atom_map] + return atom_map + + +def map_two_species_via_arc(spc_1: Union[ARCSpecies, Species, Molecule], + spc_2: Union[ARCSpecies, Species, Molecule], + map_type: str = 'list', + consider_chirality: bool = True, + ) -> Optional[Union[List[int], Dict[int, int]]]: + """ + Map the atoms in spc_1 to the atoms in spc_2 using the ARC 3DAM method. + + Args: + spc_1 (Union[ARCSpecies, Species, Molecule]): Species 1. + spc_2 (Union[ARCSpecies, Species, Molecule]): Species 2. + map_type (str, optional): Whether to return a 'list' or a 'dict' map type. + consider_chirality (bool, optional): Whether to consider chirality when fingerprinting. + + Returns: + Optional[Union[List[int], Dict[int, int]]]: + The atom maps. By default, a list of atom maps is returned. 
+ """ + fingerprint_1 = fingerprint(spc_1, consider_chirality=consider_chirality) + fingerprint_2 = fingerprint(spc_2, consider_chirality=consider_chirality) + candidates = identify_superimposable_candidates(fingerprint_1, fingerprint_2) + if candidates is None or len(candidates) == 0: + consider_chirality = not consider_chirality fingerprint_1 = fingerprint(spc_1, consider_chirality=consider_chirality) fingerprint_2 = fingerprint(spc_2, consider_chirality=consider_chirality) candidates = identify_superimposable_candidates(fingerprint_1, fingerprint_2) if candidates is None or len(candidates) == 0: - consider_chirality = not consider_chirality - fingerprint_1 = fingerprint(spc_1, consider_chirality=consider_chirality) - fingerprint_2 = fingerprint(spc_2, consider_chirality=consider_chirality) - candidates = identify_superimposable_candidates(fingerprint_1, fingerprint_2) - if candidates is None or len(candidates) == 0: - logger.warning(f'Could not identify superimposable candidates {spc_1} and {spc_2}.') - return None - if not len(candidates): - if allow_backend_shift: - backend = 'QCElemental' - else: - return None - else: - rmsds, fixed_spcs = list(), list() - for candidate in candidates: - fixed_spc_1, fixed_spc_2 = fix_dihedrals_by_backbone_mapping(spc_1, spc_2, backbone_map=candidate) - fixed_spcs.append((fixed_spc_1, fixed_spc_2)) - backbone_1, backbone_2 = set(list(candidate.keys())), set(list(candidate.values())) - xyz1, xyz2 = fixed_spc_1.get_xyz(), fixed_spc_2.get_xyz() - xyz1 = xyz_from_data(coords=[xyz1['coords'][i] for i in range(fixed_spc_1.number_of_atoms) if i in backbone_1], - symbols=[xyz1['symbols'][i] for i in range(fixed_spc_1.number_of_atoms) if i in backbone_1], - isotopes=[xyz1['isotopes'][i] for i in range(fixed_spc_1.number_of_atoms) if i in backbone_1]) - xyz2 = xyz_from_data(coords=[xyz2['coords'][i] for i in range(fixed_spc_2.number_of_atoms) if i in backbone_2], - symbols=[xyz2['symbols'][i] for i in range(fixed_spc_2.number_of_atoms) 
if i in backbone_2], - isotopes=[xyz2['isotopes'][i] for i in range(fixed_spc_2.number_of_atoms) if i in backbone_2]) - no_gap_candidate = remove_gaps_from_values(candidate) - xyz2 = sort_xyz_using_indices(xyz2, indices=[v for k, v in sorted(no_gap_candidate.items(), - key=lambda item: item[0])]) - rmsds.append(compare_confs(xyz1=xyz1, xyz2=xyz2, rmsd_score=True)) - chosen_candidate_index = rmsds.index(min(rmsds)) - fixed_spc_1, fixed_spc_2 = fixed_spcs[chosen_candidate_index] - atom_map = map_hydrogens(fixed_spc_1, fixed_spc_2, candidate) - if map_type == 'list': - atom_map = [v for k, v in sorted(atom_map.items(), key=lambda item: item[0])] - if atom_map is None and allow_backend_shift: - backend = 'QCElemental' - - if backend.lower() == 'qcelemental': - qcmol_1 = create_qc_mol(species=spc_1.copy()) - qcmol_2 = create_qc_mol(species=spc_2.copy()) - if qcmol_1 is None or qcmol_2 is None: + logger.warning(f'Could not identify superimposable candidates {spc_1} and {spc_2}.') return None - if len(qcmol_1.symbols) != len(qcmol_2.symbols): - raise ValueError(f'The number of atoms in spc1 ({spc_1.number_of_atoms}) must be equal ' - f'to the number of atoms in spc1 ({spc_2.number_of_atoms}).') - data = qcmol_2.align(ref_mol=qcmol_1, verbose=0, uno_cutoff=0.01) - atom_map = data[1]['mill'].atommap.tolist() - if map_type == 'dict': - atom_map = {key: val for key, val in enumerate(atom_map)} + if not len(candidates): + return None + rmsds, fixed_spcs = list(), list() + for candidate in candidates: + fixed_spc_1, fixed_spc_2 = fix_dihedrals_by_backbone_mapping(spc_1, spc_2, backbone_map=candidate) + fixed_spcs.append((fixed_spc_1, fixed_spc_2)) + backbone_1, backbone_2 = set(list(candidate.keys())), set(list(candidate.values())) + xyz1, xyz2 = fixed_spc_1.get_xyz(), fixed_spc_2.get_xyz() + xyz1 = xyz_from_data(coords=[xyz1['coords'][i] for i in range(fixed_spc_1.number_of_atoms) if i in backbone_1], + symbols=[xyz1['symbols'][i] for i in range(fixed_spc_1.number_of_atoms) if 
i in backbone_1], + isotopes=[xyz1['isotopes'][i] for i in range(fixed_spc_1.number_of_atoms) if i in backbone_1]) + xyz2 = xyz_from_data(coords=[xyz2['coords'][i] for i in range(fixed_spc_2.number_of_atoms) if i in backbone_2], + symbols=[xyz2['symbols'][i] for i in range(fixed_spc_2.number_of_atoms) if i in backbone_2], + isotopes=[xyz2['isotopes'][i] for i in range(fixed_spc_2.number_of_atoms) if i in backbone_2]) + no_gap_candidate = remove_gaps_from_values(candidate) + xyz2 = sort_xyz_using_indices(xyz2, indices=[v for k, v in sorted(no_gap_candidate.items(), key=lambda item: item[0])]) + rmsds.append(compare_confs(xyz1=xyz1, xyz2=xyz2, rmsd_score=True)) + chosen_candidate_index = rmsds.index(min(rmsds)) + fixed_spc_1, fixed_spc_2 = fixed_spcs[chosen_candidate_index] + atom_map = map_hydrogens(fixed_spc_1, fixed_spc_2, candidate) + if map_type == 'list': + atom_map = [v for k, v in sorted(atom_map.items(), key=lambda item: item[0])] + return atom_map - if inc_vals is not None: - atom_map = [value + inc_vals for value in atom_map] + +def map_two_species_via_qcel(spc_1: Union[ARCSpecies, Species, Molecule], + spc_2: Union[ARCSpecies, Species, Molecule], + map_type: str = 'list', + ) -> Optional[Union[List[int], Dict[int, int]]]: + """ + Map the atoms in spc_1 to the atoms in spc_2 using the QCElemental method. + + Args: + spc_1 (Union[ARCSpecies, Species, Molecule]): Species 1. + spc_2 (Union[ARCSpecies, Species, Molecule]): Species 2. + map_type (str, optional): Whether to return a 'list' or a 'dict' map type. + + Returns: + Optional[Union[List[int], Dict[int, int]]]: + The atom maps. By default, a list of atom maps is returned. 
+ """ + qcmol_1 = create_qc_mol(species=spc_1.copy()) + qcmol_2 = create_qc_mol(species=spc_2.copy()) + if qcmol_1 is None or qcmol_2 is None: + return None + if len(qcmol_1.symbols) != len(qcmol_2.symbols): + raise ValueError(f'The number of atoms in spc1 ({spc_1.number_of_atoms}) must be equal ' + f'to the number of atoms in spc1 ({spc_2.number_of_atoms}).') + data = qcmol_2.align(ref_mol=qcmol_1, verbose=0, uno_cutoff=0.01) + atom_map = data[1]['mill'].atommap.tolist() + if map_type == 'dict': + atom_map = {key: val for key, val in enumerate(atom_map)} return atom_map From 081110f388cac01c9d3b857b57736c1a529f2791 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 22:30:44 +0300 Subject: [PATCH 04/19] Added Reaction.is_unimolecular() Also fixed the is_isomerization() method --- arc/reaction.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arc/reaction.py b/arc/reaction.py index 9ad7a019d5..b27232f02e 100644 --- a/arc/reaction.py +++ b/arc/reaction.py @@ -369,7 +369,18 @@ def is_isomerization(self): Returns: bool: Whether this is an isomerization reaction. """ - return True if len(self.r_species) == 1 and len(self.p_species) == 1 else False + reactants, products = self.get_reactants_and_products() + return len(reactants) == 1 and len(products) == 1 + + def is_unimolecular(self): + """ + Determine whether this is a unimolecular reaction. + + Returns: + bool: Whether this is a unimolecular reaction. 
+ """ + reactants, products = self.get_reactants_and_products() + return len(reactants) == 1 or len(products) == 1 def set_label_reactants_products(self, species_list: Optional[List[ARCSpecies]] = None): """A helper function for settings the label, reactants, and products attributes for a Reaction""" @@ -795,7 +806,7 @@ def get_reactants_and_products(self, else: for i in range(self.get_species_count(species=p_spc, well=1)): products.append(Species(label=p_spc.label, molecule=[p_spc.mol.copy(deep=True) if return_copies - else p_spc.mol])) + else p_spc.mol])) return reactants, products def get_expected_changing_bonds(self, From d675c83029ac656b4b9a8ddd6c59ac330b848a14 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 15 May 2024 08:43:09 +0300 Subject: [PATCH 05/19] Tests: Reaction.is_unimolecular() --- arc/reaction_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arc/reaction_test.py b/arc/reaction_test.py index 8636a555e7..d277eef9df 100644 --- a/arc/reaction_test.py +++ b/arc/reaction_test.py @@ -152,6 +152,9 @@ def setUpClass(cls): Species(label='NO', smiles='[N]=O')])) cls.rxn11 = ARCReaction(r_species=[ARCSpecies(label='C[CH]C', smiles='C[CH]C', xyz=cls.ch3chch3_xyz)], p_species=[ARCSpecies(label='[CH2]CC', smiles='[CH2]CC', xyz=cls.ch3ch2ch2_xyz)]) + cls.rxn12 = ARCReaction(r_species=[ARCSpecies(label='CH3CH2NH2', smiles='CCN')], + p_species=[ARCSpecies(label='C2H4', smiles='C=C'), + ARCSpecies(label='NH3', smiles='N')]) def test_str(self): """Test the string representation of the object""" @@ -425,6 +428,22 @@ def test_is_isomerization(self): self.assertFalse(self.rxn7.is_isomerization()) self.assertFalse(self.rxn8.is_isomerization()) self.assertFalse(self.rxn9.is_isomerization()) + self.assertTrue(self.rxn11.is_isomerization()) + self.assertFalse(self.rxn12.is_isomerization()) + + def test_is_unimolecular(self): + """Test the is_unimolecular() method""" + self.assertFalse(self.rxn1.is_unimolecular()) + 
self.assertFalse(self.rxn2.is_unimolecular()) + self.assertTrue(self.rxn3.is_unimolecular()) + self.assertFalse(self.rxn4.is_unimolecular()) + self.assertFalse(self.rxn5.is_unimolecular()) + self.assertFalse(self.rxn6.is_unimolecular()) + self.assertFalse(self.rxn7.is_unimolecular()) + self.assertFalse(self.rxn8.is_unimolecular()) + self.assertTrue(self.rxn9.is_unimolecular()) + self.assertTrue(self.rxn11.is_unimolecular()) + self.assertTrue(self.rxn12.is_unimolecular()) def test_from_rmg_reaction(self): """Test setting up an ARCReaction from an RMG Reaction""" From ef6fea423c73f32f91fb14d0251da3a80f5aa218 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:27:37 +0300 Subject: [PATCH 06/19] Added an atom_order arg to xyz_to_zmat() --- arc/species/zmat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arc/species/zmat.py b/arc/species/zmat.py index aba7c95a5b..418d53b36f 100644 --- a/arc/species/zmat.py +++ b/arc/species/zmat.py @@ -60,6 +60,7 @@ def xyz_to_zmat(xyz: Dict[str, tuple], consolidate: bool = True, consolidation_tols: Dict[str, float] = None, fragments: Optional[List[List[int]]] = None, + atom_order: Optional[List[int]] = None, ) -> Dict[str, tuple]: """ Generate a z-matrix from cartesian coordinates. @@ -96,6 +97,7 @@ def xyz_to_zmat(xyz: Dict[str, tuple], Fragments represented by the species, i.e., as in a VdW well or a TS. Entries are atom index lists of all atoms in a fragment, each list represents a different fragment. indices are 0-indexed. + atom_order (List[int], optional): The order in which atoms should be added to the zmat. Raises: ZMatError: If the zmat could not be generated. 
@@ -118,7 +120,7 @@ def xyz_to_zmat(xyz: Dict[str, tuple], f'coordinates with only {len(xyz["symbols"])} atoms:\n{constraints}') xyz = xyz.copy() zmat = {'symbols': list(), 'coords': list(), 'vars': dict(), 'map': dict()} - atom_order = get_atom_order(xyz=xyz, mol=mol, constraints_dict=constraints, fragments=fragments) + atom_order = atom_order or get_atom_order(xyz=xyz, mol=mol, constraints_dict=constraints, fragments=fragments) connectivity = get_connectivity(mol=mol) if mol is not None else None skipped_atoms = list() # atoms for which constrains are applied for atom_index in atom_order: From 4c4f086ce0b8823b3831fc94a2945fc1bb91f689 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:28:04 +0300 Subject: [PATCH 07/19] Tests: xyz_to_zmat() Also testing the atom_order arg --- arc/species/converter_test.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index a6c4564f00..a77f92ffde 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -1297,6 +1297,41 @@ def test_zmat_from_xyz(self): 'R_9_3': 0.9741224704818748}} self.assertTrue(_compare_zmats(zmat_8, expected_zmat_8)) + def test_xyz_to_zmat(self): + """Check folding xyz into a zmat""" + h2nn = {'symbols': ('N', 'N', 'H', 'H'), + 'isotopes': (14, 14, 1, 1), + 'coords': ((1.3546347608168492, -0.015322539977107492, -0.015327345703300993), + (-0.0986192196858452, 0.0011155018627852027, 0.0011158328655407426), + (-0.6378749227822363, -0.8648316328267205, 0.0067050159766062715), + (-0.6181406183487707, 0.8790386709410358, 0.007506496861156013))} + zmat = xyz_to_zmat(h2nn) + expected_zmat = {'symbols': ('N', 'N', 'H', 'H'), + 'coords': ((None, None, None), ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), ('R_3_2', 'A_3_2_0', 'D_3_2_0_1')), + 'vars': {'R_1_0': 1.453439904003661, 'R_2_1': 1.0201432886642632, 'A_2_1_0': 121.26532344550412, + 'R_3_2': 1.7439821177668233, 
'A_3_2_0': 66.26220791342335, 'D_3_2_0_1': 359.99999758516344}, + 'map': {0: 0, 1: 1, 2: 2, 3: 3}} + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + + zmat = xyz_to_zmat(h2nn, atom_order=[2, 3, 0, 1]) + expected_zmat = {'symbols': ('H', 'H', 'N', 'N'), + 'coords': ((None, None, None), ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), ('R_3_2', 'A_3_2_0', 'D_3_2_0_1')), + 'vars': {'R_1_0': 1.7439821177668233, 'R_2_1': 2.166159374808962, 'A_2_1_0': 66.26220397737823, + 'R_3_2': 1.453439904003661, 'A_3_2_0': 23.737787276875004, 'D_3_2_0_1': 359.99999758516344}, + 'map': {0: 2, 1: 3, 2: 0, 3: 1}} + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + + zmat = xyz_to_zmat(h2nn, atom_order=[1, 3, 2, 0]) + expected_zmat = {'symbols': ('N', 'H', 'H', 'N'), + 'coords': ((None, None, None), ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), ('R_3_2', 'A_3_2_0', 'D_3_2_0_1')), + 'vars': {'R_1_0': 1.0201433470919798, 'R_2_1': 1.7439821177668233, 'A_2_1_0': 31.265321828101055, + 'R_3_2': 2.1661591546787227, 'A_3_2_0': 34.99688237878201, 'D_3_2_0_1': 180.0000041826196}, + 'map': {0: 1, 1: 3, 2: 2, 3: 0}} + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + def test_zmat_to_xyz(self): """Check refolding a zmat into cartesian coordinates""" co3 = {'symbols': ('O', 'O', 'O'), 'isotopes': (16, 16, 16), From 00194cafef56334ebdb90ee828f9652378daf86c Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:28:35 +0300 Subject: [PATCH 08/19] Added 'linear' to ts_adapters_by_rmg_family --- arc/job/adapters/common.py | 86 ++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/arc/job/adapters/common.py b/arc/job/adapters/common.py index a351d4c01d..45ecd2d19d 100644 --- a/arc/job/adapters/common.py +++ b/arc/job/adapters/common.py @@ -27,48 +27,54 @@ settings['default_job_settings'], settings['global_ess_settings'], settings['rotor_scan_resolution'] -ts_adapters_by_rmg_family = {'1+2_Cycloaddition': 
['kinbot'], - '1,2_Insertion_CO': ['kinbot'], - '1,2_Insertion_carbene': ['kinbot'], - '1,2_shiftC': ['gcn', 'xtb_gsm'], - '1,2_shiftS': ['gcn', 'kinbot', 'xtb_gsm'], - '1,3_Insertion_CO2': ['kinbot'], - '1,3_Insertion_ROR': ['kinbot'], - '1,3_Insertion_RSR': ['kinbot'], - '1,4_Cyclic_birad_scission': ['gcn', 'xtb_gsm'], - '2+2_cycloaddition': ['kinbot'], - '6_membered_central_C-C_shift': ['gcn', 'xtb_gsm'], - 'Concerted_Intra_Diels_alder_monocyclic_1,2_shiftH': ['gcn', 'xtb_gsm'], - 'Cyclic_Ether_Formation': ['kinbot'], - 'Cyclopentadiene_scission': ['gcn', 'xtb_gsm'], - 'Diels_alder_addition': ['kinbot'], +ts_adapters_by_rmg_family = {'1+2_Cycloaddition': ['kinbot', 'linear'], + '1,2_Insertion_CO': ['kinbot', 'linear'], + '1,2_Insertion_carbene': ['kinbot', 'linear'], + '1,2_shiftC': ['gcn', 'xtb_gsm', 'linear'], + '1,2_shiftS': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + '1,3_Insertion_CO2': ['kinbot', 'linear'], + '1,3_Insertion_ROR': ['kinbot', 'linear'], + '1,3_Insertion_RSR': ['kinbot', 'linear'], + '1,3_NH3_elimination': ['linear'], + '1,3_sigmatropic_rearrangement': ['linear'], + '1,4_Cyclic_birad_scission': ['gcn', 'xtb_gsm', 'linear'], + '1,4_Linear_birad_scission': ['linear'], + '2+2_cycloaddition': ['kinbot', 'linear'], + '6_membered_central_C-C_shift': ['gcn', 'xtb_gsm', 'linear'], + 'Birad_recombination': ['linear'], + 'Concerted_Intra_Diels_alder_monocyclic_1,2_shiftH': ['gcn', 'xtb_gsm', 'linear'], + 'Cyclic_Ether_Formation': ['kinbot', 'linear'], + 'Cyclic_Thioether_Formation': ['linear'], + 'Cyclopentadiene_scission': ['gcn', 'xtb_gsm', 'linear'], + 'Diels_alder_addition': ['kinbot', 'linear'], 'H_Abstraction': ['heuristics', 'autotst'], - 'HO2_Elimination_from_PeroxyRadical': ['kinbot'], - 'Intra_2+2_cycloaddition_Cd': ['gcn', 'xtb_gsm'], - 'Intra_5_membered_conjugated_C=C_C=C_addition': ['gcn', 'xtb_gsm'], - 'Intra_Diels_alder_monocyclic': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_Disproportionation': ['gcn', 'xtb_gsm'], - 'Intra_ene_reaction': 
['gcn', 'kinbot', 'xtb_gsm'], - 'intra_H_migration': ['autotst', 'gcn', 'kinbot', 'xtb_gsm'], - 'intra_NO2_ONO_conversion': ['gcn', 'xtb_gsm'], - 'intra_OH_migration': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_RH_Add_Endocyclic': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_RH_Add_Exocyclic': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_R_Add_Endocyclic': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_R_Add_Exo_scission': ['gcn', 'xtb_gsm'], - 'Intra_R_Add_Exocyclic': ['gcn', 'kinbot', 'xtb_gsm'], - 'Intra_R_Add_ExoTetCyclic': ['kinbot'], - 'Intra_Retro_Diels_alder_bicyclic': ['kinbot'], - 'intra_substitutionCS_isomerization': ['gcn', 'xtb_gsm'], - 'intra_substitutionS_isomerization': ['gcn', 'xtb_gsm'], - 'Ketoenol': ['gcn', 'kinbot', 'xtb_gsm'], - 'Korcek_step1': ['gcn', 'xtb_gsm'], - 'Korcek_step2': ['kinbot'], - 'R_Addition_COm': ['kinbot'], - 'R_Addition_CSm': ['kinbot'], - 'R_Addition_MultipleBond': ['autotst', 'kinbot'], + 'HO2_Elimination_from_PeroxyRadical': ['kinbot', 'linear'], + 'Intra_2+2_cycloaddition_Cd': ['gcn', 'xtb_gsm', 'linear'], + 'Intra_5_membered_conjugated_C=C_C=C_addition': ['gcn', 'xtb_gsm', 'linear'], + 'Intra_Diels_alder_monocyclic': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_Disproportionation': ['gcn', 'xtb_gsm', 'linear'], + 'Intra_ene_reaction': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'intra_H_migration': ['autotst', 'gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'intra_NO2_ONO_conversion': ['gcn', 'xtb_gsm', 'linear'], + 'intra_OH_migration': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_RH_Add_Endocyclic': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_RH_Add_Exocyclic': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_R_Add_Endocyclic': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_R_Add_Exo_scission': ['gcn', 'xtb_gsm', 'linear'], + 'Intra_R_Add_Exocyclic': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Intra_R_Add_ExoTetCyclic': ['kinbot', 'linear'], + 'Intra_Retro_Diels_alder_bicyclic': ['kinbot', 'linear'], + 
'intra_substitutionCS_cyclization': ['linear'], + 'intra_substitutionS_cyclization': ['linear'], + 'intra_substitutionS_isomerization': ['gcn', 'xtb_gsm', 'linear'], + 'Ketoenol': ['gcn', 'kinbot', 'xtb_gsm', 'linear'], + 'Korcek_step1': ['gcn', 'xtb_gsm', 'linear'], + 'Korcek_step2': ['kinbot', 'linear'], + 'R_Addition_COm': ['kinbot', 'linear'], + 'R_Addition_CSm': ['kinbot', 'linear'], + 'R_Addition_MultipleBond': ['autotst', 'kinbot', 'linear'], 'Retroene': ['kinbot'], - 'Singlet_Carbene_Intra_Disproportionation': ['gcn', 'xtb_gsm'], + 'Singlet_Carbene_Intra_Disproportionation': ['gcn', 'xtb_gsm', 'linear'], } all_families_ts_adapters = [] From d2afabc4a8a50fe4f7a94ab0b10d07df04ecf5b7 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:50:57 +0300 Subject: [PATCH 09/19] Added check_ordered_zmats() --- arc/species/zmat.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arc/species/zmat.py b/arc/species/zmat.py index 418d53b36f..2cc3519775 100644 --- a/arc/species/zmat.py +++ b/arc/species/zmat.py @@ -2094,3 +2094,19 @@ def map_index_to_int(index: Union[int, str]) -> int: if isinstance(index, str) and all(char.isdigit() for char in index[1:]): return int(index[1:]) raise TypeError(f'Expected either an int or a string on the format "X15", got {index}') + + +def check_ordered_zmats(zmat_1: dict, + zmat_2: dict, + ) -> bool: + """ + Check whether the ZMats have the same order of atoms and the same variable names. + + Args: + zmat_1 (dict): ZMat 1. + zmat_2 (dict): ZMat 2. + + Returns: + bool: Whether the ZMats are ordered. 
+ """ + return zmat_1['symbols'] == zmat_2['symbols'] and zmat_1['vars'].keys() == zmat_2['vars'].keys() From 13e5a033e21b29373f5dc50e3a1343c2503076f1 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Mon, 21 Aug 2023 23:51:10 +0300 Subject: [PATCH 10/19] Tests: check_ordered_zmats() --- arc/species/zmat_test.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/arc/species/zmat_test.py b/arc/species/zmat_test.py index 86140ddeff..9e71af78ac 100644 --- a/arc/species/zmat_test.py +++ b/arc/species/zmat_test.py @@ -1815,5 +1815,45 @@ def test_map_index_to_int(self): with self.assertRaises(TypeError): zmat.map_index_to_int('XY5486') + def test_check_ordered_zmats(self): + """Test the check_ordered_zmats() function.""" + zmat_1 = {'symbols': ('H', 'C', 'H', 'H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), + ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), + ('R_4_1', 'A_4_1_0', 'D_4_1_0_3')), + 'vars': {'R_1_0': 1.3106392449517583, 'R_2_1': 1.0921994253661749, 'A_2_1_0': 109.47121834780573, + 'R_3_1': 1.092199370793132, 'A_3_1_2': 109.47122048587586, 'D_3_1_2_0': 120.0000002999208, + 'R_4_1': 1.0921994253661749, 'A_4_1_0': 109.47122150322166, 'D_4_1_0_3': 239.99999891956398}, + 'map': {0: 4, 1: 0, 2: 1, 3: 2, 4: 3}} + zmat_2 = {'symbols': ('C', 'H', 'H', 'H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), + ('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), + ('R_4_1', 'A_4_1_0', 'D_4_1_0_3')), + 'vars': {'R_1_0': 1.3106392449517583, 'R_2_1': 1.0921994253661749, 'A_2_1_0': 109.47121834780573, + 'R_3_1': 1.092199370793132, 'A_3_1_2': 109.47122048587586, 'D_3_1_2_0': 120.0000002999208, + 'R_4_1': 1.0921994253661749, 'A_4_1_0': 109.47122150322166, 'D_4_1_0_3': 239.99999891956398}, + 'map': {0: 4, 1: 0, 2: 1, 3: 2, 4: 3}} + zmat_3 = {'symbols': ('H', 'C', 'H', 'H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), + 
('R_3_1', 'A_3_1_2', 'D_3_1_2_0'), + ('R_4_1', 'A_4_1_0', 'D_4_1_0_3')), + 'vars': {'R_1_0': 2.0, 'R_2_1': 5, 'A_2_1_0': 200, + 'R_3_1': 1.2, 'A_3_1_2': 80, 'D_3_1_2_0': 90, + 'R_4_1': 0.8, 'A_4_1_0': 150, 'D_4_1_0_3': 250}, + 'map': {0: 4, 1: 0, 2: 1, 3: 2, 4: 3}} + self.assertTrue(zmat.check_ordered_zmats(zmat_1, zmat_1)) + self.assertTrue(zmat.check_ordered_zmats(zmat_2, zmat_2)) + self.assertTrue(zmat.check_ordered_zmats(zmat_3, zmat_3)) + self.assertFalse(zmat.check_ordered_zmats(zmat_1, zmat_2)) + self.assertFalse(zmat.check_ordered_zmats(zmat_2, zmat_3)) + self.assertTrue(zmat.check_ordered_zmats(zmat_1, zmat_3)) + + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) From de6625bc32c298f72e2f0c165cc90b10739435fd Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Tue, 22 Aug 2023 00:06:26 +0300 Subject: [PATCH 11/19] Added linear to JobEnum --- arc/job/adapter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arc/job/adapter.py b/arc/job/adapter.py index 29d83fc97c..c632f23cd6 100644 --- a/arc/job/adapter.py +++ b/arc/job/adapter.py @@ -96,9 +96,10 @@ class JobEnum(str, Enum): # TS search methods autotst = 'autotst' # AutoTST, 10.1021/acs.jpca.7b07361, 10.26434/chemrxiv.13277870.v2 + gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500 heuristics = 'heuristics' # ARC's heuristics kinbot = 'kinbot' # KinBot, 10.1016/j.cpc.2019.106947 - gcn = 'gcn' # Graph neural network for isomerization, https://doi.org/10.1021/acs.jpclett.0c00500 + linear = 'linear' # ARC's linear TS search user = 'user' # user guesses xtb_gsm = 'xtb_gsm' # Double ended growing string method (DE-GSM), [10.1021/ct400319w, 10.1063/1.4804162] via xTB From 87b41752022f2af93b4be262c24c3ff766d6445e Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 15 May 2024 10:00:42 +0300 Subject: [PATCH 12/19] Added the Linear TS search job adapter --- arc/job/adapters/ts/linear.py | 397 
++++++++++++++++++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 arc/job/adapters/ts/linear.py diff --git a/arc/job/adapters/ts/linear.py b/arc/job/adapters/ts/linear.py new file mode 100644 index 0000000000..ccf958cbec --- /dev/null +++ b/arc/job/adapters/ts/linear.py @@ -0,0 +1,397 @@ +""" +An adapter for executing TS guess jobs based on linear interpolation of internal coordinate values. +""" + +import copy +import datetime +from typing import TYPE_CHECKING, List, Optional, Tuple, Union + +from arc.common import almost_equal_coords, get_logger +from arc.job.adapter import JobAdapter +from arc.job.adapters.common import _initialize_adapter, ts_adapters_by_rmg_family +from arc.job.factory import register_job_adapter +from arc.plotter import save_geo +from arc.species.converter import zmat_to_xyz +from arc.species.species import ARCSpecies, TSGuess, colliding_atoms +from arc.species.zmat import check_ordered_zmats, xyz_to_zmat + +if TYPE_CHECKING: + from arc.level import Level + from arc.reaction import ARCReaction + + +DIHEDRAL_INCREMENT = 30 + +logger = get_logger() + + +class LinearAdapter(JobAdapter): + """ + A class for executing TS guess jobs based on linear interpolation of internal coordinate values. + + Args: + project (str): The project's name. Used for setting the remote path. + project_directory (str): The path to the local project directory. + job_type (list, str): The job's type, validated against ``JobTypeEnum``. If it's a list, pipe.py will be called. + args (dict, optional): Methods (including troubleshooting) to be used in input files. + Keys are either 'keyword', 'block', or 'trsh', values are dictionaries with values + to be used either as keywords or as blocks in the respective software input file. + If 'trsh' is specified, an action might be taken instead of appending a keyword or a + block to the input file (e.g., change server or change scan resolution). + bath_gas (str, optional): A bath gas. 
Currently only used in OneDMin to calculate L-J parameters. + checkfile (str, optional): The path to a previous Gaussian checkfile to be used in the current job. + conformer (int, optional): Conformer number if optimizing conformers. + constraints (list, optional): A list of constraints to use during an optimization or scan. + cpu_cores (int, optional): The total number of cpu cores requested for a job. + dihedral_increment (float, optional): The degrees increment to use when scanning dihedrals of TS guesses. + dihedrals (List[float], optional): The dihedral angles corresponding to self.torsions. + directed_scan_type (str, optional): The type of the directed scan. + ess_settings (dict, optional): A dictionary of available ESS and a corresponding server list. + ess_trsh_methods (List[str], optional): A list of troubleshooting methods already tried out. + execution_type (str, optional): The execution type, 'incore', 'queue', or 'pipe'. + fine (bool, optional): Whether to use fine geometry optimization parameters. Default: ``False``. + initial_time (datetime.datetime or str, optional): The time at which this job was initiated. + irc_direction (str, optional): The direction of the IRC job (`forward` or `reverse`). + job_id (int, optional): The job's ID determined by the server. + job_memory_gb (int, optional): The total job allocated memory in GB (14 by default). + job_name (str, optional): The job's name (e.g., 'opt_a103'). + job_num (int, optional): Used as the entry number in the database, as well as in ``job_name``. + job_server_name (str, optional): Job's name on the server (e.g., 'a103'). + job_status (list, optional): The job's server and ESS statuses. + level (Level, optional): The level of theory to use. + max_job_time (float, optional): The maximal allowed job time on the server in hours (can be fractional). + reactions (List[ARCReaction], optional): Entries are ARCReaction instances, used for TS search methods. 
+ rotor_index (int, optional): The 0-indexed rotor number (key) in the species.rotors_dict dictionary. + server (str): The server to run on. + server_nodes (list, optional): The nodes this job was previously submitted to. + species (List[ARCSpecies], optional): Entries are ARCSpecies instances. + Either ``reactions`` or ``species`` must be given. + testing (bool, optional): Whether the object is generated for testing purposes, ``True`` if it is. + times_rerun (int, optional): Number of times this job was re-run with the same arguments (no trsh methods). + torsions (List[List[int]], optional): The 0-indexed atom indices of the torsion(s). + tsg (int, optional): TSGuess number if optimizing TS guesses. + xyz (dict, optional): The 3D coordinates to use. If not given, species.get_xyz() will be used. + """ + + def __init__(self, + project: str, + project_directory: str, + job_type: Union[List[str], str], + args: Optional[dict] = None, + bath_gas: Optional[str] = None, + checkfile: Optional[str] = None, + conformer: Optional[int] = None, + constraints: Optional[List[Tuple[List[int], float]]] = None, + cpu_cores: Optional[str] = None, + dihedral_increment: Optional[float] = None, + dihedrals: Optional[List[float]] = None, + directed_scan_type: Optional[str] = None, + ess_settings: Optional[dict] = None, + ess_trsh_methods: Optional[List[str]] = None, + execution_type: Optional[str] = None, + fine: bool = False, + initial_time: Optional[Union['datetime.datetime', str]] = None, + irc_direction: Optional[str] = None, + job_id: Optional[int] = None, + job_memory_gb: float = 14.0, + job_name: Optional[str] = None, + job_num: Optional[int] = None, + job_server_name: Optional[str] = None, + job_status: Optional[List[Union[dict, str]]] = None, + level: Optional['Level'] = None, + max_job_time: Optional[float] = None, + reactions: Optional[List['ARCReaction']] = None, + rotor_index: Optional[int] = None, + server: Optional[str] = None, + server_nodes: Optional[list] = None,
species: Optional[List[ARCSpecies]] = None, + testing: bool = False, + times_rerun: int = 0, + torsions: Optional[List[List[int]]] = None, + tsg: Optional[int] = None, + xyz: Optional[dict] = None, + ): + + self.incore_capacity = 50 + self.job_adapter = 'linear' + self.command = None + self.execution_type = execution_type or 'incore' + + if reactions is None: + raise ValueError('Cannot execute TS Linear without ARCReaction object(s).') + + _initialize_adapter(obj=self, + is_ts=True, + project=project, + project_directory=project_directory, + job_type=job_type, + args=args, + bath_gas=bath_gas, + checkfile=checkfile, + conformer=conformer, + constraints=constraints, + cpu_cores=cpu_cores, + dihedral_increment=dihedral_increment, + dihedrals=dihedrals, + directed_scan_type=directed_scan_type, + ess_settings=ess_settings, + ess_trsh_methods=ess_trsh_methods, + fine=fine, + initial_time=initial_time, + irc_direction=irc_direction, + job_id=job_id, + job_memory_gb=job_memory_gb, + job_name=job_name, + job_num=job_num, + job_server_name=job_server_name, + job_status=job_status, + level=level, + max_job_time=max_job_time, + reactions=reactions, + rotor_index=rotor_index, + server=server, + server_nodes=server_nodes, + species=species, + testing=testing, + times_rerun=times_rerun, + torsions=torsions, + tsg=tsg, + xyz=xyz, + ) + + def write_input_file(self) -> None: + """ + Write the input file to execute the job on the server. + """ + pass + + def set_files(self) -> None: + """ + Set files to be uploaded and downloaded. Writes the files if needed. + Modifies the self.files_to_upload and self.files_to_download attributes. + + self.files_to_download is a list of remote paths. + + self.files_to_upload is a list of dictionaries, each with the following keys: + ``'name'``, ``'source'``, ``'make_x'``, ``'local'``, and ``'remote'``. + If ``'source'`` = ``'path'``, then the value in ``'local'`` is treated as a file path. 
+ Else if ``'source'`` = ``'input_files'``, then the value in ``'local'`` will be taken + from the respective entry in inputs.py + If ``'make_x'`` is ``True``, the file will be made executable. + """ + pass + + def set_additional_file_paths(self) -> None: + """ + Set additional file paths specific for the adapter. + Called from set_file_paths() and extends it. + """ + pass + + def set_input_file_memory(self) -> None: + """ + Set the input_file_memory attribute. + """ + pass + + def execute_incore(self): + """ + Execute a job incore. + """ + self._log_job_execution() + self.initial_time = self.initial_time if self.initial_time else datetime.datetime.now() + + supported_families = [key for key, val in ts_adapters_by_rmg_family.items() if 'linear' in val] + + self.reactions = [self.reactions] if not isinstance(self.reactions, list) else self.reactions + for rxn in self.reactions: + family_label = rxn.family.label + if family_label not in supported_families or not rxn.is_unimolecular(): + logger.warning(f'The heuristics TS search adapter does not support the {family_label} reaction family.') + continue + if any(spc.get_xyz() is None for spc in rxn.r_species + rxn.p_species): + logger.warning(f'The linear TS search adapter cannot process a reaction if 3D coordinates of ' + f'some/all of its reactants/products are missing.\nNot processing {rxn}.') + continue + + rxn.ts_species = rxn.ts_species or ARCSpecies(label='TS', + is_ts=True, + charge=rxn.charge, + multiplicity=rxn.multiplicity, + ) + + t0_0 = datetime.datetime.now() + xyz_0 = interpolate(rxn=rxn, use_weights=False) + t_ex_0 = datetime.datetime.now() - t0_0 + + t0_1 = datetime.datetime.now() + xyz_1 = interpolate(rxn=rxn, use_weights=True) + t_ex_1 = datetime.datetime.now() - t0_1 + + for method_index, (xyz, t0, t_ex) in enumerate(zip([xyz_0, xyz_1], [t0_0, t0_1], [t_ex_0, t_ex_1])): + if xyz is None: + continue + if colliding_atoms(xyz): + continue + unique = True + for other_tsg in rxn.ts_species.ts_guesses: + 
if almost_equal_coords(xyz, other_tsg.initial_xyz): + if 'linear' not in other_tsg.method.lower(): + other_tsg.method += f' and Linear {method_index}' + unique = False + break + if unique: + ts_guess = TSGuess(method=f'linear {method_index}', + index=len(rxn.ts_species.ts_guesses), + method_index=method_index, + t0=t0, + execution_time=t_ex, + success=True, + family=family_label, + xyz=xyz, + ) + rxn.ts_species.ts_guesses.append(ts_guess) + save_geo(xyz=xyz, + path=self.local_path, + filename=f'Linear {method_index}', + format_='xyz', + comment=f'Linear {method_index}, family: {family_label}', + ) + + if len(self.reactions) < 5: + successes = len([tsg for tsg in rxn.ts_species.ts_guesses if tsg.success and 'linear' in tsg.method]) + if successes: + logger.info(f'Linear successfully found {successes} TS guesses for {rxn.label}.') + else: + logger.info(f'Linear did not find any successful TS guesses for {rxn.label}.') + + self.final_time = datetime.datetime.now() + + def execute_queue(self): + """ + (Execute a job to the server's queue.) + A single Heuristics job will always be executed incore. + """ + self.execute_incore() + + +def interpolate(rxn: 'ARCReaction', + use_weights: bool = False, + ) -> Optional[dict]: + """ + Search for a TS by interpolating internal coords. + + Args: + rxn (ARCReaction): The reaction to process. + use_weights (bool, optional): Whether to use the well energies to determine relative interpolation weights. + + Returns: + Optional[dict]: The XYZ coordinates guess. + """ + if rxn.is_isomerization(): + return interpolate_isomerization(rxn=rxn, use_weights=use_weights) + return None + + +def interpolate_isomerization(rxn: 'ARCReaction', + use_weights: bool = False, + ) -> Optional[dict]: + """ + Search for a TS of an isomerization reaction by interpolating internal coords. + + Args: + rxn (ARCReaction): The reaction to process. + use_weights (bool, optional): Whether to use the well energies to determine relative interpolation weights. 
+ + Returns: + Optional[dict]: The XYZ coordinates guess. + """ + rxn.r_species[0].get_xyz() + rxn.p_species[0].get_xyz() + r_zmat = xyz_to_zmat(xyz=rxn.r_species[0].get_xyz(), + consolidate=False, + atom_order=list(range(sum(r.number_of_atoms for r in rxn.r_species)))) + p_zmat = xyz_to_zmat(xyz=rxn.p_species[0].get_xyz(), + consolidate=False, + atom_order=rxn.atom_map) + weight = get_rxn_weight(rxn) if use_weights else 0.5 + if weight is None: + return None + ts_zmat = average_zmat_params(zmat_1=r_zmat, zmat_2=p_zmat, weight=weight) + if ts_zmat is None: + return None + return zmat_to_xyz(ts_zmat) + + +def average_zmat_params(zmat_1: dict, + zmat_2: dict, + weight: float = 0.5, + ) -> Optional[dict]: + """ + Average internal coordinates using a weight. + + Args: + zmat_1 (dict): ZMat 1. + zmat_2 (dict): ZMat 2. + weight (float, optional): The weight to use on a scale of 0 (the reactant) to 1 (the product). + A value of 0.5 means exactly in the middle. + + Returns: + Optional[dict]: The weighted average ZMat. + """ + if not check_ordered_zmats(zmat_1, zmat_2) or weight < 0 or weight > 1: + return None + ts_zmat = copy.deepcopy(zmat_1) + ts_zmat['vars'] = dict() + for key in zmat_1['vars'].keys(): + ts_zmat['vars'][key] = zmat_1['vars'][key] + weight * (zmat_2['vars'][key] - zmat_1['vars'][key]) + return ts_zmat + + +def get_rxn_weight(rxn: 'ARCReaction') -> Optional[float]: + """ + Get ratio between the activation energy (reactants to TS) to the overall energy path (reactants to TS to products). + + Args: + rxn (ARCReaction): The reaction to process. + + Returns: + float: The reaction weight. 
+ """ + reactants, products = rxn.get_reactants_and_products(arc=True, return_copies=False) + r_e0 = [r.e0 for r in reactants] + p_e0 = [p.e0 for p in products] + ts_e0 = rxn.ts_species.e0 + if any(entry is None for entry in r_e0 + p_e0 + [ts_e0]): + r_ee = [r.e_elect for r in reactants] + p_ee = [p.e_elect for p in products] + ts_ee = rxn.ts_species.e_elect + if any(entry is None for entry in r_e0 + p_e0 + [ts_e0]): + return None + return get_weight(r_ee, p_ee, ts_ee) + return get_weight(r_e0, p_e0, ts_e0) + + +def get_weight(r_e: List[Optional[float]], + p_e: List[Optional[float]], + ts_e: Optional[float], + ) -> Optional[float]: + """ + Get the path ratio of reactants-TS to reactants-TS-products. + + Args: + r_e (List[float]): Reactant energies. + p_e (List[float]): Product energies. + ts_e: TS energy. + + Returns: + Optional[float]: The reaction path ratio. + """ + if any(entry is None for entry in r_e + p_e + [ts_e]): + return None + r_to_ts = ts_e - sum(r_e) + p_to_ts = ts_e - sum(p_e) + return r_to_ts / (r_to_ts + p_to_ts) + + +register_job_adapter('linear', LinearAdapter) From cbec920d11ca636ce1127c9ebaa0d766309edb15 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Wed, 15 May 2024 10:00:57 +0300 Subject: [PATCH 13/19] Tests: Linear TS Job Adapter --- arc/job/adapters/ts/linear_test.py | 250 +++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 arc/job/adapters/ts/linear_test.py diff --git a/arc/job/adapters/ts/linear_test.py b/arc/job/adapters/ts/linear_test.py new file mode 100644 index 0000000000..7fd54a748b --- /dev/null +++ b/arc/job/adapters/ts/linear_test.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 +# encoding: utf-8 + +""" +This module contains unit tests of the arc.job.adapters.ts.heuristics module +""" + +import os +import shutil +import unittest + +from arc.common import ARC_PATH, almost_equal_coords +from arc.job.adapters.ts.linear import (LinearAdapter, + average_zmat_params, + get_rxn_weight, + get_weight, 
+ interpolate_isomerization, + ) +from arc.reaction import ARCReaction +from arc.rmgdb import make_rmg_database_object, load_families_only +from arc.species.converter import str_to_xyz +from arc.species.species import ARCSpecies +from arc.species.zmat import _compare_zmats + + +class TestHeuristicsAdapter(unittest.TestCase): + """ + Contains unit tests for the HeuristicsAdapter class. + """ + + @classmethod + def setUpClass(cls): + """ + A method that is run before all unit tests in this class. + """ + cls.maxDiff = None + cls.rmgdb = make_rmg_database_object() + load_families_only(cls.rmgdb) + + cls.rxn_1 = ARCReaction(r_species=[ARCSpecies(label='CPD', smiles='C1C=CC=C1', + xyz="""C -1.11689933 -0.16076292 -0.17157587 + C -0.34122713 1.12302797 -0.12498608 + C 0.95393962 0.86179733 0.10168911 + C 1.14045506 -0.56033684 0.22004768 + C -0.03946631 -1.17782376 0.06650470 + H -1.58827673 -0.30386166 -1.14815401 + H -1.87502410 -0.19463481 0.61612857 + H -0.77193310 2.10401684 -0.25572143 + H 1.74801386 1.58807889 0.18578522 + H 2.09208098 -1.03534789 0.40412258 + H -0.20166282 -2.24415315 0.10615953""")], + p_species=[ARCSpecies(label='C5_carbene', adjlist="""1 C u0 p1 c0 {2,S} {6,S} + 2 C u0 p0 c0 {1,S} {3,D} {7,S} + 3 C u0 p0 c0 {2,D} {4,S} {8,S} + 4 C u0 p0 c0 {3,S} {5,D} {9,S} + 5 C u0 p0 c0 {4,D} {10,S} {11,S} + 6 H u0 p0 c0 {1,S} + 7 H u0 p0 c0 {2,S} + 8 H u0 p0 c0 {3,S} + 9 H u0 p0 c0 {4,S} + 10 H u0 p0 c0 {5,S} + 11 H u0 p0 c0 {5,S}""", + xyz="""C 2.62023459 0.49362130 -0.23013873 + C 1.48006570 -0.33866786 -0.38699247 + C 1.53457595 -1.45115429 -1.13132450 + C 0.40179762 -2.32741928 -1.31937443 + C 0.45595744 -3.43865596 -2.06277224 + H 3.47507694 1.11901971 -0.11163109 + H 0.56454036 -0.04212124 0.11659958 + H 2.46516705 -1.72493574 -1.62516589 + H -0.53390611 -2.06386676 -0.83047533 + H -0.42088759 -4.06846526 -2.17670487 + H 1.36205133 -3.75009763 -2.57288841""")]) + cls.rxn_1.determine_family(rmg_database=cls.rmgdb) + + def 
test_average_zmat_params(self): + """Test the average_zmat_params() function.""" + zmat_1 = {'symbols': ('H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None)), + 'vars': {'R_1_0': 0.7}, + 'map': {0: 0, 1: 1}} + zmat_2 = {'symbols': ('H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None)), + 'vars': {'R_1_0': 1.3}, + 'map': {0: 0, 1: 1}} + expected_zmat = {'symbols': ('H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None)), + 'vars': {'R_1_0': 1.0}, + 'map': {0: 0, 1: 1}} + zmat = average_zmat_params(zmat_1, zmat_2) + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + + expected_zmat = {'symbols': ('H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None)), + 'vars': {'R_1_0': 0.85}, + 'map': {0: 0, 1: 1}} + zmat = average_zmat_params(zmat_1, zmat_2, weight=0.25) + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + zmat = average_zmat_params(zmat_2, zmat_1, weight=0.75) + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + + zmat_1 = {'symbols': ('C', 'N', 'H', 'H', 'H', 'H', 'H'), + 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2|4_0|0', 'A_2|4_0|0_1|1', None), + ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_3_1_0_2'), + ('R_2|4_0|0', 'A_2|4_0|0_1|1', 'D_4_0_1_3'), ('R_5_0', 'A_5_0_1', 'D_5_0_1_4'), + ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_6_1_0_5')), + 'vars': {'R_1_0': 1.451965854148702, 'D_3_1_0_2': 60.83821034525936, + 'D_4_0_1_3': 301.30263742432356, 'R_5_0': 1.0936965384360282, + 'A_5_0_1': 110.59878027260544, 'D_5_0_1_4': 239.76779188408136, + 'D_6_1_0_5': 65.17113681053117, 'R_2|4_0|0': 1.0935188594180785, + 'R_3|6_1|1': 1.019169330302324, 'A_2|4_0|0_1|1': 110.20495980110817, + 'A_3|6_1|1_0|0': 109.41187648524644}, + 'map': {0: 0, 1: 1, 2: 2, 3: 5, 4: 3, 5: 4, 6: 6}} + zmat_2 = {'symbols': ('C', 'N', 'H', 'H', 'H', 'H', 'H'), + 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2|4_0|0', 'A_2|4_0|0_1|1', None), + ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_3_1_0_2'), + ('R_2|4_0|0', 'A_2|4_0|0_1|1', 
'D_4_0_1_3'), ('R_5_0', 'A_5_0_1', 'D_5_0_1_4'), + ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_6_1_0_5')), + 'vars': {'R_1_0': 1.2, 'D_3_1_0_2': 50, + 'D_4_0_1_3': 250, 'R_5_0': 1.0936965384360282, + 'A_5_0_1': 110.59878027260544, 'D_5_0_1_4': 239.76779188408136, + 'D_6_1_0_5': 120, 'R_2|4_0|0': 1.0935188594180785, + 'R_3|6_1|1': 1.6, 'A_2|4_0|0_1|1': 110.20495980110817, + 'A_3|6_1|1_0|0': 109.41187648524644}, + 'map': {0: 0, 1: 1, 2: 2, 3: 5, 4: 3, 5: 4, 6: 6}} + expected_zmat = {'symbols': ('C', 'N', 'H', 'H', 'H', 'H', 'H'), + 'coords': ((None, None, None), ('R_1_0', None, None), ('R_2|4_0|0', 'A_2|4_0|0_1|1', None), + ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_3_1_0_2'), ('R_2|4_0|0', 'A_2|4_0|0_1|1', 'D_4_0_1_3'), + ('R_5_0', 'A_5_0_1', 'D_5_0_1_4'), ('R_3|6_1|1', 'A_3|6_1|1_0|0', 'D_6_1_0_5')), + 'vars': {'R_1_0': 1.3259829270743508, 'D_3_1_0_2': 55.419105172629685, + 'D_4_0_1_3': 275.6513187121618, 'R_5_0': 1.0936965384360282, + 'A_5_0_1': 110.59878027260544, 'D_5_0_1_4': 239.76779188408136, + 'D_6_1_0_5': 92.58556840526558, 'R_2|4_0|0': 1.0935188594180785, + 'R_3|6_1|1': 1.309584665151162, 'A_2|4_0|0_1|1': 110.20495980110817, + 'A_3|6_1|1_0|0': 109.41187648524644}, + 'map': {0: 0, 1: 1, 2: 2, 3: 5, 4: 3, 5: 4, 6: 6}} + zmat = average_zmat_params(zmat_1, zmat_2) + self.assertTrue(_compare_zmats(zmat, expected_zmat)) + + def test_get_weight(self): + """Test the get_weight() function.""" + self.assertEqual(get_weight([0], [0], 4), 0.5) # 4 / 8 + self.assertEqual(get_weight([0], [8], 12), 0.75) # 12 / 20 + self.assertEqual(get_weight([0], [2], 6), 0.6) # 6 / 10 + self.assertEqual(get_weight([10], [0], 30), 0.4) # 20 / 50 + self.assertEqual(get_weight([20], [10], 40), 0.4) # 20 / 50 + self.assertIsNone(get_weight([20], [None], 40), 0.4) # 20 / 50 + self.assertEqual(get_weight([8, 2], [0], 30), 0.4) # 20 / 50 + self.assertEqual(get_weight([4, 1], [5.5, 1.5], 11), 0.6) # 6 / 10 + + def test_get_rxn_weight(self): + """Test the get_rxn_weight() function.""" + rxn_1 = 
ARCReaction(r_species=[ARCSpecies(label='HO2', smiles='[O]O'), + ARCSpecies(label='NH', smiles='[NH]')], + p_species=[ARCSpecies(label='N', smiles='[N]'), + ARCSpecies(label='H2O2', smiles='OO')]) + rxn_1.r_species[0].e0 = 252.0 + rxn_1.r_species[1].e0 = 100.5 + rxn_1.p_species[0].e0 = 116.0 + rxn_1.p_species[1].e0 = 200.3 + rxn_1.ts_species = ARCSpecies(label='TS', is_ts=True) + rxn_1.ts_species.e0 = 391.6 + self.assertAlmostEquals(get_rxn_weight(rxn_1), 0.3417832) + + def test_interpolate_isomerization(self): + """Test the interpolate_isomerization() function.""" + nc3h7_xyz = """C 0.00375165 -0.48895802 -1.20586379 + C 0.00375165 -0.48895802 0.28487510 + C 0.00375165 0.91997987 0.85403684 + H 0.41748586 -1.33492098 -1.74315104 + H -0.57506729 0.24145491 -1.76006154 + H -0.87717095 -1.03203740 0.64280162 + H 0.88948616 -1.02465371 0.64296621 + H 0.88512433 1.48038223 0.52412379 + H 0.01450405 0.88584135 1.94817394 + H -0.88837301 1.47376959 0.54233121""" + ic3h7_xyz = """C -0.40735690 -0.74240205 -0.34312948 + C 0.38155377 -0.25604705 0.82450968 + C 0.54634593 1.25448345 0.81064511 + H 0.00637731 -1.58836501 -0.88041673 + H -0.98617584 -0.01198912 -0.89732723 + H -1.29710684 -1.29092340 0.08598983 + H 1.36955428 -0.72869684 0.81102246 + H 1.06044877 1.58846788 -0.09702437 + H 1.13774084 1.57830484 1.67308862 + H -0.42424546 1.75989927 0.85794283""" + nc3h7 = ARCSpecies(label='nC3H7', smiles='[CH2]CC', xyz=nc3h7_xyz) + ic3h7 = ARCSpecies(label='iC3H7', smiles='C[CH]C', xyz=ic3h7_xyz) + rxn = ARCReaction(r_species=[nc3h7], p_species=[ic3h7]) + expected_ts_xyz = str_to_xyz("""C 0.01099731 -0.46789926 -1.15958911 + C 0.01099731 -0.46789926 0.33114978 + C 0.01099731 0.94103865 0.90031155 + H 0.57795661 -1.24174248 -1.65467180 + H -0.39690222 0.34527841 -1.69240298 + H -1.19440431 -1.28933062 -0.47327539 + H 0.89689057 -1.16420498 0.45967951 + H 0.76979130 1.33747945 0.33815513 + H -0.04544494 0.70455273 1.77835334 + H -1.00071642 1.24557408 0.38839197""") + ts_xyz = 
interpolate_isomerization(rxn, use_weights=False) + self.assertTrue(almost_equal_coords(ts_xyz, expected_ts_xyz)) + + nc3h7.e0 = 101.55 + ic3h7.e0 = 88.91 + ts = ARCSpecies(label='TS', is_ts=True, multiplicity=2, xyz=expected_ts_xyz) + ts.e0 = 105 + rxn.ts_species = ts + expected_ts_xyz = str_to_xyz("""C 0.01224420 -0.47400672 -1.18787451 + C 0.01224420 -0.47400672 0.30286438 + C 0.01224420 0.93493122 0.87202615 + H 0.47981756 -1.29923732 -1.70742021 + H -0.50470551 0.28201158 -1.73526026 + H -1.06475721 -1.18141451 0.26785378 + H 0.86736552 -1.12118386 0.54383845 + H 0.79813573 1.38347069 0.43772483 + H -0.03897336 0.76031233 1.86961141 + H -0.97425159 1.33180895 0.47825005""") + ts_xyz = interpolate_isomerization(rxn, use_weights=True) + self.assertTrue(almost_equal_coords(ts_xyz, expected_ts_xyz)) + + def test_linear_adapter(self): + """Test the LinearAdapter class.""" + self.assertEqual(self.rxn_1.family.label, 'Cyclopentadiene_scission') + linear_1 = LinearAdapter(job_type='tsg', + reactions=[self.rxn_1], + testing=True, + project='test', + project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_linear', 'tst1'), + ) + self.assertIsNone(self.rxn_1.ts_species) + linear_1.execute() + self.assertEqual(len(self.rxn_1.ts_species.ts_guesses), 1) + self.assertEqual(self.rxn_1.ts_species.ts_guesses[0].initial_xyz['symbols'], + ('C', 'C', 'C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H')) + + @classmethod + def tearDownClass(cls): + """ + A function that is run ONCE after all unit tests in this class. + Delete all project directories created during these unit tests. 
+ """ + shutil.rmtree(os.path.join(ARC_PATH, 'arc', 'testing', 'test_linear'), ignore_errors=True) + + +if __name__ == '__main__': + unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) From 5028683b56be8d13d5ef5b4303b348f9ac3b1792 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sun, 26 May 2024 23:35:26 +0300 Subject: [PATCH 14/19] Added order_xyz_by_atom_map to converter --- arc/species/converter.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arc/species/converter.py b/arc/species/converter.py index 9b943f2a2a..f175928449 100644 --- a/arc/species/converter.py +++ b/arc/species/converter.py @@ -2096,3 +2096,22 @@ def ics_to_scan_constraints(ics: list, raise NotImplementedError(f'Given software {software} is not implemented ' f'for ics_to_scan_constraints().') return scan_trsh + + +def order_xyz_by_atom_map(xyz: dict, + atom_map: list, + ) -> dict: + """ + Order xyz coordinates according to the atom map. + + Args: + xyz (dict): The xyz coordinates. + atom_map (list): The atom map. + + Returns: + dict: The ordered xyz coordinates. 
+ """ + symbols = [xyz['symbols'][i] for i in atom_map] + isotopes = [xyz['isotopes'][i] for i in atom_map] if 'isotopes' in xyz else None + coords = [xyz['coords'][i] for i in atom_map] + return xyz_from_data(coords=coords, symbols=symbols, isotopes=isotopes) From de4d1f61a140e1bc2242c730246b08202e33325d Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sun, 26 May 2024 23:35:40 +0300 Subject: [PATCH 15/19] Tests: converter order_xyz_by_atom_map() --- arc/species/converter_test.py | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arc/species/converter_test.py b/arc/species/converter_test.py index a77f92ffde..d15d75b69c 100644 --- a/arc/species/converter_test.py +++ b/arc/species/converter_test.py @@ -4968,6 +4968,45 @@ def test_check_isomorphism(self): mol2 = Molecule(smiles='[N-]=[N+]=O') self.assertTrue(converter.check_isomorphism(mol1, mol2)) + def test_order_xyz_by_atom_map(self): + """Test ordering xyz by atom map""" + xyz = {'symbols': ('C', 'H', 'H', 'H', 'H'), + 'isotopes': (12, 1, 1, 1, 1), + 'coords': ((0.0, 0.0, 0.0), + (0.6300326, 0.6300326, 0.6300326), + (-0.6300326, -0.6300326, 0.6300326), + (-0.6300326, 0.6300326, -0.6300326), + (0.6300326, -0.6300326, -0.6300326))} + atom_map = [4, 3, 1, 2, 0] + ordered_xyz = converter.order_xyz_by_atom_map(xyz, atom_map) + expected_xyz = {'symbols': ('H', 'H', 'H', 'H', 'C'), + 'isotopes': (1, 1, 1, 1, 12), + 'coords': ((0.6300326, -0.6300326, -0.6300326), + (-0.6300326, 0.6300326, -0.6300326), + (0.6300326, 0.6300326, 0.6300326), + (-0.6300326, -0.6300326, 0.6300326), + (0.0, 0.0, 0.0))} + self.assertEqual(ordered_xyz, expected_xyz) + + xyz = {'symbols': ('C', 'C', 'C', 'H', 'H', 'H', 'H', 'H', 'H', 'H'), + 'isotopes': (12, 12, 12, 1, 1, 1, 1, 1, 1, 1), + 'coords': ((-0.4073569, -0.74240205, -0.34312948), (0.38155377, -0.25604705, 0.82450968), + (0.54634593, 1.25448345, 0.81064511), (0.00637731, -1.58836501, -0.88041673), + (-0.98617584, -0.01198912, -0.89732723), 
(-1.29710684, -1.2909234, 0.08598983), + (1.36955428, -0.72869684, 0.81102246), (1.06044877, 1.58846788, -0.09702437), + (1.13774084, 1.57830484, 1.67308862), (-0.42424546, 1.75989927, 0.85794283))} + ordered_xyz = converter.order_xyz_by_atom_map(xyz, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + self.assertEqual(ordered_xyz, xyz) + expected_xyz = {'symbols': ('H', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'C'), + 'isotopes': (1, 1, 1, 1, 1, 1, 1, 12, 12, 12), + 'coords': ((-0.42424546, 1.75989927, 0.85794283), (1.13774084, 1.57830484, 1.67308862), + (1.06044877, 1.58846788, -0.09702437), (1.36955428, -0.72869684, 0.81102246), + (-1.29710684, -1.2909234, 0.08598983), (-0.98617584, -0.01198912, -0.89732723), + (0.00637731, -1.58836501, -0.88041673), (0.54634593, 1.25448345, 0.81064511), + (0.38155377, -0.25604705, 0.82450968), (-0.4073569, -0.74240205, -0.34312948))} + ordered_xyz = converter.order_xyz_by_atom_map(xyz, [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) + self.assertEqual(ordered_xyz, expected_xyz) + def test_cluster_confs_by_rmsd(self): nco_1 = {'symbols': ('C', 'H', 'H', 'O', 'H', 'N', 'H', 'H'), 'isotopes': (12, 1, 1, 16, 1, 14, 1, 1), From 607b16eeac425d44f00bbc2e37ce941d82c94835 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sun, 26 May 2024 23:35:52 +0300 Subject: [PATCH 16/19] Added update_zmat_by_xyz() to zmat --- arc/species/zmat.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/arc/species/zmat.py b/arc/species/zmat.py index 2cc3519775..958e1d5957 100644 --- a/arc/species/zmat.py +++ b/arc/species/zmat.py @@ -1361,7 +1361,7 @@ def get_atom_order(xyz: Optional[Dict[str, tuple]] = None, Entries are atom index lists of all atoms in a fragment, each list represents a different fragment. constraints_dict (dict, optional): A dictionary of atom constraints. The function will try to find an atom order in which all constrained atoms - are after the atoms they are constraint to. 
+ are after the atoms they are constrained to. Returns: List[int]: The atom order, 0-indexed. @@ -1412,7 +1412,7 @@ def get_atom_order_from_mol(mol: Molecule, for constraint_type, constraint_list in constraints_dict.items(): constraints.extend(constraint_list) # A list of all constraint tuples. for constraint in constraint_list: - # A list of the atoms being constraint to other atoms. + # A list of the atoms being constrained to other atoms. constraint_atoms.append(constraint[0]) # Only the first atom in the constraint tuple is really constrained. if constraint_type == 'D_group': for constraint_indices in constraint_list: @@ -2110,3 +2110,30 @@ def check_ordered_zmats(zmat_1: dict, bool: Whether the ZMats are ordered. """ return zmat_1['symbols'] == zmat_2['symbols'] and zmat_1['vars'].keys() == zmat_2['vars'].keys() + + +def update_zmat_by_xyz(zmat: dict, + xyz: Dict[str, tuple], + ) -> dict: + """ + Update a zmat vars by xyz. + + Args: + zmat (dict): The zmat to update. + xyz (dict): The xyz to update the zmat with. + + Returns: + dict: The updated zmat. 
+ """ + zmat = {'symbols': zmat['symbols'], + 'coords': zmat['coords'], + 'vars': zmat['vars'], + 'map': zmat['map'], + } + new_vars = dict() + for key, val in zmat['vars'].items(): + indices = get_atom_indices_from_zmat_parameter(key)[0] + indices = [zmat['map'][index] for index in indices] + new_vars[key] = calculate_param(coords=xyz, atoms=indices) + zmat['vars'] = new_vars + return zmat From 6f662330581dd41be528649e1d04b3c108cf5fdf Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Sun, 26 May 2024 23:36:18 +0300 Subject: [PATCH 17/19] Tests: zmat update_zmat_by_xyz() --- arc/species/zmat_test.py | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arc/species/zmat_test.py b/arc/species/zmat_test.py index 9e71af78ac..b5c2a9ca1e 100644 --- a/arc/species/zmat_test.py +++ b/arc/species/zmat_test.py @@ -9,6 +9,7 @@ import arc.species.zmat as zmat from arc.exceptions import ZMatError +from arc.species.converter import str_to_xyz from arc.species.species import ARCSpecies @@ -1854,6 +1855,47 @@ def test_check_ordered_zmats(self): self.assertFalse(zmat.check_ordered_zmats(zmat_2, zmat_3)) self.assertTrue(zmat.check_ordered_zmats(zmat_1, zmat_3)) + def test_update_zmat_by_xyz(self): + """Test the update_zmat_by_xyz() function.""" + xyz_1 = str_to_xyz("""C -2.02459021 0.31147541 0.00000000 + H -1.66791737 0.81587360 0.87365150 + H -3.09459021 0.31148859 0.00000000 + N -1.53460703 -1.07445983 0.00000000 + H -1.86794423 -1.54585934 0.81649790 + H -1.86794803 -1.54586068 -0.81649557 + O -1.54791529 0.98557766 -1.16759033 + H -1.82476883 1.90445028 -1.14258591""") + zmat_1 = zmat.xyz_to_zmat(xyz=xyz_1, mol=ARCSpecies(label='NCO', xyz=xyz_1).mol, consolidate=False) + xyz_2 = str_to_xyz("""C -2.12697003 0.60106045 0.00000000 + H -1.77029719 1.10545864 0.87365150 + H -3.19697003 0.60107363 0.00000000 + N -1.43222721 -1.36404487 0.00000000 + H -1.76556441 -1.83544438 0.81649790 + H -1.76556821 -1.83544572 -0.81649557 + O -1.65029511 
1.27516270 -1.16759033 + H -1.11689351 0.67210652 -1.69047270""") + zmat_2 = zmat.update_zmat_by_xyz(zmat_1, xyz_2) + expected_zmat_2 = {'symbols': ('N', 'C', 'O', 'H', 'H', 'H', 'H', 'H'), + 'coords': ((None, None, None), + ('R_1_0', None, None), + ('R_2_1', 'A_2_1_0', None), + ('R_3_1', 'A_3_1_0', 'D_3_1_0_2'), + ('R_4_1', 'A_4_1_0', 'D_4_1_0_3'), + ('R_5_0', 'A_5_0_1', 'D_5_0_1_4'), + ('R_6_0', 'A_6_0_1', 'D_6_0_1_5'), + ('R_7_2', 'A_7_2_1', 'D_7_2_1_0')), + 'vars': {'A_2_1_0': 109.47120138885585, 'A_3_1_0': 109.47119974631076, + 'A_4_1_0': 109.47123024500976, 'A_5_0_1': 109.47119888440865, + 'A_6_0_1': 109.47120176378891, 'A_7_2_1': 109.49999787419641, + 'D_3_1_0_2': 239.9999860097654, 'D_4_1_0_3': 240.00000689317258, + 'D_5_0_1_4': 299.9998481421676, 'D_6_0_1_5': 120.00001661106755, + 'D_7_2_1_0': 359.9999980909041, 'R_1_0': 2.084299994953611, + 'R_2_1': 1.4300000625556986, 'R_3_1': 1.0700000390382562, + 'R_4_1': 1.0699999276277037, 'R_5_0': 0.9999999701976772, + 'R_6_0': 1.000000059604643, 'R_7_2': 0.9599999917546908}, + 'map': {0: 3, 1: 0, 2: 6, 3: 1, 4: 2, 5: 4, 6: 5, 7: 7}} + self.assertEqual(zmat_2, expected_zmat_2) + if __name__ == '__main__': unittest.main(testRunner=unittest.TextTestRunner(verbosity=2)) From ae4aca591fec2171866199fc4188665f659f0492 Mon Sep 17 00:00:00 2001 From: Alon Grinberg Dana Date: Tue, 28 May 2024 08:19:27 +0300 Subject: [PATCH 18/19] f! 
def interpolate_isomerization(rxn: 'ARCReaction',
                              use_weights: bool = False,
                              ) -> Optional[List[dict]]:
    """
    Search for a TS of an isomerization reaction by interpolating internal coords.

    One guess is attempted per RMG reaction generated for ``rxn``. For each of them a reactant
    Z-matrix is built with "R_atom" constraints derived from the expected changing bonds,
    the product coordinates (re-ordered by the reaction atom map) are expressed using the same
    Z-matrix definition, and the two sets of Z-matrix parameters are averaged.
    Only the first reactant and the first product of ``rxn`` are used.

    Args:
        rxn (ARCReaction): The reaction object.
        use_weights (bool, optional): Whether to use the well energies to determine relative interpolation weights.

    Returns:
        Optional[List[dict]]: The XYZ coordinates guesses. ``None`` if the interpolation weight could not be
                              determined; an empty list if no guess could be generated.
    """
    weight = get_rxn_weight(rxn) if use_weights else 0.5
    if weight is None:
        return None
    rmg_reactions = get_rmg_reactions_from_arc_reaction(arc_reaction=rxn) or list()
    ts_xyzs = list()
    # The atom-map-ordered product geometry does not depend on the RMG reaction being iterated.
    # It is computed lazily (only once a usable RMG reaction is found) and then re-used.
    # NOTE(review): assumes update_zmat_by_xyz() does not modify the xyz it is given - confirm.
    ordered_p_xyz = None
    for rmg_reaction in rmg_reactions:
        r_label_dict = get_atom_indices_of_labeled_atoms_in_an_rmg_reaction(arc_reaction=rxn,
                                                                            rmg_reaction=rmg_reaction)[0]
        if r_label_dict is None:
            continue
        expected_breaking_bonds, expected_forming_bonds = rxn.get_expected_changing_bonds(r_label_dict=r_label_dict)
        if expected_breaking_bonds is None or expected_forming_bonds is None:
            continue
        r_zmat = xyz_to_zmat(xyz=rxn.r_species[0].get_xyz(),
                             mol=rxn.r_species[0].mol,
                             consolidate=False,
                             constraints=get_r_constraints(expected_breaking_bonds=expected_breaking_bonds,
                                                           expected_forming_bonds=expected_forming_bonds),
                             )
        if ordered_p_xyz is None:
            ordered_p_xyz = order_xyz_by_atom_map(xyz=rxn.p_species[0].get_xyz(), atom_map=rxn.atom_map)
        p_zmat = update_zmat_by_xyz(zmat=r_zmat, xyz=ordered_p_xyz)
        ts_zmat = average_zmat_params(zmat_1=r_zmat, zmat_2=p_zmat, weight=weight)
        if ts_zmat is not None:
            ts_xyzs.append(zmat_to_xyz(ts_zmat))
    return ts_xyzs


def get_r_constraints(expected_breaking_bonds: List[Tuple[int, int]],
                      expected_forming_bonds: List[Tuple[int, int]],
                      ) -> Dict[str, List[Tuple[int, int]]]:
    """
    Get the "R_atom" constraints for the reactant ZMat.

    Atoms are ranked by the number of changing (breaking or forming) bonds they participate in,
    most frequent first; ties keep the order of first appearance (breaking bonds are scanned
    before forming bonds). For each atom in that ranking, the first bond that contains it and does not
    contain any atom ranked before it contributes one constraint, written with the current atom first.
    Atoms for which no such bond exists contribute nothing.

    Args:
        expected_breaking_bonds (List[Tuple[int, int]]): Expected breaking bonds.
        expected_forming_bonds (List[Tuple[int, int]]): Expected forming bonds.

    Returns:
        Dict[str, List[Tuple[int, int]]]: The constraints, stored under the ``'R_atom'`` key.
    """
    # Build the combined bond list once (it was re-created on every loop iteration before),
    # and normalize every bond to a tuple so the output type is uniform.
    bonds = [tuple(bond) for bond in list(expected_breaking_bonds) + list(expected_forming_bonds)]
    atom_occurrences = dict()
    for bond in bonds:
        for atom in bond:
            atom_occurrences[atom] = atom_occurrences.get(atom, 0) + 1
    # sorted() is stable (also with reverse=True), so equally frequent atoms keep their first-appearance order.
    atoms_sorted_by_frequency = sorted(atom_occurrences, key=atom_occurrences.get, reverse=True)
    constraints = list()
    handled_atoms = set()  # Atoms ranked before the current one (replaces repeated list slicing).
    for atom in atoms_sorted_by_frequency:
        for bond in bonds:
            if atom in bond and handled_atoms.isdisjoint(bond):
                constraints.append(bond if atom == bond[0] else (bond[1], bond[0]))
                break
        handled_atoms.add(atom)
    return {'R_atom': constraints}
ARCReaction(r_species=[ARCSpecies(label='CCONO', smiles='CCON=O', + xyz="""C -1.36894499 0.07118059 -0.24801399 + C -0.01369535 0.17184136 0.42591278 + O -0.03967083 -0.62462610 1.60609048 + N 1.23538512 -0.53558048 2.24863846 + O 1.25629155 -1.21389295 3.27993827 + H -2.16063255 0.41812452 0.42429392 + H -1.39509985 0.66980796 -1.16284741 + H -1.59800183 -0.96960842 -0.49986392 + H 0.19191326 1.21800574 0.68271847 + H 0.76371340 -0.19234475 -0.25650067""")], + p_species=[ARCSpecies(label='CCNO2', smiles='CC[N+](=O)[O-]', + xyz="""C -1.12362739 -0.04664655 -0.08575959 + C 0.24488022 -0.51587553 0.36119196 + N 0.57726975 -1.77875156 -0.37104243 + O 1.16476543 -1.66382529 -1.45384186 + O 0.24561669 -2.84385320 0.16410116 + H -1.87655344 -0.80826847 0.13962125 + H -1.14729169 0.14493421 -1.16405294 + H -1.41423043 0.87863077 0.42354512 + H 1.02430791 0.21530309 0.12674144 + H 0.27058353 -0.73979548 1.43184405""")]) + cls.rxn_2.determine_family(rmg_database=cls.rmgdb) + def test_average_zmat_params(self): """Test the average_zmat_params() function.""" zmat_1 = {'symbols': ('H', 'H'), @@ -166,8 +193,8 @@ def test_get_rxn_weight(self): rxn_1.ts_species.e0 = 391.6 self.assertAlmostEquals(get_rxn_weight(rxn_1), 0.3417832) - def test_interpolate_isomerization(self): - """Test the interpolate_isomerization() function.""" + def test_interpolate_isomerization_intra_h_migration(self): + """Test the interpolate_isomerization() function for intra H migration reactions.""" nc3h7_xyz = """C 0.00375165 -0.48895802 -1.20586379 C 0.00375165 -0.48895802 0.28487510 C 0.00375165 0.91997987 0.85403684 @@ -191,36 +218,76 @@ def test_interpolate_isomerization(self): nc3h7 = ARCSpecies(label='nC3H7', smiles='[CH2]CC', xyz=nc3h7_xyz) ic3h7 = ARCSpecies(label='iC3H7', smiles='C[CH]C', xyz=ic3h7_xyz) rxn = ARCReaction(r_species=[nc3h7], p_species=[ic3h7]) - expected_ts_xyz = str_to_xyz("""C 0.01099731 -0.46789926 -1.15958911 - C 0.01099731 -0.46789926 0.33114978 - C 0.01099731 0.94103865 
0.90031155 - H 0.57795661 -1.24174248 -1.65467180 - H -0.39690222 0.34527841 -1.69240298 - H -1.19440431 -1.28933062 -0.47327539 - H 0.89689057 -1.16420498 0.45967951 - H 0.76979130 1.33747945 0.33815513 - H -0.04544494 0.70455273 1.77835334 - H -1.00071642 1.24557408 0.38839197""") - ts_xyz = interpolate_isomerization(rxn, use_weights=False) - self.assertTrue(almost_equal_coords(ts_xyz, expected_ts_xyz)) + expected_ts_xyz = str_to_xyz("""C 0.00598652 -0.48762088 -1.18600054 + C 0.00598652 -0.48762088 0.30473835 + C 0.00598652 0.92131703 0.87390011 + H 0.57807817 -1.25594905 -1.69382911 + H -0.42698663 0.35443110 -1.71434916 + H -1.27461406 -1.27709743 -0.24121083 + H 0.89172104 -1.02331658 0.66282944 + H 0.88735917 1.48171935 0.54398704 + H 0.01673891 0.88717852 1.96803717 + H -0.88613815 1.47510670 0.56219446""") + ts_xyzs = interpolate_isomerization(rxn, use_weights=False) + self.assertEqual(len(ts_xyzs), 2) + self.assertTrue(almost_equal_coords(ts_xyzs[0], expected_ts_xyz)) nc3h7.e0 = 101.55 ic3h7.e0 = 88.91 ts = ARCSpecies(label='TS', is_ts=True, multiplicity=2, xyz=expected_ts_xyz) ts.e0 = 105 rxn.ts_species = ts - expected_ts_xyz = str_to_xyz("""C 0.01224420 -0.47400672 -1.18787451 - C 0.01224420 -0.47400672 0.30286438 - C 0.01224420 0.93493122 0.87202615 - H 0.47981756 -1.29923732 -1.70742021 - H -0.50470551 0.28201158 -1.73526026 - H -1.06475721 -1.18141451 0.26785378 - H 0.86736552 -1.12118386 0.54383845 - H 0.79813573 1.38347069 0.43772483 - H -0.03897336 0.76031233 1.86961141 - H -0.97425159 1.33180895 0.47825005""") - ts_xyz = interpolate_isomerization(rxn, use_weights=True) - self.assertTrue(almost_equal_coords(ts_xyz, expected_ts_xyz)) + expected_ts_xyz = str_to_xyz("""C 0.00591772 -0.48764618 -1.20069282 + C 0.00591772 -0.48764618 0.29004607 + C 0.00591772 0.92129176 0.85920784 + H 0.47693974 -1.30982443 -1.72763512 + H -0.52424330 0.28530048 -1.74580953 + H -1.07348606 -1.15308709 0.40763997 + H 0.89165221 -1.02334186 0.64813718 + H 0.88729039 
1.48169408 0.52929476 + H 0.01667012 0.88715326 1.95334482 + H -0.88620694 1.47508143 0.54750218""") + ts_xyzs = interpolate_isomerization(rxn, use_weights=True) + self.assertEqual(len(ts_xyzs), 2) + self.assertTrue(almost_equal_coords(ts_xyzs[0], expected_ts_xyz)) + + # r_xyz = """C -1.05582103 -0.03329574 -0.10080257 + # C 0.41792695 0.17831205 0.21035514 + # O 1.19234020 -0.65389683 -0.61111443 + # O 2.44749684 -0.41401220 -0.28381363 + # H -1.33614002 -1.09151783 0.08714882 + # H -1.25953618 0.21489046 -1.16411897 + # H -1.67410396 0.62341419 0.54699514 + # H 0.59566350 -0.06437686 1.28256640 + # H 0.67254676 1.24676329 0.02676370""" + # p_xyz = """C -1.40886397 0.22567351 -0.37379668 + # C 0.06280787 0.04097694 -0.38515682 + # O 0.44130326 -0.57668419 0.84260864 + # O 1.89519755 -0.66754203 0.80966180 + # H -1.87218376 0.90693511 -1.07582340 + # H -2.03646287 -0.44342165 0.20255768 + # H 0.35571681 -0.60165457 -1.22096147 + # H 0.56095122 1.01161503 -0.47393734 + # H 2.05354047 -0.10415729 1.58865243""" + # r = ARCSpecies(label='R', smiles='CCO[O]', xyz=r_xyz) + # p = ARCSpecies(label='P', smiles='[CH2]COO', xyz=p_xyz) + # rxn = ARCReaction(r_species=[r], p_species=[p]) + # expected_ts_xyz = str_to_xyz("""C 0.00598652 -0.48762088 -1.18600054 + # C 0.00598652 -0.48762088 0.30473835 + # C 0.00598652 0.92131703 0.87390011 + # H 0.57807817 -1.25594905 -1.69382911 + # H -0.42698663 0.35443110 -1.71434916 + # H -1.27461406 -1.27709743 -0.24121083 + # H 0.89172104 -1.02331658 0.66282944 + # H 0.88735917 1.48171935 0.54398704 + # H 0.01673891 0.88717852 1.96803717 + # H -0.88613815 1.47510670 0.56219446""") + # ts_xyzs = interpolate_isomerization(rxn, use_weights=False) + # self.assertEqual(len(ts_xyzs), 3) + # for ts_xyz in ts_xyzs: + # print(f'\nTS xyz:\n\n') + # print(xyz_to_str(ts_xyz)) + # self.assertTrue(almost_equal_coords(ts_xyzs[0], expected_ts_xyz)) def test_linear_adapter(self): """Test the LinearAdapter class.""" @@ -229,7 +296,7 @@ def 
def test_linear_adapter_2(self):
    """Test the LinearAdapter class for an intra_NO2_ONO_conversion reaction (CCONO <=> CCNO2)."""
    self.rxn_2.family = KineticsFamily(label='intra_NO2_ONO_conversion')
    self.rxn_2.atom_map = [0, 1, 3, 2, 4, 5, 7, 6, 9, 8]
    self.assertEqual(self.rxn_2.family.label, 'intra_NO2_ONO_conversion')
    linear_2 = LinearAdapter(job_type='tsg',
                             reactions=[self.rxn_2],
                             testing=True,
                             project='test',
                             project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_linear', 'rxn_2'),
                             )
    self.assertIsNone(self.rxn_2.ts_species)
    linear_2.execute()
    self.assertEqual(len(self.rxn_2.ts_species.ts_guesses), 1)
    # rxn_2 is C2H5NO2 (10 atoms); the previously expected symbols were those of rxn_1 (C5H6, 11 atoms).
    # Only the atom content is compared here.
    # NOTE(review): presumably the guess keeps the reactant atom order - tighten to an ordered comparison once confirmed.
    self.assertCountEqual(self.rxn_2.ts_species.ts_guesses[0].initial_xyz['symbols'],
                          ('C', 'C', 'O', 'N', 'O', 'H', 'H', 'H', 'H', 'H'))

def test_get_r_constraints(self):
    """Test the get_r_constraints() function."""
    self.assertEqual(get_r_constraints([(1, 5)], [(0, 5)]), {'R_atom': [(5, 1)]})
    self.assertEqual(get_r_constraints([(1, 5), (7, 2), (8, 2)], [(0, 5), (7, 4), (8, 1)]),
                     {'R_atom': [(1, 5), (5, 0), (7, 2), (2, 8)]})