
QChem Adapter & Molpro Updates #712

Status: Open. Wants to merge 31 commits into base: main.
Changes from 29 commits (31 commits total).

Commits:
62433b1
job killed by walltime recognition
calvinp0 Dec 12, 2023
21d0e0e
Due to Zeus issues, checking if submission is true
calvinp0 May 25, 2023
8054f43
Adjustments for non-submissions
calvinp0 Jun 26, 2023
690a831
It appears that when the function is using an OPT job to get an SP, i…
calvinp0 Jun 26, 2023
2b59d29
It appears that when we submit the coords of a job (job.xyz) it is in…
calvinp0 Jun 26, 2023
d38e259
only check qstat if zeus
calvinp0 Jun 28, 2023
c10e4f3
Updating trh server queue
calvinp0 Jan 28, 2024
60dd90b
asis Set Dataframe
calvinp0 Nov 14, 2023
1011847
Updated Adapter Mem Test - SLURM
calvinp0 Nov 14, 2023
9766ad9
Molpro Adapter
calvinp0 Nov 14, 2023
189eb62
QChem Adapter & Tests
calvinp0 Nov 14, 2023
2a1738c
Updated Trsh and Trsh Tests
calvinp0 Nov 14, 2023
6ca490e
Added QChem to be able to do IRC
calvinp0 Nov 14, 2023
ff7583c
Added azure in ess settings
calvinp0 Nov 14, 2023
0c23475
QChem Normal Mode Displacement
calvinp0 Nov 14, 2023
752f417
Scheduler Updates and Changes
calvinp0 Nov 14, 2023
4852981
Various Settings Changes
calvinp0 Nov 14, 2023
497628d
Updating submit.py to support AZURE Slurm
calvinp0 Nov 14, 2023
e304459
[WIP] Getting the Internal Coordinates for QChem - Still not operati…
calvinp0 Nov 14, 2023
6143254
Species: Get number of heavy atoms for TSGuess
calvinp0 Nov 14, 2023
2b9cd67
XYZ to Smiles: Warning Update to Possible Valence
calvinp0 Nov 14, 2023
d68720f
Update ARC Environment
calvinp0 Nov 14, 2023
abb4d7f
Updated CI wont use arc cache if env file updated
calvinp0 Nov 14, 2023
9e49d7f
Removed: remove_remote_files
calvinp0 Nov 15, 2023
a4a3189
Update spelling
calvinp0 Nov 16, 2023
fd6bee1
Molpro Mem/CPU Change Tests
calvinp0 Nov 26, 2023
dc366c6
deduce software - irc
calvinp0 Nov 27, 2023
2f581fd
molpro test updates
calvinp0 Nov 27, 2023
bc3dc82
Updates
calvinp0 Nov 29, 2023
f29012c
Updates
calvinp0 Feb 22, 2024
5e4b433
Updates
calvinp0 Feb 22, 2024
37 changes: 19 additions & 18 deletions .github/workflows/cont_int.yml
@@ -21,7 +21,7 @@ jobs:

steps:
- name: Checkout ARC
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1

- name: Clean Ubuntu Image
uses: kfir4444/free-disk-space@main
@@ -36,7 +36,7 @@ jobs:

- name: Cache RMG-Py
id: cache-rmg-py
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: RMG-Py
key: ${{ runner.os }}-rmg-main
@@ -45,7 +45,7 @@

- name: Checkout RMG-py
if: steps.cache-rmg-py.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
repository: ReactionMechanismGenerator/RMG-Py
path: RMG-Py
@@ -54,15 +54,15 @@

- name: Cache RMG-database
id: cache-rmg-db
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: RMG-database
key: ${{ runner.os }}-rmgdb-main
restore-keys: |
${{ runner.os }}-rmgdb-
- name: Checkout RMG-database
if: steps.cache-rmg-db.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
repository: ReactionMechanismGenerator/RMG-database
path: RMG-database
@@ -71,15 +71,15 @@

- name: Cache AutoTST
id: cache-autotst
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: AutoTST
key: ${{ runner.os }}-autotst-main
restore-keys: |
${{ runner.os }}-autotst-
- name: Checkout AutoTST
if: steps.cache-autotst.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
repository: ReactionMechanismGenerator/AutoTST
path: AutoTST
@@ -88,15 +88,15 @@

- name: Cache TS-GCN
id: cache-tsgcn
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: TS-GCN
key: ${{ runner.os }}-tsgcn-main
restore-keys: |
${{ runner.os }}-tsgcn-
- name: Checkout TS-GCN
if: steps.cache-tsgcn.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
repository: ReactionMechanismGenerator/TS-GCN
path: TS-GCN
@@ -105,7 +105,7 @@

- name: Cache KinBot
id: cache-kinbot
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: KinBot
key: ${{ runner.os }}-kinbot-2.0.6
@@ -114,41 +114,42 @@

- name: Checkout Kinbot 2.0.6
if: steps.cache-kinbot.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4.1.1
with:
repository: zadorlab/KinBot
path: KinBot
ref: v2.0.6
fetch-depth: 1

- name: Cache Packages
uses: actions/cache@v2
uses: actions/cache@v3.3.2
env:
CACHE_NUMBER: 0
with:
path: ~/conda_pkgs_dir
key:
${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{ hashFiles('environment.yml') }}
- name: Setup ARC Env
uses: conda-incubator/setup-miniconda@v2
uses: conda-incubator/setup-miniconda@v2.2.0
with:
miniforge-variant: Mambaforge
miniforge-version: latest
activate-environment: arc_env
use-mamba: true

- name: Cache ARC env
uses: actions/cache@v2
uses: actions/cache@v3.3.2
with:
path: ${{ env.CONDA }}/envs/arc_env
key: conda-${{ runner.os }}--${{ runner.arch }}-arcenv-${{ env.CACHE_NUMBER}}
key: conda-${{ runner.os }}--${{ runner.arch }}-arcenv-${{ env.CACHE_NUMBER}}-${{ hashFiles('environment.yml') }}
env:
# Increase this value to reset cache if etc/example-environment.yml has not changed
CACHE_NUMBER: 0
# Increase this value to reset cache if etc/example-environment.yml has not changed
CACHE_NUMBER: 0
id: cache-arc-env
- name: Update environment
run: mamba env update -n arc_env -f environment.yml
if: steps.cache-arc-env.outputs.cache-hit != 'true'


- name: Install codecov
run: mamba install -y -c conda-forge codecov
@@ -239,7 +240,7 @@ jobs:
run: sleep 10

- name: Code Coverage
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v3.1.4
with:
token: f259713a-7f1d-4e9c-b140-bb3bb371d3ef
file: ./coverage.xml
1 change: 1 addition & 0 deletions .gitignore
@@ -36,6 +36,7 @@ scratch/

# csv database
*.csv
!basis_sets.csv

# iPython files
*.ipynb_*
16 changes: 16 additions & 0 deletions arc/common.py
@@ -34,6 +34,8 @@
from rmgpy.molecule.molecule import Atom, Bond, Molecule
from rmgpy.qm.qmdata import QMData
from rmgpy.qm.symmetry import PointGroupCalculator
from rmgpy.molecule.resonance import generate_optimal_aromatic_resonance_structures, analyze_molecule
from rmgpy.species import Species

from arc.exceptions import InputError, SettingsError
from arc.imports import home, settings
@@ -1524,6 +1526,20 @@
filter_structures=filter_structures,
save_order=save_order,
)
# # Check if the molecule is a Molecule object instance
# if isinstance(object_, Species):
# # Check if object_.molecule is a list and then iterate over it
# for i in range(len(object_.molecule)):
# if isinstance(object_.molecule[i], Molecule):
# # Analyse the molecule to determine whether it is an aryl radical and aromatic
# features_molecule = analyze_molecule(mol=object_.molecule[i])
# # If the molecule is an aryl radical and aromatic, remove the aryl radical flag
# if features_molecule['is_aryl_radical'] and features_molecule['is_aromatic']:
# features_molecule['is_aryl_radical'] = False
# # Generate optimal aromatic resonance structures using the adjusted features
# mol_list = generate_optimal_aromatic_resonance_structures(mol=object_.molecule[i], features = features_molecule, save_order=True)
# # If the list of resonance structures is not empty, update the object
# object_.molecule[i]= mol_list[0]
CodeQL notice (on lines +1529 to +1541): Commented-out code. This comment appears to contain commented-out code.
except (AtomTypeError, ILPSolutionError, ResonanceError, TypeError, ValueError):
pass
return result
24 changes: 23 additions & 1 deletion arc/job/adapter_test.py
@@ -207,6 +207,21 @@
server='server3',
testing=True,
)
cls.job_6 = GaussianAdapter(execution_type='queue',
job_name='spc1',
job_type='opt',
job_id='123456',
job_num=101,
job_server_name = 'server1',
level=Level(method='cbs-qb3'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_JobAdapter_ServerTimeLimit'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
server='test_server',
testing=True,
queue='short_queue',
attempted_queues=['short_queue']
)
cls.job_6 = GaussianAdapter(execution_type='queue',
job_name='spc1',
job_type='opt',
@@ -354,7 +369,7 @@
self.job_4.cpu_cores = None
self.job_4.set_cpu_and_mem()
self.assertEqual(self.job_4.cpu_cores, 8)
expected_memory = math.ceil((14 * 1024 * 1.1) / self.job_4.cpu_cores)
Review comment (Member): The commit message here should be something like "Minor: Style modification in job adapter test" (I understand that it is probably a result of a rebase).

expected_memory = math.ceil((14 * 1024 * 1.1)/self.job_4.cpu_cores)
self.assertEqual(self.job_4.submit_script_memory, expected_memory)
self.job_4.server = 'local'

@@ -431,6 +446,13 @@
'remote': os.path.join(self.job_1.remote_path, 'm.x'),
'source': 'input_files',
'make_x': True})

def test_determine_job_status(self):

CodeQL warning: Variable defined multiple times. This assignment to 'test_determine_job_status' is unnecessary as it is redefined before this value is used.
"""Test determining the job status"""
self.job_5.determine_job_status()
self.assertEqual(self.job_5.job_status[0], 'done')
self.assertEqual(self.job_5.job_status[1]['status'], 'errored')
self.assertEqual(self.job_5.job_status[1]['keywords'], ['ServerTimeLimit'])

def test_determine_job_status(self):
"""Test determining the job status"""
56 changes: 44 additions & 12 deletions arc/job/adapters/molpro.py
@@ -156,6 +156,7 @@ def __init__(self,
self.execution_type = execution_type or 'queue'
self.command = 'molpro'
self.url = 'https://www.molpro.net/'
self.core_change = None

if species is None:
raise ValueError('Cannot execute Molpro without an ARCSpecies object.')
@@ -324,19 +325,50 @@ def set_additional_file_paths(self) -> None:
def set_input_file_memory(self) -> None:
"""
Set the input_file_memory attribute.

Molpro's memory is per cpu core and in MW (mega word; 1000 MW = 7.45 GB on a 64-bit machine)
The conversion from mW to GB was done using this (https://deviceanalytics.com/words-to-bytes-converter/)
specifying a 64-bit architecture.
See also:
https://www.molpro.net/pipermail/molpro-user/2010-April/003723.html
In the link, they describe the conversion of 100,000,000 Words (100Mword) is equivalent to
800,000,000 bytes (800 mb).
Formula - (100,000,000 [Words]/( 800,000,000 [Bytes] / (job mem in gb * 1000,000,000 [Bytes])))/ 1000,000 [Words -> MegaWords]
The division by 1E6 is for converting into MWords
"""
# Molpro's memory is per cpu core and in MW (mega word; 1000 MW = 7.45 GB on a 64-bit machine)
# The conversion from mW to GB was done using this (https://deviceanalytics.com/words-to-bytes-converter/)
# specifying a 64-bit architecture.
#
# See also:
# https://www.molpro.net/pipermail/molpro-user/2010-April/003723.html
# In the link, they describe the conversion of 100,000,000 Words (100Mword) is equivalent to
# 800,000,000 bytes (800 mb).
# Formula - (100,000,000 [Words]/( 800,000,000 [Bytes] / (job mem in gb * 1000,000,000 [Bytes])))/ 1000,000 [Words -> MegaWords]
# The division by 1E6 is for converting into MWords
# Due to Zeus's configuration, there is only 1 nproc so the memory should not be divided by cpu_cores.
self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores)) if 'zeus' not in socket.gethostname() else math.ceil(self.job_memory_gb / (7.45e-3))

self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores))
# We need to check if ess_trsh_methods=['cpu'] and ess_trsh_methods=['molpro_memory:] exists
# If it does, we need to reduce the cpu_cores
if self.ess_trsh_methods is not None:
if 'cpu' in self.ess_trsh_methods and any('molpro_memory:' in method for method in self.ess_trsh_methods):
current_cpu_cores = self.cpu_cores
max_memory = self.job_memory_gb
memory_values = []
for item in self.ess_trsh_methods:
if 'molpro_memory:' in item:
memory_value = item.split('molpro_memory:')[1]
memory_values.append(float(memory_value))

if memory_values:
min_memory_value = min(memory_values)
available_cores = math.floor(max_memory / (min_memory_value * 7.45e-3))
if self.core_change is None:
self.core_change = available_cores
elif self.core_change == available_cores:
# We have already done this
Review comment (Member): Does self.core_change pass on between a failed job to a new job that fixes it?
Review reply (Member Author): I am not 100% certain, but as long as the adapter class is not re-instantiated.

# Reduce the cores by 1
available_cores -= 1
if available_cores < current_cpu_cores:
self.cpu_cores = available_cores
logger.info(f'Changing the number of cpu_cores from {current_cpu_cores} to {self.cpu_cores}')
# Situation may occur when the required memory per process by Molpro is only enough for 1 cpu core for us to use (for example, 4300 MW -> 32.04GB and if we have 64GB, we can only use 1 cpu core)
# And this means that for 1 CPU, we may end up using all 64GB of memory which approximates to 8600 MW. We need to take precaution here and not use all the memory.
# We will therefore, limit the MW to 4300 MW
self.input_file_memory = math.ceil(self.job_memory_gb / (7.45e-3 * self.cpu_cores))
if self.cpu_cores == 1 and self.input_file_memory > min_memory_value:
self.input_file_memory = min_memory_value
logger.info(f'Changing the input_file_memory from {self.input_file_memory} to {min_memory_value} as the number of cpu_cores will be restricted to 1 due to the memory requirements of Molpro')
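The arithmetic this hunk implements can be sketched standalone. Below is a minimal sketch, assuming only the 7.45e-3 GB-per-MW conversion factor used above; the function name and signature are illustrative, not ARC's API:

```python
import math

GB_PER_MW = 7.45e-3  # 1000 MW is roughly 7.45 GB on a 64-bit machine


def molpro_mem_and_cores(job_memory_gb, cpu_cores, trsh_memories_mw):
    """Given past 'molpro_memory: X' troubleshooting values (in MW), cap the
    core count so each core can receive at least the smallest requested memory,
    then recompute the per-core input-file memory."""
    input_file_memory = math.ceil(job_memory_gb / (GB_PER_MW * cpu_cores))
    if trsh_memories_mw:
        min_mw = min(trsh_memories_mw)
        # How many cores can each get min_mw MW out of the total job memory?
        available_cores = math.floor(job_memory_gb / (min_mw * GB_PER_MW))
        if available_cores < cpu_cores:
            cpu_cores = available_cores
        input_file_memory = math.ceil(job_memory_gb / (GB_PER_MW * cpu_cores))
        if cpu_cores == 1 and input_file_memory > min_mw:
            # Don't let a single core claim the whole node's memory.
            input_file_memory = min_mw
    return input_file_memory, cpu_cores


# 64 GB job, 8 cores requested, prior 'molpro_memory:' entries of 2800 / 4300 MW:
print(molpro_mem_and_cores(64, 8, [2800]))  # -> (2864, 3)
print(molpro_mem_and_cores(64, 8, [4300]))  # -> (4300, 1)
```

These two cases match the expectations asserted for job_3 and job_4 in the molpro_test.py hunk of this PR.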

def execute_incore(self):
"""
54 changes: 53 additions & 1 deletion arc/job/adapters/molpro_test.py
@@ -43,6 +43,37 @@ def setUpClass(cls):
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
testing=True,
)
cls.job_3 = MolproAdapter(execution_type='queue',
job_type='opt',
level=Level(method='CCSD(T)', basis='cc-pVQZ'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_MolproAdapter_2'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
testing=True,
ess_trsh_methods=['memory','cpu', 'molpro_memory: 2800 '],
job_memory_gb=64,
)
cls.job_4 = MolproAdapter(execution_type='queue',
job_type='opt',
level=Level(method='CCSD(T)', basis='cc-pVQZ'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_MolproAdapter_2'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
testing=True,
ess_trsh_methods=['memory','cpu', 'molpro_memory: 4300 '],
job_memory_gb=64,
)

cls.job_5 = MolproAdapter(execution_type='queue',
job_type='opt',
level=Level(method='CCSD(T)', basis='cc-pVQZ'),
project='test',
project_directory=os.path.join(ARC_PATH, 'arc', 'testing', 'test_MolproAdapter_2'),
species=[ARCSpecies(label='spc1', xyz=['O 0 0 1'])],
testing=True,
ess_trsh_methods=['memory','cpu', 'molpro_memory: 2800 '],
job_memory_gb=64,
)

def test_set_cpu_and_mem(self):
"""Test assigning number of cpu's and memory"""
@@ -52,6 +83,18 @@
self.job_1.set_cpu_and_mem()
self.assertEqual(self.job_1.cpu_cores, 48)

def test_memory_change(self):
"""Test changing the memory

1. Test that the required memory is set correctly and that the number of CPUs changes accordingly
2. Test that the required memory restricts the job to 1 CPU and therefore does not attempt to use the total memory
"""
self.assertEqual(self.job_3.input_file_memory, 2864)
self.assertEqual(self.job_3.cpu_cores, 3)

self.assertEqual(self.job_4.input_file_memory, 4300)
self.assertEqual(self.job_4.cpu_cores, 1)

def test_set_input_file_memory(self):
"""Test setting the input_file_memory argument"""
self.job_1.input_file_memory = None
@@ -128,6 +171,15 @@ def test_write_input_file(self):
"""
self.assertEqual(content_2, job_2_expected_input_file)

def test_core_reduction_logic(self):
"""Test the core reduction logic"""

# Job 5 again to trigger the condition of the core reduction logic
# Job 5 would technically be 3 CPUs prior to the repeated call that sets the input file memory.
self.job_5.set_input_file_memory()
self.assertEqual(self.job_5.input_file_memory, 4296)
self.assertEqual(self.job_5.cpu_cores, 2)
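This test relies on calling set_input_file_memory a second time on the same adapter: the stored core_change makes the retry drop one more core. A sketch of that stateful retry path, with illustrative names rather than ARC's API:

```python
import math

GB_PER_MW = 7.45e-3  # 1000 MW is roughly 7.45 GB on a 64-bit machine


class CoreReducer:
    """Sketch: remember the last computed core cap so that a repeated
    attempt with the same cap backs off by one more core."""

    def __init__(self, job_memory_gb, cpu_cores):
        self.job_memory_gb = job_memory_gb
        self.cpu_cores = cpu_cores
        self.core_change = None  # cap computed on the previous attempt

    def set_input_file_memory(self, min_memory_mw):
        available = math.floor(self.job_memory_gb / (min_memory_mw * GB_PER_MW))
        if self.core_change is None:
            self.core_change = available
        elif self.core_change == available:
            available -= 1  # same cap as last time: reduce one more core
        if available < self.cpu_cores:
            self.cpu_cores = available
        return math.ceil(self.job_memory_gb / (GB_PER_MW * self.cpu_cores))


r = CoreReducer(job_memory_gb=64, cpu_cores=8)
print(r.set_input_file_memory(2800))  # first pass: 3 cores -> 2864 MW
print(r.set_input_file_memory(2800))  # retry: same cap, 2 cores -> 4296 MW
```

The second call reproduces job_5's asserted values (4296 MW, 2 cores) above.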

def test_set_files(self):
"""Test setting files"""
job_1_files_to_upload = [{'file_name': 'submit.sub',
@@ -141,7 +193,7 @@ def test_set_files(self):
'source': 'path',
'make_x': False},
]
job_1_files_to_download = [{'file_name': 'input.out',
job_1_files_to_download = [{'file_name':'output.out',
Review reply (calvinp0, Member Author, Nov 28, 2023): I believe it was changed in this PR.

'local': os.path.join(self.job_1.local_path, output_filenames[self.job_1.job_adapter]),
'remote': os.path.join(self.job_1.remote_path, output_filenames[self.job_1.job_adapter]),
'source': 'path',