Generalized Multiwfn to work on different clusters

davidkastner · Jun 10, 2024 · 41106e9 · 41106e9
1 parent 51814f4
commit 41106e9
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 45 deletions.
diff --git a/demo/pyef.yaml b/demo/pyef.yaml
@@ -1,6 +1,9 @@
 # Configuration parameters
-input: ./jobs.in              # Path to the input file
-dielectric: 1                 # User defined dielectric
-ef: true                      # Run the EF workflow
-esp: false                    # Run the ESP workflow
-geometry_check: false         # Run the geometry check workflow
+input: ./jobs.in                                                 # Path to the input file
+dielectric: 1                                                    # User defined dielectric
+ef: true                                                         # Run the EF workflow
+esp: false                                                       # Run the ESP workflow
+geometry_check: false                                            # Run the geometry check workflow
+multiwfn_module: "module load multiwfn/noGUI"                    # Module command to load Multiwfn
+multiwfn_path: "/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn"      # Path to Multiwfn executable
+atmrad_path: "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad" # Path to the atom radius directory
diff --git a/pyef/analysis.py b/pyef/analysis.py
@@ -819,14 +819,18 @@ def errorAnalysis(self, csvName):
     # new_dir: the post-folder path to the scr folder that contains the .molden and optim.xyz files themselves
     # dict of calcs: calculations to be performed by multiwavefunction with the corresponding keys
     # newfilename: desired name of the .csv file that will be created in getData containing all of the ESP/other data extracted in the file
-    ''' Function computes a series of ESP data using the charge scheme specified in charge types
-    Accepts:
-    charge_types: list of strings
-    ESPdata_filename: string
-        Name of the output file name
-    
-    '''
-    def getESPData(self, charge_types, ESPdata_filename):
+
+    def getESPData(self, charge_types, ESPdata_filename, multiwfn_module, multiwfn_path, atmrad_path):
+        '''
+        Function computes a series of ESP data using the charge scheme specified in charge types.
+
+        Parameters
+        ----------
+        charge_types: list of strings
+        ESPdata_filename: string
+            Name of the output file name
+
+        '''
        # Access Class Variables
         metal_idxs = self.lst_of_tmcm_idx
         folder_to_molden = self.folder_to_file_path
@@ -841,8 +845,8 @@ def getESPData(self, charge_types, ESPdata_filename):
             counter = counter + 1
             os.chdir(owd)
             os.chdir(f + folder_to_molden)
-            subprocess.call("module load multiwfn/noGUI", shell=True)
-            command_A = '/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn '+ 'final_optim.molden'
+            subprocess.call(multiwfn_module, shell=True)
+            command_A = f"{multiwfn_path} final_optim.molden"
             results_dir = os.getcwd() + '/'
 
             results_dict = {}
@@ -851,10 +855,10 @@ def getESPData(self, charge_types, ESPdata_filename):
             for key in charge_types:
                 print('Partial Charge Scheme:' + str(key))
                 try:
-                    full_file_path = os.getcwd() +'/final_optim_' +key+'.txt'
-                    path_to_xyz = os.getcwd() + '/final_optim.xyz'
+                    full_file_path = f"{os.getcwd()}/final_optim_{key}.txt'"
+                    path_to_xyz = f"{os.getcwd()}/final_optim.xyz'"
                     if key == "Hirshfeld_I":
-                        atmrad_src = "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad"
+                        atmrad_src = atmrad_path
                         copy_tree(atmrad_src, results_dir + 'atmrad/')
                     try: 
                         if self.inGaCageBool:
@@ -931,8 +935,9 @@ def getESPData(self, charge_types, ESPdata_filename):
         df.to_csv(ESPdata_filename +'.csv')
         return df
 
+
     # input_bond_indices is a list of a list of tuples
-    def getEFieldData(self, Efield_data_filename, input_bond_indices=[]):
+    def getEFieldData(self, Efield_data_filename, multiwfn_module, multiwfn_path, input_bond_indices=[]):
 
         metal_idxs = self.lst_of_tmcm_idx
         folder_to_molden = self.folder_to_file_path
@@ -948,7 +953,7 @@ def getEFieldData(self, Efield_data_filename, input_bond_indices=[]):
             bool_manual_mode = True
 
         for f in list_of_file:
-            load_multiwfn = "module load multiwfn/noGUI"
+            load_multiwfn = multiwfn_module
             atom_idx = metal_idxs[counter] 
             os.chdir(owd)
             os.chdir(f + folder_to_molden)
@@ -957,7 +962,7 @@ def getEFieldData(self, Efield_data_filename, input_bond_indices=[]):
             # For this to work, the .molden file should be named: final_optim.molden
             results_dict = {}
             results_dict['Name'] = f
-            multiwfn_path = "/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn"
+            multiwfn_path = multiwfn_path
             molden_filename = "final_optim.molden"
             final_structure_file = "final_optim.xyz"
             polarization_file = "final_optim_polarization.txt"
@@ -1012,17 +1017,23 @@ def getEFieldData(self, Efield_data_filename, input_bond_indices=[]):
         df.to_csv(f"{Efield_data_filename}.csv")
 
 
-
-    ''' Function computes partial charges on a select set of atoms using the charge scheme specified in charge types. Note atom indices will be carried over between csvs
-    Accepts:
-    charge_types: list of strings
-    lst_of_atom_idxs: list of integers denoting atom indices (0 indexed!)
-    partial_chg_filename: string
-        Name of the output file name
-    Returns: 
-        nothing. Will Create a csv file entitled partial_chg_filename.csv with partial charge info
-    '''
-    def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename):
+    def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename, multiwfn_path, multiwfn_module, atmrad_path):
+        '''
+        Function computes partial charges on a select set of atoms using the charge scheme specified in charge types. Note atom indices will be carried over between csvs
+        
+        Parameters
+        ----------
+        charge_types: list(str)
+            list of strings
+        lst_atom_idxs: list(int)
+            list of integers denoting atom indices (0 indexed!)
+        partial_chg_filename: string
+            Name of the output file name
+        
+        Notes
+        -----
+        Creates a csv file named partial_chg_filename.csv containing the partial charge info
+        '''
        # Access Class Variables
         folder_to_molden = self.folder_to_file_path
         list_of_file = self.lst_of_folders
@@ -1035,8 +1046,8 @@ def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename):
             counter = counter + 1
             os.chdir(owd)
             os.chdir(f + folder_to_molden)
-            subprocess.call("module load multiwfn/noGUI", shell=True)
-            command_A = '/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn '+ 'final_optim.molden'
+            subprocess.call(multiwfn_module, shell=True)
+            command_A = f"{multiwfn_path} final_optim.molden"
             results_dir = os.getcwd() + '/'
 
             results_dict = {}
@@ -1048,7 +1059,7 @@ def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename):
                     full_file_path = os.getcwd() +'/final_optim_' +key+'.txt'
                     path_to_xyz = os.getcwd() + '/final_optim.xyz'
                     if key == "Hirshfeld_I":
-                        atmrad_src = "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad"
+                        atmrad_src = atmrad_path
                         copy_tree(atmrad_src, results_dir + 'atmrad/')
                     try:
                         for atom_idx in lst_atom_idxs:

diff --git a/pyef/cli.py b/pyef/cli.py
@@ -57,22 +57,25 @@ def run(config):
     run_ef = config_data.get('ef', False)
     run_esp = config_data.get('esp', False)
     run_geometry_check = config_data.get('esp', False)
+    multiwfn_module = config_data.get('multiwfn_module', False)
+    multiwfn_path = config_data.get('multiwfn_path', False)
+    atmrad_path = config_data.get('atmrad_path', False)
 
     if run_ef:
         """Analyzes electric fields"""
         click.echo("Importing dependencies...")
 
         jobs, metal_indices, bond_indices = parse_job_batch_file(input)
         job_name = os.path.splitext(input)[0]
-        pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp)
+        pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp, multiwfn_module, multiwfn_path, atmrad_path)
 
     # THE ESP SECTION IS UNDER CONSTRUCTION AND MAY NOT WORK
     if run_esp:      
         """Analyzes electrostatic potential"""
         click.echo("Performing electrostatic analysis.")
 
         jobs, metal_indices, bond_indices = parse_job_batch_file(input)
-        pyef.run.main(jobs, run_geometry_check, run_esp, metal_indices)
+        pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp, multiwfn_module, multiwfn_path, atmrad_path)
 
 cli.add_command(run)
 

diff --git a/pyef/manage.py b/pyef/manage.py
@@ -1,8 +1,9 @@
-"""Functions for managing files need for pyEF"""
+"""Functions for managing files needed for pyEF"""
 
 def parse_job_batch_file(file_path):
     """
     Parse a CSV file and extract specific columns as lists and tuples.
+    The input file allows Python style comments on any line.
 
     Parameters
     ----------
@@ -26,16 +27,24 @@ def parse_job_batch_file(file_path):
 
     with open(file_path, 'r') as file:
         for line in file:
-            # Skip empty lines and comments '#'
+            # Skip empty lines and comments that start with '#'
             if line.strip() == '' or line.strip().startswith('#'):
                 continue
 
-            columns = [col.strip() for col in line.strip().split(',')]
+            # Remove comments from the line
+            line = line.split('#')[0].strip()
+
+            # Skip the line if it's empty after removing the comment
+            if line == '':
+                continue
+
+            columns = [col.strip() for col in line.split(',')]
+
             # Extracting and appending data to respective lists
             jobs.append(columns[0])
             metal_index = int(columns[1])
             bonded_atom_index = int(columns[2])
             metal_indices.append(metal_index)
-            column_pairs.append([(metal_index, bonded_atom_index)])
+            column_pairs.append((metal_index, bonded_atom_index))
 
-    return jobs, metal_indices, column_pairs
+    return jobs, metal_indices, column_pairs
diff --git a/pyef/run.py b/pyef/run.py
@@ -4,7 +4,7 @@
 import argparse
 from pyef.analysis import Electrostatics
 
-def main(job_name, jobs, metal_indices, bond_indices, geom_flag, esp_flag):
+def main(job_name, jobs, metal_indices, bond_indices, geom_flag, esp_flag, multiwfn_module, multiwfn_path, atmrad_path):
     """
     Main function for running the pyEF workflow.
 
@@ -48,10 +48,10 @@ def main(job_name, jobs, metal_indices, bond_indices, geom_flag, esp_flag):
 
     if esp_flag:
         # Create CSV with ESP data
-        dataObject.getESPData(lst_charge_types, ESPdata_filename)
+        dataObject.getESPData(lst_charge_types, ESPdata_filename, multiwfn_module, multiwfn_path, atmrad_path)
 
     # Method to Compute Efield Projections on bonds connected to the atom specified by index in metal_indices
-    dataObject.getEFieldData(job_name, bond_indices)
+    dataObject.getEFieldData(job_name, multiwfn_module, multiwfn_path, bond_indices)
 
 def read_file_lines(file_path):
     """Reads in auxiliary files containing job information"""