From 41106e9859e2f8c8e2cd2f738d042a4e875d9295 Mon Sep 17 00:00:00 2001 From: David Kastner Date: Mon, 10 Jun 2024 15:18:40 -0400 Subject: [PATCH] Generalized Multiwfn to work on different clusters --- demo/pyef.yaml | 13 +++++---- pyef/analysis.py | 71 ++++++++++++++++++++++++++++-------------------- pyef/cli.py | 7 +++-- pyef/manage.py | 19 +++++++++---- pyef/run.py | 6 ++-- 5 files changed, 71 insertions(+), 45 deletions(-) diff --git a/demo/pyef.yaml b/demo/pyef.yaml index 47f800f..abc98bb 100644 --- a/demo/pyef.yaml +++ b/demo/pyef.yaml @@ -1,6 +1,9 @@ # Configuration parameters -input: ./jobs.in # Path to the input file -dielectric: 1 # User defined dielectric -ef: true # Run the EF workflow -esp: false # Run the ESP workflow -geometry_check: false # Run the geometry check workflow \ No newline at end of file +input: ./jobs.in # Path to the input file +dielectric: 1 # User defined dielectric +ef: true # Run the EF workflow +esp: false # Run the ESP workflow +geometry_check: false # Run the geometry check workflow +multiwfn_module: "module load multiwfn/noGUI" # Module command to load Multiwfn +multiwfn_path: "/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn" # Path to Multiwfn executable +atmrad_path: "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad" # Path to the atom radius file \ No newline at end of file diff --git a/pyef/analysis.py b/pyef/analysis.py index 6530a98..2e7bc52 100644 --- a/pyef/analysis.py +++ b/pyef/analysis.py @@ -819,14 +819,18 @@ def errorAnalysis(self, csvName): # new_dir: the [post-folder path to the scr folder that contains the .molden and optim.xyz file themselfs # dict of calcs, calculations to be performed by multiwavefunction with the corresponding keys # newfilanme: desired name of the .csv fiole that will be createcd in getData cotnaining all of the ESP/other data extracted un the file - ''' Function computes a series of ESP data using the charge scheme specified in charge types - Accepts: - charge_types: list of strings - 
ESPdata_filename: string - Name of the output file name - - ''' - def getESPData(self, charge_types, ESPdata_filename): + + def getESPData(self, charge_types, ESPdata_filename, multiwfn_module, multiwfn_path, atmrad_path): + ''' + Function computes a series of ESP data using the charge scheme specified in charge types. + + Attributes + ---------- + charge_types: list of strings + ESPdata_filename: string + Name of the output file name + + ''' # Access Class Variables metal_idxs = self.lst_of_tmcm_idx folder_to_molden = self.folder_to_file_path @@ -841,8 +845,8 @@ def getESPData(self, charge_types, ESPdata_filename): counter = counter + 1 os.chdir(owd) os.chdir(f + folder_to_molden) - subprocess.call("module load multiwfn/noGUI", shell=True) - command_A = '/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn '+ 'final_optim.molden' + subprocess.call(multiwfn_module, shell=True) + command_A = f"{multiwfn_path} final_optim.molden" results_dir = os.getcwd() + '/' results_dict = {} @@ -851,10 +855,10 @@ def getESPData(self, charge_types, ESPdata_filename): for key in charge_types: print('Partial Charge Scheme:' + str(key)) try: - full_file_path = os.getcwd() +'/final_optim_' +key+'.txt' - path_to_xyz = os.getcwd() + '/final_optim.xyz' + full_file_path = f"{os.getcwd()}/final_optim_{key}.txt'" + path_to_xyz = f"{os.getcwd()}/final_optim.xyz'" if key == "Hirshfeld_I": - atmrad_src = "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad" + atmrad_src = atmrad_path copy_tree(atmrad_src, results_dir + 'atmrad/') try: if self.inGaCageBool: @@ -931,8 +935,9 @@ def getESPData(self, charge_types, ESPdata_filename): df.to_csv(ESPdata_filename +'.csv') return df + # input_bond_indices is a list of a list of tuples - def getEFieldData(self, Efield_data_filename, input_bond_indices=[]): + def getEFieldData(self, Efield_data_filename, multiwfn_module, multiwfn_path, input_bond_indices=[]): metal_idxs = self.lst_of_tmcm_idx folder_to_molden = self.folder_to_file_path @@ -948,7 +953,7 @@ def 
getEFieldData(self, Efield_data_filename, input_bond_indices=[]): bool_manual_mode = True for f in list_of_file: - load_multiwfn = "module load multiwfn/noGUI" + load_multiwfn = multiwfn_module atom_idx = metal_idxs[counter] os.chdir(owd) os.chdir(f + folder_to_molden) @@ -957,7 +962,7 @@ def getEFieldData(self, Efield_data_filename, input_bond_indices=[]): # First For this to work, the .molden file should be named: f.molden results_dict = {} results_dict['Name'] = f - multiwfn_path = "/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn" + multiwfn_path = multiwfn_path molden_filename = "final_optim.molden" final_structure_file = "final_optim.xyz" polarization_file = "final_optim_polarization.txt" @@ -1012,17 +1017,23 @@ def getEFieldData(self, Efield_data_filename, input_bond_indices=[]): df.to_csv(f"{Efield_data_filename}.csv") - - ''' Function computes partial charges on a select set of atoms using the charge scheme specified in charge types. Note atom indices will be carried over between csvs - Accepts: - charge_types: list of strings - lst_of_atom_idxs: list of integers denoting atom indices (0 indexed!) - partial_chg_filename: string - Name of the output file name - Returns: - nothing. Will Create a csv file entitled partial_chg_filename.csv with partial charge info - ''' - def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename): + def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename, multiwfn_path, multiwfn_module, atmrad_path): + ''' + Function computes partial charges on a select set of atoms using the charge scheme specified in charge types. Note atom indices will be carried over between csvs + + Attributes + ---------- + charge_types: list(str) + list of strings + lst_of_atom_idxs: list(int) + list of integers denoting atom indices (0 indexed!) 
+ partial_chg_filename: string + Name of the output file name + + Notes + ----- + Will Create a csv file entitled partial_chg_filename.csv with partial charge info + ''' # Access Class Variables folder_to_molden = self.folder_to_file_path list_of_file = self.lst_of_folders @@ -1035,8 +1046,8 @@ def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename): counter = counter + 1 os.chdir(owd) os.chdir(f + folder_to_molden) - subprocess.call("module load multiwfn/noGUI", shell=True) - command_A = '/opt/Multiwfn_3.7_bin_Linux_noGUI/Multiwfn '+ 'final_optim.molden' + subprocess.call(multiwfn_module, shell=True) + command_A = f"{multiwfn_path} final_optim.molden" results_dir = os.getcwd() + '/' results_dict = {} @@ -1048,7 +1059,7 @@ def getpartialchgs(self, charge_types, lst_atom_idxs, partial_chg_filename): full_file_path = os.getcwd() +'/final_optim_' +key+'.txt' path_to_xyz = os.getcwd() + '/final_optim.xyz' if key == "Hirshfeld_I": - atmrad_src = "/opt/Multiwfn_3.7_bin_Linux_noGUI/examples/atmrad" + atmrad_src = atmrad_path copy_tree(atmrad_src, results_dir + 'atmrad/') try: for atom_idx in lst_atom_idxs: diff --git a/pyef/cli.py b/pyef/cli.py index c9f33b4..39d4d6e 100644 --- a/pyef/cli.py +++ b/pyef/cli.py @@ -57,6 +57,9 @@ def run(config): run_ef = config_data.get('ef', False) run_esp = config_data.get('esp', False) run_geometry_check = config_data.get('esp', False) + multiwfn_module = config_data.get('multiwfn_module', False) + multiwfn_path = config_data.get('multiwfn_path', False) + atmrad_path = config_data.get('atmrad_path', False) if run_ef: """Analyzes electric fields""" @@ -64,7 +67,7 @@ def run(config): jobs, metal_indices, bond_indices = parse_job_batch_file(input) job_name = os.path.splitext(input)[0] - pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp) + pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp, multiwfn_module, multiwfn_path, atmrad_path) # THE ESP 
SECTION IS UNDERCONSTRUCTION AND MAY NOT WORK if run_esp: @@ -72,7 +75,7 @@ def run(config): click.echo("Performing electrostatic analysis.") jobs, metal_indices, bond_indices = parse_job_batch_file(input) - pyef.run.main(jobs, run_geometry_check, run_esp, metal_indices) + pyef.run.main(job_name, jobs, metal_indices, bond_indices, run_geometry_check, run_esp, multiwfn_module, multiwfn_path, atmrad_path) cli.add_command(run) diff --git a/pyef/manage.py b/pyef/manage.py index 3cae658..b17c389 100644 --- a/pyef/manage.py +++ b/pyef/manage.py @@ -1,8 +1,9 @@ -"""Functions for managing files need for pyEF""" +"""Functions for managing files needed for pyEF""" def parse_job_batch_file(file_path): """ Parse a CSV file and extract specific columns as lists and tuples. + The input file allows Python style comments on any line. Parameters ---------- @@ -26,16 +27,24 @@ def parse_job_batch_file(file_path): with open(file_path, 'r') as file: for line in file: - # Skip empty lines and comments '#' + # Skip empty lines and comments that start with '#' if line.strip() == '' or line.strip().startswith('#'): continue - columns = [col.strip() for col in line.strip().split(',')] + # Remove comments from the line + line = line.split('#')[0].strip() + + # Skip the line if it's empty after removing the comment + if line == '': + continue + + columns = [col.strip() for col in line.split(',')] + # Extracting and appending data to respective lists jobs.append(columns[0]) metal_index = int(columns[1]) bonded_atom_index = int(columns[2]) metal_indices.append(metal_index) - column_pairs.append([(metal_index, bonded_atom_index)]) + column_pairs.append((metal_index, bonded_atom_index)) - return jobs, metal_indices, column_pairs \ No newline at end of file + return jobs, metal_indices, column_pairs diff --git a/pyef/run.py b/pyef/run.py index d2d93a8..0ec0413 100644 --- a/pyef/run.py +++ b/pyef/run.py @@ -4,7 +4,7 @@ import argparse from pyef.analysis import Electrostatics -def main(job_name, 
jobs, metal_indices, bond_indices, geom_flag, esp_flag): +def main(job_name, jobs, metal_indices, bond_indices, geom_flag, esp_flag, multiwfn_module, multiwfn_path, atmrad_path): """ Main function for running the pyEF workflow. @@ -48,10 +48,10 @@ def main(job_name, jobs, metal_indices, bond_indices, geom_flag, esp_flag): if esp_flag: # Create CSV with ESP data - dataObject.getESPData(lst_charge_types, ESPdata_filename) + dataObject.getESPData(lst_charge_types, ESPdata_filename, multiwfn_module, multiwfn_path, atmrad_path) # Method to Compute Efield Projections on bonds connected to the atom specified by index in metal_indices - dataObject.getEFieldData(job_name, bond_indices) + dataObject.getEFieldData(job_name, multiwfn_module, multiwfn_path, bond_indices) def read_file_lines(file_path): """Reads in auxiliary files containing job information"""