diff --git a/pyef/analysis.py b/pyef/analysis.py index e61971a..46f3be6 100644 --- a/pyef/analysis.py +++ b/pyef/analysis.py @@ -32,13 +32,14 @@ class Electrostatics: hasECP: boolean indicates if an effective core potential was used... in this case, molden file will need to be re-formatted to be compatible multiwfn! ''' - def __init__(self, lst_of_folders, lst_of_tmcm_idx, folder_to_file_path, inGaCage, hasECP=False): + def __init__(self, lst_of_folders, lst_of_tmcm_idx, folder_to_file_path, inGaCage, hasECP=False, includePtChgs=False): self.lst_of_folders = lst_of_folders self.lst_of_tmcm_idx = lst_of_tmcm_idx self.folder_to_file_path = folder_to_file_path self.dict_of_calcs = {'Hirshfeld': '1', 'Voronoi':'2', 'Mulliken': '5', 'Lowdin': '6', 'SCPA': '7', 'Becke': '10', 'ADCH': '11', 'CHELPG': '12', 'MK':'13', 'AIM': '14', 'Hirshfeld_I': '15', 'CM5':'16', 'EEM': '17', 'RESP': '18', 'PEOE': '19'} self.inGaCageBool = inGaCage self.dielectric = 1 + self.ptChgs = includePtChgs #To avoid over-estimating screening from bound atoms, set dielectric to 1 for primary bound atoms in ESP calv self.changeDielectBoundBool = False @@ -82,6 +83,9 @@ def __init__(self, lst_of_folders, lst_of_tmcm_idx, folder_to_file_path, inGaCag 'Cm': (247.07, 96, 1.69, 10), 'Bk': (247.07, 97, 1.68, 11), 'Cf': (251.08, 98, 1.68, 12)} self.prepData() + def includePtChgs(self): + self.ptChgs = True + def minDielecBonds(self, bool_bonds): self.changeDielectBoundBool = bool_bonds @@ -159,9 +163,8 @@ def prepData(self): os.chdir(owd) - - # Accepts path to the xyz file and returns a dataframe containing the atoms names and the coordinates + #Accepts path to the xyz file and returns a dataframe containing the atoms names and the coordinates def getGeomInfo(self, filepathtoxyz): data = [] counter_idx = 0 @@ -174,6 +177,9 @@ def getGeomInfo(self, filepathtoxyz): tokens = line.split() if len(tokens) == 4: # Assuming atom name and x, y, z coordinates are present atom_name = tokens[0] + #for xyz in QMMM simulation, pnt charges at end of file, skip them! + if atom_name == 'pnt': + break x, y, z = map(float, tokens[1:]) rad = self.amassdict[atom_name][2] data.append([counter_idx, atom_name, x, y, z, rad]) @@ -262,7 +268,13 @@ def getmultipoles(multipole_name): atomDict = {"Index": index, "Element": element, "Atom_Charge": atomic_charge, 'Dipole_Moment': dipole_moment, 'Quadrupole_Moment': quadrupole_moment} atomicDicts.append(atomDict) return atomicDicts - + #Function will process a point charge file (as generated by amber) and return a dataframe with partial charges and coordinates for use in ESP calculation + def getPtChgs(self, filename_pt): + chg_df = pd.read_table(filename_pt, skiprows=2, delim_whitespace=True, names=['charge', 'x', 'y', 'z']) + atm_name = ['pnt'] + atoms = atm_name*len(chg_df['charge']) + chg_df['Atom'] = atoms + return chg_df # Define the functions to calculate the ESP: def mapcount(filename): @@ -383,6 +395,17 @@ def calcesp(self, path_to_xyz, espatom_idx, charge_range, charge_file): ys = df['y'] zs = df['z'] + #For QMMM calculation, include point charges in ESP calculation + if self.ptChgs: + ptchg_filename = 'ptchrg.xyz' + init_file_path = path_to_xyz[0:-len('final_optim.xyz')] + full_ptchg_fp = init_file_path + ptchg_filename + df_ptchg = self.getPtChgs(full_ptchg_fp) + xs = xs + df_ptchg['x'] + ys = ys + df_ptchg['y'] + zs = zs + df_ptchg['z'] + charges = charges + df_ptchg['charge'] + atoms = atoms + df_ptchg['Atom'] # Pick the index of the atom at which the esp should be calculated idx_atom = espatom_idx @@ -453,8 +476,6 @@ def calc_firstTermE(espatom_idx, charge_range, charge_file): C_e = 1.6023*(10**-19) one_mol = 6.02*(10**23) - print('This is the charge range...') - print(charge_range) for idx in charge_range: if idx == idx_atom: continue @@ -633,7 +654,7 @@ def ESP_all_calcs(self, path_to_xyz, filename, atom_idx, cageTrue): print('ESP just cage: ' + str(ESP_just_cage) + ' kJ/(mol*e)') return [ESP_all, ESP_just_ligand, ESP_just_cage, atom_type] else: - # print('ESP for all atoms: ' + str(ESP_all) + ' kJ/(mol*e)') + print('ESP for all atoms: ' + str(ESP_all) + ' kJ/(mol*e)') return [ESP_all, atom_type] @@ -666,6 +687,19 @@ def esp_bydistance(self, path_to_xyz, espatom_idx, charge_file): yo = ys[idx_atom] zo = zs[idx_atom] chargeo = charges[idx_atom] + + #For QMMM calculation, include point charges in ESP calculation + if self.ptChgs: + ptchg_filename = 'ptchrg.xyz' + init_file_path = path_to_xyz[0:-len('final_optim.xyz')] + full_ptchg_fp = init_file_path + ptchg_filename + df_ptchg = self.getPtChgs(full_ptchg_fp) + xs = xs + df_ptchg['x'] + ys = ys + df_ptchg['y'] + zs = zs + df_ptchg['z'] + charges = charges + df_ptchg['charge'] + atoms = atoms + df_ptchg['Atom'] + total_esp = 0 # Create an ordering of the atoms based on distance from the central atom total_atoms = len(xs) @@ -686,14 +720,33 @@ def esp_bydistance(self, path_to_xyz, espatom_idx, charge_file): distances.append(r) esps.append(k*(1/dielectric)*C_e*cal_J*faraday*charges[idx]/r) # Now we sort the distance list, and use sorted indices to sort the + atm_lst = list(atoms) + atm_lst.pop(idx_atom) + + chg_lst = list(charges) + chg_lst.pop(idx_atom) + chg_arr = np.array(chg_lst) + dist_arr = np.array(distances) - sorted_idx = np.argsort(dist_arr) + init_sorted_idx = np.argsort(dist_arr) + lst_sorted_idx = list(init_sorted_idx) + lst_sorted_idx.append(len(lst_sorted_idx)) + lst_sorted_idx.remove(idx_atom) + sorted_idx = np.array(lst_sorted_idx) + esp_arr = np.array(esps) - sorted_esps = esp_arr.take(sorted_idx) + sorted_esps = esp_arr.take(init_sorted_idx) cumulative_esps = np.cumsum(sorted_esps) - sorted_dist = dist_arr.take(sorted_idx) - return [sorted_dist, sorted_esps, cumulative_esps] + sorted_dist = dist_arr.take(init_sorted_idx) + sorted_partial_charges = chg_arr.take(init_sorted_idx) + sorted_atomTypes = [atm_lst[i] for i in init_sorted_idx] + + print('Here is idx_atom: ' + str(idx_atom)) + new_bool_still = idx_atom in sorted_idx + print(new_bool_still) + #final_sorted_idx = np.delete(init_sorted_idx, init_sorted_idx[idx_atom]) + return [sorted_dist, sorted_esps, cumulative_esps, sorted_idx, sorted_partial_charges, sorted_atomTypes] # Function that can be called on a class object to compute key error analysis metrics for a transition metal complex def errorAnalysis(self, csvName): @@ -747,6 +800,7 @@ def getESPData(self, charge_types, ESPdata_filename): allspeciesdict = [] counter = 0 # Iterator to account for atomic indices of interest for f in list_of_file: + print('-----------------' + str(f) + '------------------') atom_idx = metal_idxs[counter] counter = counter + 1 os.chdir(owd) @@ -759,7 +813,7 @@ def getESPData(self, charge_types, ESPdata_filename): results_dict['Name'] = f for key in charge_types: - print('Current key:' + str(key)) + print('Partial Charge Scheme:' + str(key)) try: full_file_path = os.getcwd() +'/final_optim_' +key+'.txt' path_to_xyz = os.getcwd() + '/final_optim.xyz' @@ -774,7 +828,7 @@ def getESPData(self, charge_types, ESPdata_filename): [ESP_all, atom_type] = self.ESP_all_calcs(path_to_xyz, full_file_path, atom_idx, self.inGaCageBool) [total_charge,partial_charge_atom] = Electrostatics.charge_atom(full_file_path, atom_idx) - [sorted_distances, sorted_esps, cum_esps] = self.esp_bydistance(path_to_xyz, atom_idx, full_file_path) + [sorted_distances, sorted_esps, cum_esps, sorted_cum_idx, sorted_cum_chg, sorted_atomTypes] = self.esp_bydistance(path_to_xyz, atom_idx, full_file_path) ESP_fcoord = self.esp_first_coord(atom_idx, full_file_path, path_to_xyz) ESP_scoord = self.esp_second_coord(atom_idx, full_file_path, path_to_xyz) @@ -800,7 +854,7 @@ def getESPData(self, charge_types, ESPdata_filename): [ESP_all, atom_type] = self.ESP_all_calcs(full_file_path, atom_idx, self.inGaCageBool) [total_charge,partial_charge_atom] = Electrostatics.charge_atom(full_file_path, atom_idx) - [sorted_distances, sorted_esps, cum_esps] = self.esp_bydistance(path_to_xyz, atom_idx, full_file_path) + [sorted_distances, sorted_esps, cum_esps, sorted_cum_idx, sorted_cum_chg, sorted_atomTypes] = self.esp_bydistance(path_to_xyz, atom_idx, full_file_path) ESP_fcoord = self.esp_first_coord(atom_idx, full_file_path, path_to_xyz) ESP_scoord = self.esp_second_coord(atom_idx, full_file_path, path_to_xyz) @@ -819,6 +873,9 @@ def getESPData(self, charge_types, ESPdata_filename): results_dict['Sorted Distances'] = sorted_distances results_dict['Sorted ESP '+ str(key)] = sorted_esps results_dict['Cumulative ESP ' + str(key)] = cum_esps + results_dict['Dist Sorted Idxs' + str(key)] = sorted_cum_idx + results_dict['Dist Sorted Partial Charges' + str(key)] = sorted_cum_chg + results_dict['Dist Sorted Atom Types' + str(key)] = sorted_atomTypes # If .molden files deal with encapsulated TMCS, complete an additional set of analyses if self.inGaCageBool: