# datageneration.py
# Generates random water-dimer geometries and labels each one with the
# potential energy and per-atom forces computed by OpenMM.
import openmm as mm
from openmm import app, unit
import numpy as np
from dotenv import load_dotenv
import os
# Call python-dotenv's loader first, then read the configured paths from the
# process environment. Each constant is None when its variable is not set.
# Only DATAPATH is used in the visible part of this file.
load_dotenv()
MLP_WEIGHTPATH = os.environ.get('MLP_WEIGHTPATH')
PES_WEIGHTPATH = os.environ.get('PES_WEIGHTPATH')
DATAPATH = os.environ.get('DATAPATH')
# NOTE: needs to be changed manually
def create_water_topology():
    """Build the OpenMM topology of a water dimer.

    The topology holds one chain with two residues, named "water" and
    "water1". Each residue contains one oxygen followed by two hydrogens
    (atom names "O", "H", "H"), and each hydrogen is bonded to the oxygen of
    its own residue. Atom order is O, H, H, O, H, H.

    NOTE(review): OpenMM may key water-specific handling (e.g. rigid water)
    on the residue name "HOH"; these residues are named differently, so
    confirm the consequences before renaming them.

    Returns:
        The populated ``app.Topology``.
    """
    oxygen = app.Element.getByAtomicNumber(8)
    hydrogen = app.Element.getByAtomicNumber(1)

    topology = app.Topology()
    chain = topology.addChain()

    # Atoms are added residue by residue; bonds are registered afterwards so
    # that the atom and bond ordering stays O-H, O-H per molecule.
    bonded_pairs = []
    for residue_name in ("water", "water1"):
        residue = topology.addResidue(residue_name, chain)
        o_atom = topology.addAtom("O", oxygen, residue)
        for _ in range(2):
            h_atom = topology.addAtom("H", hydrogen, residue)
            bonded_pairs.append((o_atom, h_atom))

    for o_atom, h_atom in bonded_pairs:
        topology.addBond(o_atom, h_atom)
    return topology
def setup_simulation(topology):
    """Create an OpenMM ``Simulation`` for the given topology.

    The system is parameterized from the "amber14/tip3pfb.xml" force field
    file and paired with a Verlet integrator using a 2 fs step.

    NOTE(review): a 1 nm ``nonbondedCutoff`` is passed but no
    ``nonbondedMethod``; if the default method is a no-cutoff one, the cutoff
    value has no effect. Confirm against the OpenMM documentation.

    Args:
        topology: The ``app.Topology`` describing the atoms and bonds.

    Returns:
        An ``app.Simulation`` built from the topology, system and integrator.
    """
    water_model = app.ForceField("amber14/tip3pfb.xml")
    system = water_model.createSystem(topology, nonbondedCutoff=1 * unit.nanometer)
    return app.Simulation(topology, system, mm.VerletIntegrator(2 * unit.femtoseconds))
# def generate_configuration(num_atoms):
# # Generate random configurations within a 2x2x2 Å box centered at the origin
# return np.random.rand(num_atoms, 3) * 2 - 1
# def generate_configurations(num_atoms, num_configs):
# """
# Generates random 3D configurations for a specified number of atoms across multiple configurations.
# Args:
# num_atoms (int): Number of atoms per configuration.
# num_configs (int): Number of configurations to generate.
# Returns:
# np.ndarray: Array of shape (num_configs, num_atoms, 3) containing the configurations.
# """
# # Each dimension will have coordinates in the range [-1, 1) multiplied by the box size, centered at the origin
# return np.random.rand(num_configs, num_atoms, 3) * 10 - 5
def calculate_potential_energy_and_forces(simulation, configuration):
    """Evaluate the energy and forces of one configuration.

    Args:
        simulation: Object exposing an OpenMM ``context``.
        configuration: Atomic coordinates; they are multiplied by
            ``unit.angstroms`` before being handed to the context.

    Returns:
        A ``(potential_energy, forces)`` tuple as returned by the state's
        ``getPotentialEnergy()`` and ``getForces(asNumpy=True)``.
    """
    context = simulation.context
    # Overwrite the context's positions with this configuration.
    context.setPositions(configuration * unit.angstroms)
    # Request energy and forces together so both come from the same state.
    snapshot = context.getState(getEnergy=True, getForces=True)
    return snapshot.getPotentialEnergy(), snapshot.getForces(asNumpy=True)
import numpy as np
from scipy.spatial.transform import Rotation as R
def generate_molecule(bond_length=0.9572, max_stretch=0.5, bond_angle_deg=104.5):
    """Generate one water molecule with a randomly perturbed O-H bond length.

    The oxygen sits at the origin and both hydrogens lie in the xz-plane,
    placed symmetrically about the x-axis. A single bond length is drawn
    uniformly from ``bond_length ± max_stretch`` and used for BOTH O-H bonds,
    so the molecule is always symmetric. The H-O-H angle is fixed.

    With the default arguments the result and the use of NumPy's global
    random state are the same as the earlier hard-coded version (0.9572 Å
    base length, ±0.5 Å stretch, 104.5° angle).

    Args:
        bond_length: Unperturbed O-H distance (Å).
        max_stretch: Half-width of the uniform perturbation applied to the
            bond length (Å). Must satisfy ``0 <= max_stretch < bond_length``
            so that sampled lengths stay positive.
        bond_angle_deg: H-O-H angle in degrees.

    Returns:
        ``np.ndarray`` of shape (3, 3) with rows O, H1, H2.

    Raises:
        ValueError: If ``max_stretch`` is negative or not smaller than
            ``bond_length``.
    """
    if not 0 <= max_stretch < bond_length:
        raise ValueError(
            "max_stretch must satisfy 0 <= max_stretch < bond_length, "
            f"got max_stretch={max_stretch!r}, bond_length={bond_length!r}"
        )

    # Each hydrogen is rotated by half the bond angle away from the +x axis.
    half_angle = np.deg2rad(bond_angle_deg) / 2
    # One draw shared by both bonds.
    r = bond_length + np.random.uniform(-max_stretch, max_stretch)

    oxygen = np.array([0.0, 0.0, 0.0])
    hydrogen_1 = np.array([r * np.cos(half_angle), 0.0, r * np.sin(half_angle)])
    hydrogen_2 = np.array([r * np.cos(half_angle), 0.0, -r * np.sin(half_angle)])
    return np.array([oxygen, hydrogen_1, hydrogen_2])
def generate_water_dimer():
    """Build the coordinates of one random water dimer.

    The first molecule is used as generated (oxygen at the origin). The
    second molecule is right-multiplied by a random rotation matrix and then
    shifted by a random vector whose x component lies in [2.5, 3.5) and whose
    y and z components lie in [-1, 1).

    Returns:
        ``np.ndarray`` of shape (6, 3): the three atoms of the first molecule
        followed by the three atoms of the second.
    """
    first = generate_molecule()
    second = generate_molecule()

    # Random orientation for the second molecule.
    rot = R.random().as_matrix()

    # Displacement of the second molecule; since both oxygens start at the
    # origin, this vector is also the O-O separation.
    shift = np.array([
        np.random.uniform(2.5, 3.5),
        np.random.uniform(-1, 1),
        np.random.uniform(-1, 1),
    ])

    second = second @ rot + shift
    return np.concatenate((first, second), axis=0)
def generate_configurations(num_configs):
    """Produce ``num_configs`` independent random water-dimer geometries.

    Args:
        num_configs: How many dimers to generate.

    Returns:
        ``np.ndarray`` stacking the dimers along a new leading axis.
    """
    dimers = []
    for _ in range(num_configs):
        dimers.append(generate_water_dimer())
    return np.array(dimers)
# Main execution: generate dimer geometries, label them with OpenMM energies
# and forces, and store everything in a single NPZ archive at DATAPATH.
if __name__ == '__main__':
    # Fail fast: DATAPATH is only needed at the very end, so without this
    # check an unset variable would discard the whole computation.
    if not DATAPATH:
        raise SystemExit(
            "DATAPATH is not set; define it in the environment or in a .env file."
        )

    num_atoms = 6  # Two water molecules (a dimer): 2 x (O, H, H)
    num_configs = 40000  # Number of configurations
    configurations = generate_configurations(num_configs)

    # Create the topology and set up the simulation
    topology = create_water_topology()
    simulation = setup_simulation(topology)

    # Output buffers: one energy per configuration, one 3-vector per atom.
    potential_energies = np.zeros(num_configs)
    forces = np.zeros((num_configs, num_atoms, 3))

    # Units the results are converted to before being stored as plain floats.
    energy_unit = unit.kilojoules_per_mole
    force_unit = unit.kilojoules_per_mole / unit.angstroms

    # Calculate potential energy and forces for each configuration
    for i, config in enumerate(configurations):
        potential_energy, force = calculate_potential_energy_and_forces(simulation, config)
        potential_energies[i] = potential_energy.value_in_unit(energy_unit)
        forces[i] = force.value_in_unit(force_unit)

    # Save configurations, potential energies and forces to an NPZ file.
    # Note: np.savez appends ".npz" when the path does not already end with it.
    np.savez(DATAPATH, configurations=configurations, potentials=potential_energies, forces=forces)
    print("Data has been saved to " + DATAPATH)