Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make some tests use deterministic torch algorithms #108

Merged
merged 3 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/self-hosted-gpu-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ jobs:
run: |
conda activate nnpops
cd build
export CUBLAS_WORKSPACE_CONFIG=:4096:8
ctest --verbose

stop-runner:
Expand Down
56 changes: 34 additions & 22 deletions src/pytorch/TestBatchedNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,40 +34,52 @@ def test_import():
import NNPOps
import NNPOps.BatchedNN

class DeterministicTorch:
    """Context manager that forces torch to use deterministic algorithms.

    On entry, turns on ``torch.use_deterministic_algorithms`` unless it is
    already enabled; on exit, restores the previous state. Nesting is safe:
    an inner instance never switches off a setting it did not enable itself.

    NOTE(review): with CUDA, deterministic algorithms also require the
    ``CUBLAS_WORKSPACE_CONFIG`` environment variable (set in the CI
    workflow) — this class does not set it.
    """

    def __enter__(self):
        # Remember whether determinism was already on so __exit__ does not
        # disable a setting this instance is not responsible for.
        self._already_enabled = torch.are_deterministic_algorithms_enabled()
        if not self._already_enabled:
            torch.use_deterministic_algorithms(True)

    def __exit__(self, exc_type, exc_value, traceback):
        # Parameters renamed from `type`/`value` to avoid shadowing builtins.
        if not self._already_enabled:
            torch.use_deterministic_algorithms(False)

@pytest.mark.parametrize('deviceString', ['cpu', 'cuda'])
@pytest.mark.parametrize('molFile', ['1hvj', '1hvk', '2iuz', '3hkw', '3hky', '3lka', '3o99'])
def test_compare_with_native(deviceString, molFile):

if deviceString == 'cuda' and not torch.cuda.is_available():
pytest.skip('CUDA is not available')
with DeterministicTorch():
from NNPOps.BatchedNN import TorchANIBatchedNN

from NNPOps.BatchedNN import TorchANIBatchedNN
device = torch.device(deviceString)

device = torch.device(deviceString)

mol = mdtraj.load(os.path.join(molecules, f'{molFile}_ligand.mol2'))
atomicNumbers = torch.tensor([[atom.element.atomic_number for atom in mol.top.atoms]], device=device)
atomicPositions = torch.tensor(mol.xyz, dtype=torch.float32, requires_grad=True, device=device)
mol = mdtraj.load(os.path.join(molecules, f'{molFile}_ligand.mol2'))
atomicNumbers = torch.tensor([[atom.element.atomic_number for atom in mol.top.atoms]], device=device)
atomicPositions = torch.tensor(mol.xyz, dtype=torch.float32, requires_grad=True, device=device)

nnp = torchani.models.ANI2x(periodic_table_index=True).to(device)
energy_ref = nnp((atomicNumbers, atomicPositions)).energies
energy_ref.backward()
grad_ref = atomicPositions.grad.clone()
nnp = torchani.models.ANI2x(periodic_table_index=True).to(device)
energy_ref = nnp((atomicNumbers, atomicPositions)).energies
energy_ref.backward()
grad_ref = atomicPositions.grad.clone()

nnp.neural_networks = TorchANIBatchedNN(nnp.species_converter, nnp.neural_networks, atomicNumbers).to(device)
energy = nnp((atomicNumbers, atomicPositions)).energies
atomicPositions.grad.zero_()
energy.backward()
grad = atomicPositions.grad.clone()
nnp.neural_networks = TorchANIBatchedNN(nnp.species_converter, nnp.neural_networks, atomicNumbers).to(device)
energy = nnp((atomicNumbers, atomicPositions)).energies
atomicPositions.grad.zero_()
energy.backward()
grad = atomicPositions.grad.clone()

energy_error = torch.abs((energy - energy_ref)/energy_ref)
grad_error = torch.max(torch.abs((grad - grad_ref)/grad_ref))
energy_error = torch.abs((energy - energy_ref)/energy_ref)
grad_error = torch.max(torch.abs((grad - grad_ref)/grad_ref))

assert energy_error < 5e-7
if molFile == '3o99':
assert grad_error < 0.025 # Some numerical instability
else:
assert grad_error < 5e-3
assert energy_error < 5e-7
if molFile == '3o99':
assert grad_error < 0.025 # Some numerical instability
else:
assert grad_error < 5e-3

@pytest.mark.parametrize('deviceString', ['cpu', 'cuda'])
@pytest.mark.parametrize('molFile', ['1hvj', '1hvk', '2iuz', '3hkw', '3hky', '3lka', '3o99'])
Expand Down
76 changes: 44 additions & 32 deletions src/pytorch/TestCFConv.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,18 @@ def test_gradients(deviceString):
# return torch.sum(conv(neighbors, pos, input))
# assert torch.autograd.gradcheck(func, positions)

class DeterministicTorch:
    """Enable deterministic torch algorithms for the duration of a ``with`` block.

    The previous global state is captured on entry and restored on exit,
    so wrapping code that already runs with determinism enabled is a no-op.
    """

    def __enter__(self):
        # Capture the current global flag; only flip it if it was off.
        self._was_enabled = torch.are_deterministic_algorithms_enabled()
        if not self._was_enabled:
            torch.use_deterministic_algorithms(True)

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore the original state only if this instance changed it.
        if not self._was_enabled:
            torch.use_deterministic_algorithms(False)

@pytest.mark.parametrize('deviceString', ['cpu', 'cuda'])
def test_model_serialization(deviceString):

Expand All @@ -94,35 +106,35 @@ def test_model_serialization(deviceString):
device = torch.device(deviceString)
numAtoms = 7
numFilters = 5

neighbors_ref, conv_ref = getCFConv(numFilters, device)
positions = (10*torch.rand(numAtoms, 3, dtype=torch.float32, device=device) - 5).detach()
positions.requires_grad = True
input = torch.rand(numAtoms, numFilters, dtype=torch.float32, device=device)

neighbors_ref.build(positions)
output_ref = conv_ref(neighbors_ref, positions, input)
total_ref = torch.sum(output_ref)
total_ref.backward()
grad_ref = positions.grad.clone()

with tempfile.NamedTemporaryFile() as fd1, tempfile.NamedTemporaryFile() as fd2:

torch.jit.script(neighbors_ref).save(fd1.name)
neighbors = torch.jit.load(fd1.name).to(device)

torch.jit.script(conv_ref).save(fd2.name)
conv = torch.jit.load(fd2.name).to(device)

neighbors.build(positions)
output = conv(neighbors, positions, input)
total = torch.sum(output)
positions.grad.zero_()
total.backward()
grad = positions.grad.clone()

assert torch.allclose(output, output_ref, rtol=1e-07)
if deviceString == 'cuda':
assert torch.allclose(grad, grad_ref, rtol=1e-07, atol=1e-6) # Numerical noise
else:
assert torch.allclose(grad, grad_ref, rtol=1e-07)
with DeterministicTorch():
neighbors_ref, conv_ref = getCFConv(numFilters, device)
positions = (10*torch.rand(numAtoms, 3, dtype=torch.float32, device=device) - 5).detach()
positions.requires_grad = True
input = torch.rand(numAtoms, numFilters, dtype=torch.float32, device=device)

neighbors_ref.build(positions)
output_ref = conv_ref(neighbors_ref, positions, input)
total_ref = torch.sum(output_ref)
total_ref.backward()
grad_ref = positions.grad.clone()

with tempfile.NamedTemporaryFile() as fd1, tempfile.NamedTemporaryFile() as fd2:

torch.jit.script(neighbors_ref).save(fd1.name)
neighbors = torch.jit.load(fd1.name).to(device)

torch.jit.script(conv_ref).save(fd2.name)
conv = torch.jit.load(fd2.name).to(device)

neighbors.build(positions)
output = conv(neighbors, positions, input)
total = torch.sum(output)
positions.grad.zero_()
total.backward()
grad = positions.grad.clone()

assert torch.allclose(output, output_ref, rtol=1e-07)
if deviceString == 'cuda':
assert torch.allclose(grad, grad_ref, rtol=1e-07, atol=1e-6) # Numerical noise
else:
assert torch.allclose(grad, grad_ref, rtol=1e-07)
Loading