Skip to content

Commit

Permalink
Precision handling and citations (#80)
Browse files Browse the repository at this point in the history
* type selection when saving the trajectories

* added precision also to saponify

* updated citations
  • Loading branch information
Iximiel authored Apr 11, 2023
1 parent 62837d7 commit 9e746ea
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 15 deletions.
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Until we get to a release version you may encounter different broken interface p
- Added `getSOAPSettings()` for getting the SOAP organization from a hdf5 dataset
- Added `getTimeSOAPSimple()` to give the user a shortcut to solve memory problem with large SOAP datasets
- Now `createUniverseFromSlice()` returns also types
- Added precision selection (`useType`) for storing the trajectories and the SOAP fingerprints

## Changes since v0.0.6

Expand Down
52 changes: 41 additions & 11 deletions docs/source/Citation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@ Work in progress
.. code-block::
:caption: Innate Dynamics and Identity Crisis of a Metal Surface Unveiled by Machine Learning of Atomic Environments
@misc{CioniCu,
title = {{Innate Dynamics and Identity Crisis of a Metal Surface Unveiled by Machine Learning of Atomic Environments}},
author = {Cioni, Matteo and Polino, Daniela and Rapetti, Daniele and Pesce, Luca and Dell Piane, Massimo and Pavan, Giovanni M.},
year = 2022,
archivePrefix = "arXiv",
note = {arXiv: 2207.14622},
journal = {arXiv},
publisher = {arXiv},
copyright = {Creative Commons Attribution 4.0 International},
keywords = {Materials Science (cond-mat.mtrl-sci), Computational Physics (physics.comp-ph), FOS: Physical sciences, FOS: Physical sciences}
}
@article{doi:10.1063/5.0139010,
title = {Innate dynamics and identity crisis of a metal surface unveiled by machine learning of atomic environments},
author = {Cioni,Matteo and Polino,Daniela and Rapetti,Daniele and Pesce,Luca and Delle Piane,Massimo and Pavan,Giovanni M.},
year = 2023,
journal = {The Journal of Chemical Physics},
volume = 158,
number = 12,
pages = 124701,
doi = {10.1063/5.0139010},
url = {https://doi.org/10.1063/5.0139010},
eprint = {https://doi.org/10.1063/5.0139010}
}
.. code-block::
:caption: Machine Learning of Atomic Dynamics and Statistical Surface Identities in Gold Nanoparticles
Expand All @@ -31,4 +33,32 @@ Work in progress
publisher = {ChemRxiv},
copyright = {Creative Commons Attribution 4.0 International},
keywords = {Gold nanoparticles, Machine learning, Atomic environments, Classification, Atomic dynamics, Statistical identities}
}
TimeSOAP analysis

.. code-block::
:caption: TimeSOAP: Tracking high-dimensional fluctuations in complex molecular systems via time-variations of SOAP spectra
@misc{caruso2023timesoap,
title = {TimeSOAP: Tracking high-dimensional fluctuations in complex molecular systems via time-variations of SOAP spectra},
author = {Cristina Caruso and Annalisa Cardellini and Martina Crippa and Daniele Rapetti and Giovanni M. Pavan},
year = 2023,
eprint = {2302.09673},
archiveprefix = {arXiv},
primaryclass = {physics.chem-ph}
}
LENS analysis

.. code-block::
:caption: Detecting dynamic domains and local fluctuations in complex molecular systems via timelapse neighbors shuffling
@misc{crippa2022detecting,
title = {Detecting dynamic domains and local fluctuations in complex molecular systems via timelapse neighbors shuffling},
author = {Martina Crippa and Annalisa Cardellini and Cristina Caruso and Giovanni M. Pavan},
year = 2022,
eprint = {2212.12694},
archiveprefix = {arXiv},
primaryclass = {physics.chem-ph}
}
13 changes: 10 additions & 3 deletions src/SOAPify/HDF5er/ToHDF5.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def universe2HDF5(
trajFolder: h5py.Group,
trajChunkSize: int = 100,
trajslice: slice = slice(None),
useType="float64",
):
"""Uploads an mda.Universe or an mda.AtomGroup to a h5py.Group in an hdf5 file
Expand All @@ -25,12 +26,14 @@ def universe2HDF5(
trajChunkSize (int, optional):
The desired dimension of the chunks of data that are stored in the hdf5 file.
Defaults to 100.
useType (str,optional):
The precision used to store the data. Defaults to "float64".
"""

atoms = mdaTrajectory.atoms
universe = mdaTrajectory.universe
nat = len(atoms)

useType = numpy.dtype(useType)
if "Types" not in list(trajFolder.keys()):
trajFolder.create_dataset("Types", (nat), compression="gzip", data=atoms.types)

Expand All @@ -41,7 +44,7 @@ def universe2HDF5(
compression="gzip",
chunks=(trajChunkSize, nat, 3),
maxshape=(None, nat, 3),
dtype=numpy.float64,
dtype=useType,
)

if "Box" not in list(trajFolder.keys()):
Expand All @@ -51,7 +54,7 @@ def universe2HDF5(
compression="gzip",
chunks=True,
maxshape=(None, 6),
dtype=numpy.float64,
dtype=useType,
)

frameNum = 0
Expand Down Expand Up @@ -82,6 +85,7 @@ def MDA2HDF5(
override: bool = False,
attrs: dict = None,
trajslice: slice = slice(None),
useType="float64",
):
"""Creates an HDF5 trajectory groupfrom an mda trajectory
Expand All @@ -106,6 +110,8 @@ def MDA2HDF5(
override (bool, optional):
If true the hdf5 file will be completely overwritten.
Defaults to False.
useType (str,optional):
The precision used to store the data. Defaults to "float64".
"""
with h5py.File(targetHDF5File, "w" if override else "a") as newTraj:
trajGroup = newTraj.require_group(f"Trajectories/{groupName}")
Expand All @@ -114,6 +120,7 @@ def MDA2HDF5(
trajGroup,
trajChunkSize=trajChunkSize,
trajslice=trajslice,
useType=useType,
)
if attrs:
for key in attrs.keys():
Expand Down
14 changes: 13 additions & 1 deletion src/SOAPify/saponify.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def _applySOAP(
SOAPnJobs: int = 1,
doOverride: bool = False,
verbose: bool = True,
useType="float64",
):
"""helper function: applies the soap engine to the given trajectory within the trajContainer
Expand All @@ -132,7 +133,10 @@ def _applySOAP(
verbose (bool, optional):
regulates the verbosity of the step by step operations.
Defaults to True.
useType (str,optional):
The precision used to store the data. Defaults to "float64".
"""
useType = numpy.dtype(useType)
nOfFeatures = soapEngine.features
symbols = trajContainer["Types"].asstr()[:]
nCenters = (
Expand All @@ -157,7 +161,7 @@ def _applySOAP(
compression_opts=9,
chunks=(SOAPOutputChunkDim, nCenters, nOfFeatures),
maxshape=(None, nCenters, nOfFeatures),
dtype=numpy.float64,
dtype=useType,
)
SOAPout = SOAPoutContainer[key]
SOAPout.resize((len(trajContainer["Trajectory"]), nCenters, nOfFeatures))
Expand Down Expand Up @@ -186,6 +190,7 @@ def saponifyMultipleTrajectories(
useSoapFrom: KNOWNSOAPENGINES = "dscribe",
doOverride: bool = False,
verbose: bool = True,
useType="float64",
):
"""Calculates and stores the SOAP descriptor for all of the trajectories in
the given group/file
Expand Down Expand Up @@ -240,6 +245,8 @@ def saponifyMultipleTrajectories(
verbose (bool, optional):
regulates the verbosity of the step by step operations.
Defaults to True.
useType (str,optional):
The precision used to store the data. Defaults to "float64".
"""
for key in trajContainers.keys():
if isTrajectoryGroup(trajContainers[key]):
Expand All @@ -258,6 +265,7 @@ def saponifyMultipleTrajectories(
useSoapFrom=useSoapFrom,
doOverride=doOverride,
verbose=verbose,
useType=useType,
)


Expand All @@ -276,6 +284,7 @@ def saponifyTrajectory(
useSoapFrom: KNOWNSOAPENGINES = "dscribe",
doOverride: bool = False,
verbose: bool = True,
useType="float64",
):
"""Calculates the SOAP fingerprints for each atom in a given hdf5 trajectory
Expand Down Expand Up @@ -329,6 +338,8 @@ def saponifyTrajectory(
verbose (bool, optional):
regulates the verbosity of the step by step operations.
Defaults to True.
useType (str,optional):
The precision used to store the data. Defaults to "float64".
"""
if isTrajectoryGroup(trajContainer):
print(f'using "{useSoapFrom}" to calculate SOAP for "{trajContainer.name}"')
Expand All @@ -355,6 +366,7 @@ def saponifyTrajectory(
SOAPnJobs,
doOverride=doOverride,
verbose=verbose,
useType=useType,
)
else:
raise ValueError("saponify: The input object is not a trajectory group.")
1 change: 1 addition & 0 deletions tests/test_HDF5er.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Test for HDF5er"""
import SOAPify.HDF5er as HDF5er
import h5py
import numpy
Expand Down

0 comments on commit 9e746ea

Please sign in to comment.