Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dask timeseries prototype #4714

Draft
wants to merge 11 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,6 @@ benchmarks/results
.idea
.vscode
*.lock

# dev
tmp/
49 changes: 49 additions & 0 deletions package/MDAnalysis/analysis/dasktimeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from .results import Results, ResultsGroup
import dask.array as da
import numpy as np

Check warning on line 3 in package/MDAnalysis/analysis/dasktimeseries.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/dasktimeseries.py#L1-L3

Added lines #L1 - L3 were not covered by tests


class DaskTimeSeriesAnalysisBase:
    """Skeleton for analyses that work on a whole dask timeseries at once.

    Subclasses customise behaviour by overriding the three hooks
    :meth:`_prepare`, :meth:`_compute` and :meth:`_conclude`;
    :meth:`run` invokes them in exactly that order.

    Parameters
    ----------
    dask_timeseries
        The timeseries to analyse; stored unchanged as ``self._dts``.
    verbose : bool, optional
        Stored as ``self._verbose``; this base class does not read it.
    **kwargs
        Accepted for signature compatibility and ignored here.

    Attributes
    ----------
    results : Results
        Empty container created at construction for subclasses to fill.
    """

    def __init__(self, dask_timeseries, verbose=False, **kwargs):
        self.results = Results()
        self._verbose = verbose
        self._dts = dask_timeseries

    def _prepare(self):
        """Hook executed first by :meth:`run`; a no-op by default."""

    def _compute(self):
        """Hook executed second by :meth:`run`; a no-op by default."""

    def _conclude(self):
        """Hook executed last by :meth:`run`; a no-op by default."""

    def run(self):
        """Call the three hooks in order and return this instance."""
        # Hooks are looked up by name right before each call, one at a time.
        for hook_name in ("_prepare", "_compute", "_conclude"):
            getattr(self, hook_name)()
        return self

Check warning on line 26 in package/MDAnalysis/analysis/dasktimeseries.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/dasktimeseries.py#L22-L26

Added lines #L22 - L26 were not covered by tests


class DaskRMSF(DaskTimeSeriesAnalysisBase):
    """Per-atom root-mean-square fluctuation computed from a dask timeseries.

    For every atom the squared deviation from its mean position is averaged
    along axis 0 of the timeseries, summed over the remaining coordinate
    axis, and the square root of that sum is stored in ``results.rmsf``.

    Parameters
    ----------
    dask_timeseries
        Array of positions.
        NOTE(review): assumed to be shaped ``(n_frames, n_atoms, 3)``
        (the ``'fac'`` order) -- confirm against the caller.
    verbose : bool, optional
        Forwarded to the base class.
    **kwargs
        Forwarded to the base class.
    """

    def __init__(self, dask_timeseries, verbose=False, **kwargs):
        super().__init__(dask_timeseries, verbose=verbose, **kwargs)

    def _prepare(self):
        """Create a zero-filled placeholder shaped like the final result."""
        # The final RMSF holds one value per atom, so the placeholder is 1-D;
        # the previous (n_atoms, 3) placeholder did not match what
        # ``_compute`` stores.
        n_atoms = self._dts.shape[1]
        self.results["rmsf"] = np.zeros(n_atoms)

    def _compute(self):
        """Build the RMSF expression and evaluate it into ``results.rmsf``."""
        positions = self._dts
        deviations = positions - positions.mean(axis=0)
        mean_squared_deviations = (deviations ** 2).mean(axis=0)
        # Sum the per-coordinate mean squared deviations directly; taking a
        # square root and squaring it again (as before) only added graph
        # tasks and rounding error.
        self.results.rmsf = da.sqrt(
            mean_squared_deviations.sum(axis=1)
        ).compute()

    def _conclude(self):
        """Nothing to finalise; the result is complete after ``_compute``."""

Check warning on line 49 in package/MDAnalysis/analysis/dasktimeseries.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/dasktimeseries.py#L48-L49

Added lines #L48 - L49 were not covered by tests
55 changes: 54 additions & 1 deletion package/MDAnalysis/coordinates/H5MD.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,9 @@

"""
import warnings

import dask.array as da
from .. import units
from typing import Any, Union, Optional, List, Dict
import numpy as np
import MDAnalysis as mda
from . import base, core
Expand Down Expand Up @@ -810,6 +812,57 @@
kwargs.setdefault('forces', self.has_forces)
return H5MDWriter(filename, n_atoms, **kwargs)

def dask_timeseries(self, asel: Optional['AtomGroup']=None,
                    atomgroup: Optional['AtomGroup']=None,
                    start: Optional[int]=None, stop: Optional[int]=None,
                    step: Optional[int]=None,
                    order: Optional[str]='fac') -> 'da.Array':
    """Return the position timeseries as a dask array.

    The ``position/value`` dataset of the particle group is wrapped with
    :func:`dask.array.from_array`, sliced by frame and atom, optionally
    transposed, and scaled by the length conversion factor from
    ``self.units['length']`` to Angstrom.

    Parameters
    ----------
    asel : AtomGroup, optional
        Deprecated alias of `atomgroup`; emits a
        :class:`DeprecationWarning` when given.
    atomgroup : AtomGroup, optional
        Atoms to include (selected through ``atomgroup.indices``).
        With ``None``, all ``self.n_atoms`` atoms are used.
    start, stop, step : int, optional
        Frame slice; normalised with ``self.check_slice_indices``.
    order : str, optional
        Permutation of ``'fac'`` giving the axis order of the result
        (default ``'fac'``).

    Returns
    -------
    dask.array.Array
        The selected coordinates in the requested axis order.

    Raises
    ------
    ValueError
        If both `asel` and `atomgroup` are given, if the atom group is
        empty, or if `order` is not a permutation of ``'fac'``.
    """
    if asel is not None:
        warnings.warn(
            "asel argument to timeseries will be renamed to "
            "'atomgroup' in 3.0, see #3911",
            category=DeprecationWarning)
        # Compare against None: an empty group is falsy and would otherwise
        # slip past this mutual-exclusion check.
        if atomgroup is not None:
            raise ValueError("Cannot provide both asel and atomgroup kwargs")
        atomgroup = asel
    start, stop, step = self.check_slice_indices(start, stop, step)

    if atomgroup is not None:
        if len(atomgroup) == 0:
            raise ValueError(
                "Timeseries requires at least one atom to analyze")
        atom_numbers = atomgroup.indices
    else:
        atom_numbers = np.arange(self.n_atoms)

    positions = da.from_array(self._particle_group['position']['value'])
    coordinates = positions[start:stop:step, atom_numbers, :]

    # switch axes around
    default_order = 'fac'
    if order != default_order:
        try:
            newidx = [default_order.index(i) for i in order]
        except ValueError as err:
            raise ValueError(f"Unrecognized order key in {order}, "
                             "must be permutation of 'fac'") from err

        try:
            coordinates = da.moveaxis(coordinates, newidx, [0, 1, 2])
        except ValueError as err:
            errmsg = ("Repeated or missing keys passed to argument "
                      f"`order`: {order}, each key must be used once")
            raise ValueError(errmsg) from err

    f = units.get_conversion_factor('length',
                                    self.units['length'], 'Angstrom')
    # Build a new scaled array rather than relying on in-place syntax.
    coordinates = coordinates * f

    return coordinates

Check warning on line 863 in package/MDAnalysis/coordinates/H5MD.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/coordinates/H5MD.py#L863

Added line #L863 was not covered by tests


@property
def has_positions(self):
"""``True`` if 'position' group is in trajectory."""
Expand Down
43 changes: 43 additions & 0 deletions tmp/env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Conda environment specification for the environment named below.
name: dask-timeseries-dev
# Channels listed for this environment.
channels:
- defaults
- conda-forge
# Packages listed for this environment, some with version constraints.
dependencies:
- chemfiles>=0.10
- codecov
- cython
- dask
- docutils
- fasteners
- griddataformats
- gsd
- h5py>=2.10
- hypothesis
- ipykernel
- joblib>=0.12
- mdanalysis-sphinx-theme >=1.3.0
- matplotlib>=3.2.2
- mmtf-python
- mock
- networkx
- numpy>=1.23.2
- pytest
- python==3.10
- pytng>=0.2.3
- scikit-learn
- scipy
- pip
- sphinx <7.0
- tidynamics>=1.0.0
- tqdm>=4.43.0
- sphinxcontrib-bibtex
- mdaencore
- waterdynamics
# NOTE(review): indentation is lost in this view; the entries after `pip:`
# are presumably its nested sub-list of pip-installed packages -- confirm.
- pip:
- mdahole2
- pathsimanalysis
- duecredit
- parmed
- sphinx-sitemap
- packaging
- pyedr>=0.7.0
Loading
Loading