From 060947aad5b3141ff67ee492fc04d399b394b625 Mon Sep 17 00:00:00 2001
From: wiederm <marcus.wieder@gmail.com>
Date: Wed, 10 Jan 2024 15:12:40 +0100
Subject: [PATCH] Update reporters in integrators and mcmc modules

---
 chiron/integrators.py            |  8 +--
 chiron/mcmc.py                   |  9 ++--
 chiron/reporters.py              | 90 +++++++++++++++++++++++---------
 chiron/tests/test_integrators.py | 10 ++--
 4 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/chiron/integrators.py b/chiron/integrators.py
index b9120fd..b905fb6 100644
--- a/chiron/integrators.py
+++ b/chiron/integrators.py
@@ -4,7 +4,7 @@
 from jax import random
 from openmm import unit
 from .states import SamplerState, ThermodynamicState
-from .reporters import SimulationReporter
+from .reporters import LangevinDynamicsReporter
 from typing import Optional
 
 
@@ -25,7 +25,7 @@ def __init__(
         stepsize=1.0 * unit.femtoseconds,
         collision_rate=1.0 / unit.picoseconds,
         save_frequency: int = 100,
-        reporter: Optional[SimulationReporter] = None,
+        reporter: Optional[LangevinDynamicsReporter] = None,
         save_traj_in_memory: bool = False,
     ) -> None:
         """
@@ -53,8 +53,8 @@ def __init__(
 
         self.stepsize = stepsize
         self.collision_rate = collision_rate
-        if reporter is not None:
-            log.info(f"Using reporter {reporter} saving to {reporter.filename}")
+        if reporter:
+            log.info(f"Using reporter {reporter} saving to {reporter.file_path}")
             self.reporter = reporter
         self.save_frequency = save_frequency
         self.save_traj_in_memory = save_traj_in_memory
diff --git a/chiron/mcmc.py b/chiron/mcmc.py
index 2cb7d75..078d9c2 100644
--- a/chiron/mcmc.py
+++ b/chiron/mcmc.py
@@ -2,7 +2,8 @@
 from openmm import unit
 from typing import Tuple, List, Optional
 import jax.numpy as jnp
-from chiron.reporters import SimulationReporter
+from chiron.reporters import _SimulationReporter
+
 
 class MCMCMove:
     def __init__(self, nr_of_moves: int, seed: int):
@@ -27,7 +28,7 @@ def __init__(
         self,
         stepsize=1.0 * unit.femtoseconds,
         collision_rate=1.0 / unit.picoseconds,
-        simulation_reporter: Optional[SimulationReporter] = None,
+        simulation_reporter: Optional[_SimulationReporter] = None,
         nr_of_steps=1_000,
         seed: int = 1234,
         save_traj_in_memory: bool = False,
@@ -349,7 +350,7 @@ def apply(
         self,
         thermodynamic_state: ThermodynamicState,
         sampler_state: SamplerState,
-        reporter: SimulationReporter,
+        reporter: _SimulationReporter,
         nbr_list=None,
     ):
         """Apply a metropolized move to the sampler state.
@@ -497,7 +498,7 @@ def __init__(
         displacement_sigma=1.0 * unit.nanometer,
         nr_of_moves: int = 100,
         atom_subset: Optional[List[int]] = None,
-        simulation_reporter: Optional[SimulationReporter] = None,
+        simulation_reporter: Optional[_SimulationReporter] = None,
     ):
         """
         Initialize the MCMC class.
diff --git a/chiron/reporters.py b/chiron/reporters.py
index 6230a44..7924778 100644
--- a/chiron/reporters.py
+++ b/chiron/reporters.py
@@ -7,8 +7,30 @@
 from openmm.app import Topology
 
 
-class SimulationReporter:
-    def __init__(self, filename: str, topology: Topology, buffer_size: int = 1):
+class BaseReporter:
+    """Class-level holder of the directory that reporters write into."""
+
+    _directory = None  # None: get_directory() falls back to the cwd
+
+    @classmethod
+    def set_directory(cls, directory: str):
+        cls._directory = directory
+
+    @classmethod
+    def get_directory(cls):
+        """Return the configured directory as a Path, or the cwd if unset."""
+        from pathlib import Path
+        if cls._directory is not None:
+            return Path(cls._directory)
+        log.debug(f"No directory set, using current working directory: {Path.cwd()}")
+        return Path.cwd()
+
+
+import pathlib
+
+
+class _SimulationReporter:
+    def __init__(self, file_path: pathlib.Path, buffer_size: int = 10):
         """
         Initialize the SimulationReporter.
 
@@ -16,17 +38,15 @@ def __init__(self, filename: str, topology: Topology, buffer_size: int = 1):
         ----------
         filename : str
             Name of the HDF5 file to write the simulation data.
-        topology: openmm.Topology
-        buffer_size : int, optional
-            Number of data points to buffer before writing to disk (default is 1).
-
         """
-        self.filename = filename
+        if file_path.suffix != ".h5":
+            file_path = file_path.with_suffix(".h5")
+        self.file_path = file_path
+        log.info(f"Writing simulation data to {self.file_path}")
+
         self.buffer_size = buffer_size
-        self.topology = topology
         self.buffer = {}
-        self.h5file = h5py.File(filename, "a")
-        log.info(f"Writing simulation data to {filename}")
+        self.h5file = h5py.File(self.file_path, "a")
 
     def get_available_keys(self):
         return self.h5file.keys()
@@ -40,7 +60,6 @@ def report(self, data_dict):
         data_dict : dict
             Dictionary containing data to report. Keys are data labels (e.g., 'energy'),
             and values are the data points (usually numpy arrays).
-
         """
         for key, value in data_dict.items():
             if key not in self.buffer:
@@ -50,7 +69,7 @@ def report(self, data_dict):
             if len(self.buffer[key]) >= self.buffer_size:
                 self._write_to_disk(key)
 
-    def _write_to_disk(self, key:str):
+    def _write_to_disk(self, key: str):
         """
         Write buffered data of a given key to the HDF5 file.
 
@@ -66,7 +85,7 @@ def _write_to_disk(self, key:str):
             dset.resize((dset.shape[0] + data.shape[0],) + data.shape[1:])
             dset[-data.shape[0] :] = data
         else:
-            log.debug(f"Creating {key} in {self.filename}")
+            log.debug(f"Creating {key} in {self.file_path}")
             self.h5file.create_dataset(
                 key, data=data, maxshape=(None,) + data.shape[1:], chunks=True
             )
@@ -99,11 +118,40 @@ def get_property(self, name: str):
 
         """
         if name not in self.h5file:
-            log.debug(f"{name} not in HDF5 file")
+            log.warning(f"{name} not in HDF5 file")
             return None
         else:
             return np.array(self.h5file[name])
 
+
+class LangevinDynamicsReporter(_SimulationReporter):
+    _name = "langevin_reporter"
+
+    def __init__(self, topology: Topology, name: str = "", buffer_size: int = 1):
+        """
+        Initialize the LangevinDynamicsReporter.
+
+        Parameters
+        ----------
+        topology: openmm.Topology
+            Topology stored on the reporter as ``self.topology``.
+        name : str, optional
+            Suffix appended to the base file name ``langevin_reporter_`` (default is "").
+        buffer_size : int, optional
+            Number of data points to buffer before writing to disk (default is 1).
+        """
+        filename = LangevinDynamicsReporter.get_name()
+        directory = BaseReporter.get_directory()
+        directory.mkdir(parents=True, exist_ok=True)
+        self.file_path = directory / f"{filename}_{name}"
+        self.topology = topology
+        # Forward buffer_size; without it the base-class default (10) would apply.
+        super().__init__(self.file_path, buffer_size=buffer_size)
+
+    @classmethod
+    def get_name(cls):
+        return cls._name
+
     def get_mdtraj_trajectory(self):
         import mdtraj as md
 
@@ -115,21 +163,15 @@ def get_mdtraj_trajectory(self):
         )
 
 
-
 class MultistateReporter:
-    
-    def __init__(self, path_to_dir:str) -> None:
+    def __init__(self, path_to_dir: str) -> None:
         self.path_to_dir = path_to_dir
-        
+
     def _write_trajectories():
         pass
-    
+
     def _write_energies():
         pass
-    
+
     def _write_states():
         pass
-    
-        
-    
-    
\ No newline at end of file
diff --git a/chiron/tests/test_integrators.py b/chiron/tests/test_integrators.py
index 203b409..d50e485 100644
--- a/chiron/tests/test_integrators.py
+++ b/chiron/tests/test_integrators.py
@@ -30,14 +30,16 @@ def test_langevin_dynamics(prep_temp_dir, provide_testsystems_and_potentials):
         )
 
         sampler_state = SamplerState(testsystem.positions)
-        from chiron.reporters import SimulationReporter
+        from chiron.reporters import LangevinDynamicsReporter
+        from chiron.reporters import BaseReporter
 
-        reporter = SimulationReporter(f"{prep_temp_dir}/test{i}.h5", None, 1)
+        BaseReporter.set_directory(prep_temp_dir)
+        reporter = LangevinDynamicsReporter(None, name=f"test{i}")
 
-        integrator = LangevinIntegrator(reporter=reporter)
+        integrator = LangevinIntegrator(reporter=reporter, save_frequency=1)
         integrator.run(
             sampler_state,
             thermodynamic_state,
-            n_steps=5,
+            n_steps=20,
         )
         i = i + 1