From 1ecc1ac20e51d2cfe8af6cf78d993ba64f41c54d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 6 Sep 2021 13:30:29 +0200 Subject: [PATCH 01/29] Added pycharm folders to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 4b0a28a..20a2a61 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ .DS_Store .vscode/settings.json build/ +venv +.idea + From cd63f7c8ad6fc101fe8d9923b7345189cf3df807 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 6 Sep 2021 13:32:12 +0200 Subject: [PATCH 02/29] Completed install_requires list with missing entries: - h5py is newly required for post-processing pipeline --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3c08eb6..98c5d8e 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ requires = [] -install_requires = ["numpy", "sklearn", "pyyaml"] +install_requires = ["numpy", "sklearn", "pyyaml", "pyzmq", "pyserial", "h5py"] console_scripts = ["pymepix-acq=pymepix.pymepix:main"] From d7aaf27bd7a346eb943ae0223694b39da1a03934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 6 Sep 2021 14:03:07 +0200 Subject: [PATCH 03/29] Extracted main.py from pymepix.py --- pymepix/main.py | 233 +++++++++++++++++++++++++++++++++++++++++++++ pymepix/pymepix.py | 153 +---------------------------- 2 files changed, 234 insertions(+), 152 deletions(-) create mode 100644 pymepix/main.py diff --git a/pymepix/main.py b/pymepix/main.py new file mode 100644 index 0000000..8bafe62 --- /dev/null +++ b/pymepix/main.py @@ -0,0 +1,233 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see . 
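+#
+# Command-line entry points: the 'connect' subcommand defined below drives a
+# live acquisition from a Timepix camera, while 'post-process' re-processes a
+# previously recorded raw data file.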
+ +""" Main module for pymepix """ + +import time +import argparse +import logging +import time + +import pymepix.config.load_config as cfg +from pymepix.processing.rawfilesampler import RawFileSampler + +from pymepix.processing.datatypes import MessageType +from pymepix.pymepix import PollBufferEmpty, Pymepix +from pymepix.util.storage import open_output_file, store_raw, store_toa, store_tof + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + + +def connect_timepix(args): + # Connect to SPIDR + pymepix = Pymepix((args.ip, args.port)) + # If there are no valid timepix detected then quit() + if len(pymepix) == 0: + logging.error( + "-------ERROR: SPIDR FOUND BUT NO VALID TIMEPIX DEVICE DETECTED ---------- " + ) + quit() + if args.spx: + logging.info("Opening Sophy file {}".format(args.spx)) + pymepix[0].loadConfig(args.spx) + + # Switch to TOF mode if set + if args.decode and args.tof: + pymepix[0].acquisition.enableEvents = True + + # Set the bias voltage + pymepix.biasVoltage = args.bias + + ext = "raw" + if args.decode: + logging.info("Decoding data enabled") + if args.tof: + logging.info("Tof calculation enabled") + ext = "tof" + else: + ext = "toa" + else: + logging.info("No decoding selected") + + output_file = open_output_file(args.output, ext) + + total_time = args.time + + # self._timepix._spidr.resetTimers() + # self._timepix._spidr.restartTimers() + # time.sleep(1) # give camera time to reset timers + + # Start acquisition + pymepix.start() + # start raw2disk + # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0].outfile_name = args.output + # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0]._raw2Disk.timer = 1 + # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0].record = 1 + + start_time = time.time() + logging.info("------Starting acquisition---------") + + while time.time() - start_time < total_time: + try: + data_type, data = pymepix.poll() + except PollBufferEmpty: + continue + logging.debug("Datatype: {} Data:{}".format(data_type, data)) + if data_type is MessageType.RawData: + if not args.decode: + store_raw(output_file, data) + elif data_type is MessageType.PixelData: + if args.decode and not args.tof: + store_toa(output_file, data) + elif data_type is MessageType.PixelData: + if args.decode and args.tof: + store_tof(output_file, data) + + pymepix.stop() + +def post_process(args): + + file_sampler = RawFileSampler(args.file.name, args.output_file, args.number_of_processes, + args.timewalk_file, args.cent_timewalk_file) + start_time = time.time() + file_sampler.run() + stop_time = time.time() + + print(f'took: {stop_time - start_time}s') + +def main(): + + parser = argparse.ArgumentParser(description="Timepix acquisition script") + subparsers = parser.add_subparsers(required=True) + + parser_connect = subparsers.add_parser('connect', help='Connect to TimePix camera and acquire data.') + parser_connect.set_defaults(func=connect_timepix) + + parser_connect.add_argument( + "-i", + "--ip", + dest="ip", + type=str, + default=cfg.default_cfg["timepix"]["tpx_ip"], + help="IP address of Timepix", + ) + parser_connect.add_argument( + "-p", + "--port", + dest="port", + type=int, + default=50000, + help="TCP port to use for the connection", + ) + parser_connect.add_argument( + "-s", "--spx", dest="spx", type=str, help="Sophy config file to load" + ) + parser_connect.add_argument( + "-v", + "--bias", + dest="bias", + 
type=float, + default=50, + help="Bias voltage in Volts", + ) + parser_connect.add_argument( + "-t", + "--time", + dest="time", + type=float, + help="Acquisition time in seconds", + required=True, + ) + parser_connect.add_argument( + "-o", + "--output", + dest="output", + type=str, + help="output filename prefix", + required=True, + ) + parser_connect.add_argument( + "-d", + "--decode", + dest="decode", + type=bool, + help="Store decoded values instead", + default=False, + ) + parser_connect.add_argument( + "-T", + "--tof", + dest="tof", + type=bool, + help="Compute TOF if decode is enabled", + default=False, + ) + + parser_post_process = subparsers.add_parser('post-process', help='Perform post-processing with a acquired raw data file.') + parser_post_process.set_defaults(func=post_process) + parser_post_process.add_argument( + "-f", + "--file", + dest="file", + type=argparse.FileType('rb'), + help="Raw data file for postprocessing", + required=True + ) + parser_post_process.add_argument( + "-o", + "--output_file", + dest="output_file", + type=str, + help="Filename where the processed data is stored", + required=True + ) + parser_post_process.add_argument( + "-t", + "--timewalk_file", + dest="timewalk_file", + type=argparse.FileType('rb'), + help="File containing the time walk information" + ) + parser_post_process.add_argument( + "-c", + "--cent_timewalk_file", + dest="cent_timewalk_file", + type=argparse.FileType('rb'), + help="File containing the centroided time walk information" + ) + parser_post_process.add_argument( + "-n", + "--number_of_processes", + dest="number_of_processes", + type=int, + default=-1, + help="The number of processes used for the centroiding (default: None which ensures all existing system cores are used')" + ) + + args = parser.parse_args() + + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/pymepix/pymepix.py b/pymepix/pymepix.py index 6d57b8a..cebfc4f 100644 --- a/pymepix/pymepix.py +++ b/pymepix/pymepix.py @@ -17,9 +17,6 @@ # # You should have received a copy of the GNU General Public License along with this program. If not, # see . 
- -"""Main module for pymepix""" - import threading import time from collections import deque @@ -27,7 +24,6 @@ import pymepix.config.load_config as cfg from pymepix.core.log import Logger - from .SPIDR.spidrcontroller import SPIDRController from .timepixdevice import TimepixDevice @@ -268,151 +264,4 @@ def __len__(self): return len(self._timepix_devices) def getDevice(self, num): - return self._timepix_devices[num] - - -def main(): - import argparse - import logging - import time - - from .processing.datatypes import MessageType - from .util.storage import open_output_file, store_raw, store_toa, store_tof - - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - ) - - parser = argparse.ArgumentParser(description="Timepix acquisition script") - parser.add_argument( - "-i", - "--ip", - dest="ip", - type=str, - default=cfg.default_cfg["timepix"]["tpx_ip"], - help="IP address of Timepix", - ) - parser.add_argument( - "-p", - "--port", - dest="port", - type=int, - default=50000, - help="TCP port to use for the connection", - ) - parser.add_argument( - "-s", "--spx", dest="spx", type=str, help="Sophy config file to load" - ) - parser.add_argument( - "-v", - "--bias", - dest="bias", - type=float, - default=50, - help="Bias voltage in Volts", - ) - parser.add_argument( - "-t", - "--time", - dest="time", - type=float, - help="Acquisition time in seconds", - required=True, - ) - parser.add_argument( - "-o", - "--output", - dest="output", - type=str, - help="output filename prefix", - required=True, - ) - parser.add_argument( - "-d", - "--decode", - dest="decode", - type=bool, - help="Store decoded values instead", - default=False, - ) - parser.add_argument( - "-T", - "--tof", - dest="tof", - type=bool, - help="Compute TOF if decode is enabled", - default=False, - ) - - args = parser.parse_args() - - # Connect to SPIDR - pymepix = Pymepix((args.ip, args.port)) - # If there are no valid timepix detected then quit() - if len(pymepix) == 0: - logging.error( - "-------ERROR: SPIDR FOUND BUT NO VALID TIMEPIX DEVICE DETECTED ---------- " - ) - quit() - if args.spx: - logging.info("Opening Sophy file {}".format(args.spx)) - pymepix[0].loadConfig(args.spx) - - # Switch to TOF mode if set - if args.decode and args.tof: - pymepix[0].acquisition.enableEvents = True - - # Set the bias voltage - pymepix.biasVoltage = args.bias - - ext = "raw" - if args.decode: - logging.info("Decoding data enabled") - if args.tof: - logging.info("Tof calculation enabled") - ext = "tof" - else: - ext = "toa" - else: - logging.info("No decoding selected") - - output_file = open_output_file(args.output, ext) - - total_time = args.time - - # self._timepix._spidr.resetTimers() - # self._timepix._spidr.restartTimers() - # time.sleep(1) # give camera time to reset timers - - # Start acquisition - pymepix.start() - # start raw2disk - # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0].outfile_name = args.output - # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0]._raw2Disk.timer = 1 - # pymepix._timepix_devices[0]._acquisition_pipeline._stages[0]._pipeline_objects[0].record = 1 - - start_time = time.time() - logging.info("------Starting acquisition---------") - - while time.time() - start_time < total_time: - try: - data_type, data = pymepix.poll() - except PollBufferEmpty: - continue - logging.debug("Datatype: {} Data:{}".format(data_type, data)) - if data_type is MessageType.RawData: - if not args.decode: - 
store_raw(output_file, data)
-        elif data_type is MessageType.PixelData:
-            if args.decode and not args.tof:
-                store_toa(output_file, data)
-        elif data_type is MessageType.PixelData:
-            if args.decode and args.tof:
-                store_tof(output_file, data)
-
-    pymepix.stop()
-
-
-if __name__ == "__main__":
-    main()
+        return self._timepix_devices[num]
\ No newline at end of file

From 91663d35a799cdfe82f4b0ce7acd064119447bf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3=B6hling?=
Date: Mon, 6 Sep 2021 14:04:58 +0200
Subject: [PATCH 04/29] Multiprocessing logging issue resolved:

- logging led to a resource leak because the logging queue was never closed
- this could not be resolved with the implemented logging functionality
- logging was reduced to very basic Python logging, which should be fine
- the previous logging was not really useful and overcomplicated things
  heavily

---
 pymepix/core/log.py | 118 ++++++++------------------------------------
 1 file changed, 20 insertions(+), 98 deletions(-)

diff --git a/pymepix/core/log.py b/pymepix/core/log.py
index 90d2754..338942d 100644
--- a/pymepix/core/log.py
+++ b/pymepix/core/log.py
@@ -19,13 +19,12 @@
 # see <https://www.gnu.org/licenses/>.
 
 
+from abc import abstractmethod
 import logging
-import threading
-from multiprocessing import Queue
+import multiprocessing as mp
 
 __all__ = ["Logger", "ProcessLogger"]
 
-
 class PymepixLogger(object):
     """Base class for logging in pymepix
 
     Parameters
     -----------
     name : str
         Name used for logging
 
     """
 
-    _proc_log_queue = Queue()
-
-    _init = False
-
-    @classmethod
-    def getLogQueue(cls):
-        """Provides logging queue for multiprocessing logging
-
-        Returns
-        --------
-        :obj:`multiprocessing.Queue`
-            Queue where logs should go
-
-        """
-        return cls._proc_log_queue
-
-    @classmethod
-    def _logging_thread(cls):
-        """This thread collects logs from Processes and writes them to stream"""
-
-        thread_log = PymepixLogger.getLogger("log_thread")
-        log_queue = cls.getLogQueue()
-
-        thread_log.info("Starting Multiprocess logging")
-        while True:
-            name, log_level, message, args, kwargs = log_queue.get()
-            _log = logging.getLogger(name)
-            _log.log(log_level, message, *args, **kwargs)
-
-    @classmethod
-    def getRootLogger(cls):
-        return cls._root_logger
-
-    @classmethod
-    def getLogger(cls, name):
-        return logging.getLogger("pymepix.{}".format(name))
-
-    @classmethod
-    def reInit(cls):
-        if cls._init is False:
-            cls._root_logger = logging.getLogger("pymepix")
-
-            cls._root_logger.info("Reinitializing PymepixLogger")
-            cls._log_thread = threading.Thread(target=cls._logging_thread)
-            cls._log_thread.daemon = True
-            cls._log_thread.start()
-            cls._init = True
+    def __init__(self, name):
+        self._logger = self.getLogger(name)
 
         self._log_name = "pymepix.{}".format(name)
-        PymepixLogger.reInit()
 
     @property
     def logName(self):
         return self._log_name
 
-    def info(self, message, *args, **kwargs):
-        pass
-
-    def warning(self, message, *args, **kwargs):
-        pass
-
-    def debug(self, message, *args, **kwargs):
-        pass
-
-    def error(self, message, *args, **kwargs):
-        pass
-
-    def critical(self, message, *args, **kwargs):
-        pass
-
-
-class Logger(PymepixLogger):
-    """Standard logging using logger library
-
-    Parameters
-    -----------
-    name : str
-        Name used for logging
-
-    """
-
-    def __init__(self, name):
-        PymepixLogger.__init__(self, name)
-        self._logger = logging.getLogger(self.logName)
+    @abstractmethod
+    def getLogger(cls, name):
+        pass
 
     def info(self, message, *args, **kwargs):
         """ See :class:`logging.Logger` """
@@ -148,8 +75,8 @@ def critical(self, message, *args, **kwargs):
self._logger.critical(message, *args, **kwargs) -class ProcessLogger(PymepixLogger): - """Sends logs to queue to be processed by logging thread +class Logger(PymepixLogger): + """Standard logging using logger library Parameters ----------- @@ -161,27 +88,22 @@ class ProcessLogger(PymepixLogger): def __init__(self, name): PymepixLogger.__init__(self, name) self._logger = logging.getLogger(self.logName) - self._log_queue = PymepixLogger.getLogQueue() - def info(self, message, *args, **kwargs): - """ See :class:`logging.Logger` """ - self._log_queue.put((self._log_name, logging.INFO, message, args, kwargs)) + def getLogger(cls, name): + return logging.getLogger("pymepix.{}".format(name)) - def warning(self, message, *args, **kwargs): - """ See :class:`logging.Logger` """ - self._log_queue.put((self._log_name, logging.WARNING, message, args, kwargs)) - def debug(self, message, *args, **kwargs): - """ See :class:`logging.Logger` """ - self._log_queue.put((self._log_name, logging.DEBUG, message, args, kwargs)) +class ProcessLogger(PymepixLogger): + """Sends logs to queue to be processed by logging thread - def error(self, message, *args, **kwargs): - """ See :class:`logging.Logger` """ - self._log_queue.put((self._log_name, logging.ERROR, message, args, kwargs)) + Parameters + ----------- + name : str + Name used for logging - def critical(self, message, *args, **kwargs): - """ See :class:`logging.Logger` """ - self._log_queue.put((self._log_name, logging.CRITICAL, message, args, kwargs)) + """ + def getLogger(self, name): + return mp.get_logger() def main(): From 5c731c65382a0a8a624c955b0da2c0485102d37e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 6 Sep 2021 14:40:34 +0200 Subject: [PATCH 05/29] Rename pymepix class to solve name conflict with pymepix package. --- pymepix/__init__.py | 2 +- pymepix/main.py | 4 ++-- pymepix/{pymepix.py => pymepix_connection.py} | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename pymepix/{pymepix.py => pymepix_connection.py} (99%) diff --git a/pymepix/__init__.py b/pymepix/__init__.py index ad06abc..6ae2e9c 100644 --- a/pymepix/__init__.py +++ b/pymepix/__init__.py @@ -19,5 +19,5 @@ # see . 
 from pymepix.processing import MessageType
-from pymepix.pymepix import PollBufferEmpty, Pymepix
+from pymepix.pymepix_connection import PollBufferEmpty, PymepixConnection
 from pymepix.timepixdef import *
diff --git a/pymepix/main.py b/pymepix/main.py
index 8bafe62..849d00a 100644
--- a/pymepix/main.py
+++ b/pymepix/main.py
@@ -29,7 +29,7 @@ from pymepix.processing.rawfilesampler import RawFileSampler
 
 from pymepix.processing.datatypes import MessageType
-from pymepix.pymepix import PollBufferEmpty, Pymepix
+from pymepix.pymepix_connection import PollBufferEmpty, PymepixConnection
 from pymepix.util.storage import open_output_file, store_raw, store_toa, store_tof
 
 logging.basicConfig(
@@ -40,7 +40,7 @@
 
 def connect_timepix(args):
     # Connect to SPIDR
-    pymepix = Pymepix((args.ip, args.port))
+    pymepix = PymepixConnection((args.ip, args.port))
     # If there are no valid timepix detected then quit()
     if len(pymepix) == 0:
         logging.error(
diff --git a/pymepix/pymepix.py b/pymepix/pymepix_connection.py
similarity index 99%
rename from pymepix/pymepix.py
rename to pymepix/pymepix_connection.py
index cebfc4f..d99fba4 100644
--- a/pymepix/pymepix.py
+++ b/pymepix/pymepix_connection.py
@@ -32,7 +32,7 @@ class PollBufferEmpty(Exception):
     pass
 
 
-class Pymepix(Logger):
+class PymepixConnection(Logger):
    """High level class to work with timepix and perform acquisition
 
    This class performs connection to SPIDR, initialization of timepix and handling of acquisition.

From f2bd824f0ccde478bf9316aeddcba5c5ceae6939 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3%B6hling?=
Date: Mon, 6 Sep 2021 14:43:19 +0200
Subject: [PATCH 06/29] #15 Implementation of post-processing

- massive refactoring of the current pipeline implementation
- the packet processor and the centroid calculator can now be used more
  flexibly in different use cases
- improved centroiding performance by running in chunks of a specific size,
  which gives an ideal runtime for DBSCAN and the centroiding
- implementation of test cases for the new CentroidCalculator class and the
  post-processing pipeline

---
 pymepix/processing/acquisition.py             |  12 +-
 pymepix/processing/centroiding.py             | 236 -----------
 .../processing/logic/centroid_calculator.py   | 201 ++++++++++
 pymepix/processing/logic/packet_processor.py  | 267 +++++++++++++
 pymepix/processing/logic/processing_step.py   |  18 +
 pymepix/processing/packetprocessor.py         | 368 ------------------
 .../pipeline_centroid_calculator.py           |  49 +++
 .../processing/pipeline_packet_processor.py   |  86 ++++
 pymepix/processing/rawfilesampler.py          | 273 +++++++++++++
 tests/test_centroid_calculator.py             | 177 +++++++++
 tests/test_post_processing.py                 |  42 ++
 11 files changed, 1119 insertions(+), 610 deletions(-)
 delete mode 100644 pymepix/processing/centroiding.py
 create mode 100644 pymepix/processing/logic/centroid_calculator.py
 create mode 100644 pymepix/processing/logic/packet_processor.py
 create mode 100644 pymepix/processing/logic/processing_step.py
 delete mode 100644 pymepix/processing/packetprocessor.py
 create mode 100644 pymepix/processing/pipeline_centroid_calculator.py
 create mode 100644 pymepix/processing/pipeline_packet_processor.py
 create mode 100644 pymepix/processing/rawfilesampler.py
 create mode 100644 tests/test_centroid_calculator.py
 create mode 100644 tests/test_post_processing.py

diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py
index 72dc88f..2dec24a 100644
--- a/pymepix/processing/acquisition.py
+++ b/pymepix/processing/acquisition.py
@@ -21,8 +21,8 @@
 """Module that contains predefined acquisition pipelines for the
user to use""" from .baseacquisition import AcquisitionPipeline -from .centroiding import Centroiding -from .packetprocessor import PacketProcessor +from .pipeline_centroid_calculator import PipelineCentroidCalculator +from .pipeline_packet_processor import PipelinePacketProcessor from .udpsampler import UdpSampler @@ -40,7 +40,7 @@ def __init__(self, data_queue, address, longtime, use_event=False, name="Pixel") self._event_window = (0, 10000) self.addStage(0, UdpSampler, address, longtime) - self.addStage(2, PacketProcessor, num_processes=2) + self.addStage(2, PipelinePacketProcessor, num_processes=2) self._reconfigureProcessor() def _reconfigureProcessor(self): @@ -50,7 +50,7 @@ def _reconfigureProcessor(self): ) ) self.getStage(2).configureStage( - PacketProcessor, + PipelinePacketProcessor, handle_events=self._use_events, event_window=self._event_window, ) @@ -138,14 +138,14 @@ def __init__(self, data_queue, address, longtime): self._samples = 5 self._epsilon = 2.0 - self.addStage(4, Centroiding, num_processes=25) + self.addStage(4, PipelineCentroidCalculator, num_processes=25) self._reconfigureCentroid() def _reconfigureCentroid(self): self._reconfigureProcessor() p = self.getStage(4).configureStage( - Centroiding, + PipelineCentroidCalculator, skip_data=self._skip_centroid, tot_filter=self._tot_threshold, epsilon=self._epsilon, diff --git a/pymepix/processing/centroiding.py b/pymepix/processing/centroiding.py deleted file mode 100644 index 33d8e38..0000000 --- a/pymepix/processing/centroiding.py +++ /dev/null @@ -1,236 +0,0 @@ -# This file is part of Pymepix -# -# In all scientific work using Pymepix, please reference it as -# -# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: -# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) -# https://doi.org/10.1088/1748-0221/14/10/P10003 -# https://arxiv.org/abs/1905.07999 -# -# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU -# General Public License as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with this program. If not, -# see . 
- -"""Processors relating to centroiding""" - -import time -from multiprocessing.sharedctypes import Value - -import numpy as np -import scipy.ndimage as nd -from sklearn.cluster import DBSCAN - -from .basepipeline import BasePipelineObject -from .datatypes import MessageType - - -class Centroiding(BasePipelineObject): - """Performs centroiding on EventData recieved from Packet processor""" - - tof_scale = 1e7 - - def __init__( - self, - skip_data=1, - tot_filter=0, - epsilon=2.0, - samples=5, - input_queue=None, - create_output=True, - num_outputs=1, - shared_output=None, - ): - BasePipelineObject.__init__( - self, - Centroiding.__name__, - input_queue=input_queue, - create_output=create_output, - num_outputs=num_outputs, - shared_output=shared_output, - ) - - self._centroid_count = 0 - self._search_time = 0.0 - self._blob_time = 0.0 - self._skip_data = Value("I", skip_data) - self._tot_threshold = Value("I", tot_filter) - self._epsilon = Value("d", epsilon) - self._min_samples = Value("I", samples) - - @property - def centroidSkip(self): - """Sets whether to process every nth pixel packet. - - For example, setting it to 2 means every second packet is processed. 1 means all pixel packets are processed. - - """ - return self._skip_data.value - - @centroidSkip.setter - def centroidSkip(self, value): - value = max(1, value) - self._skip_data.value = value - - @property - def totThreshold(self): - return self._tot_threshold.value - - @totThreshold.setter - def totThreshold(self, value): - value = max(0, value) - self._tot_threshold.value = value - - @property - def epsilon(self): - """Sets whether to process every nth pixel packet. - - For example, setting it to 2 means every second packet is processed. 1 means all pixel packets are processed. - - """ - return self._epsilon.value - - @epsilon.setter - def epsilon(self, value): - # value = max(1,value) - self.info("Epsilon set to {}".format(value)) - self._epsilon.value = value - - @property - def samples(self): - return self._min_samples.value - - @samples.setter - def samples(self, value): - self._min_samples.value = value - - def process(self, data_type, data): - if data_type != MessageType.EventData: - return None, None - shot, x, y, tof, tot = data - - res = self.process_centroid(shot, x, y, tof, tot) - if res is not None: - self.pushOutput(res[0], res[1]) - - return None, None - - def process_centroid(self, shot, x, y, tof, tot): - tot_filter = tot > self.totThreshold - # Filter out pixels - shot = shot[tot_filter] - x = x[tot_filter] - y = y[tot_filter] - tof = tof[tot_filter] - tot = tot[tot_filter] - - start = time.time() - labels = self.find_cluster( - shot, x, y, tof, epsilon=self.epsilon, min_samples=self.samples - ) - self._search_time = time.time() - start - label_filter = labels != 0 - - if labels is None: - return None, None - - # print(labels[label_filter ].size) - if labels[label_filter].size == 0: - return None, None - start = time.time() - props = self.cluster_properties( - shot[label_filter], - x[label_filter], - y[label_filter], - tof[label_filter], - tot[label_filter], - labels[label_filter], - ) - - self._blob_time = time.time() - start - return MessageType.CentroidData, props - - def find_cluster(self, shot, x, y, tof, epsilon=2, min_samples=2): - - if shot.size == 0: - return None - - X = np.vstack((shot * epsilon * 1000, x, y, tof * self.tof_scale)).transpose() - dist = DBSCAN( - eps=epsilon, min_samples=min_samples, metric="euclidean", n_jobs=1 - ).fit(X) - - return dist.labels_ + 1 - - def cluster_properties(self, 
shot, x, y, tof, tot, labels): - label_index = np.unique(labels) - tot_max = np.array( - nd.maximum_position(tot, labels=labels, index=label_index) - ).flatten() - - tot_sum = nd.sum(tot, labels=labels, index=label_index) - cluster_x = np.array( - nd.sum(x * tot, labels=labels, index=label_index) / tot_sum - ).flatten() - cluster_y = np.array( - nd.sum(y * tot, labels=labels, index=label_index) / tot_sum - ).flatten() - cluster_tof = np.array( - nd.sum(tof * tot, labels=labels, index=label_index) / tot_sum - ).flatten() - cluster_tot = tot[tot_max] - # cluster_tof = tof[tot_max] - cluster_shot = shot[tot_max] - - return cluster_shot, cluster_x, cluster_y, cluster_tof, cluster_tot - - # def cluster_properties(self,shot,x,y,tof,tot,labels): - # label_iter = np.unique(labels) - # total_objects = label_iter.size - - # valid_objects = 0 - # #Prepare our output - # cluster_shot = np.ndarray(shape=(total_objects,),dtype=np.int) - # cluster_x = np.ndarray(shape=(total_objects,),dtype=np.float64) - # cluster_y = np.ndarray(shape=(total_objects,),dtype=np.float64) - # cluster_eig = np.ndarray(shape=(total_objects,2,),dtype=np.float64) - # cluster_area = np.ndarray(shape=(total_objects,),dtype=np.float64) - # cluster_integral = np.ndarray(shape=(total_objects,),dtype=np.float64) - # cluster_tof = np.ndarray(shape=(total_objects,),dtype=np.float64) - - # for idx in range(total_objects): - - # obj_slice = (labels == label_iter[idx]) - # obj_shot = shot[obj_slice] - # #print(obj_shot.size) - # obj_x = x[obj_slice] - # obj_y = y[obj_slice] - - # obj_tot = tot[obj_slice] - # max_tot = np.argmax(obj_tot) - - # moments = self.moments_com(obj_x,obj_y,obj_tot) - # if moments is None: - # continue - - # x_bar,y_bar,area,integral,evals,evecs = moments - # obj_tof = tof[obj_slice] - # max_tot = np.argmax(obj_tot) - - # cluster_tof[valid_objects] = obj_tof[max_tot] - # cluster_x[valid_objects] = x_bar - # cluster_y[valid_objects] = y_bar - # cluster_area[valid_objects] = area - # cluster_integral[valid_objects] = integral - # cluster_eig[valid_objects]=evals - # cluster_shot[valid_objects] = obj_shot[0] - # valid_objects+=1 - # return cluster_shot[:valid_objects],cluster_x[:valid_objects], \ - # cluster_y[:valid_objects],cluster_area[:valid_objects], \ - # cluster_integral[:valid_objects],cluster_eig[:valid_objects],cluster_eig[:valid_objects,:],cluster_tof[:valid_objects] diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py new file mode 100644 index 0000000..fa4fdce --- /dev/null +++ b/pymepix/processing/logic/centroid_calculator.py @@ -0,0 +1,201 @@ +import multiprocessing as mp + +import numpy as np +import scipy.ndimage as nd +from sklearn.cluster import DBSCAN + +from pymepix.processing.logic.processing_step import ProcessingStep + +class CentroidCalculator(ProcessingStep): + + def __init__(self, tot_threshold=0, epsilon=2, min_samples=5, chunk_size_limit=6_500, + cent_timewalk_lut=None): + + super().__init__("CentroidCalculator") + self._epsilon = epsilon + self._min_samples = min_samples + self._tof_scale = 1e7 + self._tot_threshold = tot_threshold + self._cent_timewalk_lut = cent_timewalk_lut + self._chunk_size_limit = chunk_size_limit + + self.removed_by_dbscan = 0 + + def process(self, data): + if data is not None: + shot, x, y, tof, tot = data + + chunks = self.__divide_into_chunks(shot, x, y, tof, tot) + # chunks = [data] + centroids_in_chunks = self.perform_centroiding(chunks) + + return 
self.__centroid_chunks_to_centroids(centroids_in_chunks)
+        else:
+            return None
+
+    def debug_condition(self, chunks, size):
+        sum = 0
+        found_triggers = []
+        for chunk in chunks:
+            sum += chunk[0].shape[0]
+            found_triggers += np.unique(chunk[0]).tolist()
+        return sum != size or np.all(np.unique(found_triggers, return_counts=True)[1] > 1)
+
+    def __divide_into_chunks(self, shot, x, y, tof, tot):
+        # Reordering the voxels can have an impact on the clustering result. See the
+        # CentroidCalculator.perform_clustering docstring for further information!
+        order = shot.argsort()
+        shot, x, y, tof, tot = shot[order], x[order], y[order], tof[order], tot[order]
+        split_indices = self.__calc_trig_chunks_split_indices(shot)
+        if len(split_indices) > 0:
+
+            shot, x, y, tof, tot = [np.split(arr, split_indices) for arr in [shot, x, y, tof, tot]]
+
+            chunks = []
+            for i in range(len(shot)):
+                chunks.append((shot[i], x[i], y[i], tof[i], tot[i]))
+            return chunks
+        else:
+            return [(shot, x, y, tof, tot)]
+
+    def __calc_trig_chunks_split_indices(self, shot):
+        _, unique_trig_nr_indices, unique_trig_nr_counts = np.unique(shot, return_index=True, return_counts=True)
+
+        trigger_chunks = []
+        trigger_chunk_voxel_counter = 0
+        for index, unique_trig_nr_index in enumerate(unique_trig_nr_indices):
+            if trigger_chunk_voxel_counter < self._chunk_size_limit:
+                trigger_chunk_voxel_counter += unique_trig_nr_counts[index]
+            else:
+                trigger_chunks.append(unique_trig_nr_index)
+                trigger_chunk_voxel_counter = unique_trig_nr_counts[index]
+
+        return trigger_chunks
+
+    def __centroid_chunks_to_centroids(self, chunks):
+        centroids = [[] for i in range(7)]
+        for chunk in chunks:
+            if chunk is not None:
+                for index, coordinate in enumerate(chunk):
+                    centroids[index].append(coordinate)
+
+        return [np.concatenate(coordinate) for coordinate in centroids]
+
+    def perform_centroiding(self, chunks):
+        return map(self.calculate_centroids, chunks)
+
+    def calculate_centroids(self, chunk):
+        shot, x, y, tof, tot = chunk
+
+        tot_filter = tot > self._tot_threshold
+        # Filter out pixels
+        shot = shot[tot_filter]
+        x = x[tot_filter]
+        y = y[tot_filter]
+        tof = tof[tot_filter]
+        tot = tot[tot_filter]
+
+        labels = self.perform_clustering(shot, x, y, tof)
+
+        if labels is not None:
+            label_filter = labels != 0
+
+            self.removed_by_dbscan += np.where(label_filter == False)[0].shape[0]
+
+            if labels[label_filter].size > 0:
+                return self.calculate_centroids_properties(
+                    shot[label_filter],
+                    x[label_filter],
+                    y[label_filter],
+                    tof[label_filter],
+                    tot[label_filter],
+                    labels[label_filter],
+                )
+
+        return None
+
+    def perform_clustering(self, shot, x, y, tof):
+        """ The clustering with DBSCAN, which is performed in this function, depends in rare cases
+        on the order of the data. Therefore, reordering can lead to slightly changed results,
+        which should not be an issue.
+
+        Martin Ester, Hans-Peter Kriegel, Jörg Sander, Xiaowei Xu: A Density-Based Algorithm for
+        Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf)
+        A more specific explanation can be found here:
+        https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic"""
+        if x.size > 0:
+            X = np.column_stack((shot * self._epsilon * 1_000, x, y, tof * self._tof_scale))
+
+            dist = DBSCAN(
+                eps=self._epsilon, min_samples=self._min_samples, metric="euclidean", n_jobs=1
+            ).fit(X)
+
+            return dist.labels_ + 1
+
+        return None
+
+    def calculate_centroids_properties(self, shot, x, y, tof, tot, labels):
+        """
+        Calculates the properties of the centroids from labelled data points.
+
+        ATTENTION! The order of the points can have an impact on the result due to errors in
+        floating point arithmetic.
+
+        Very simple example:
+            arr = np.random.random(100)
+            arr.sum() - np.sort(arr).sum()
+        This example shows that there is a very small difference between the two sums. The inaccuracy of
+        floating point arithmetic can depend on the order of the values. Strongly simplified, (3.2 + 3.4) + 2.7
+        and 3.2 + (3.4 + 2.7) can be unequal for floating point numbers.
+
+        Therefore there is no guarantee for strictly equal results. Even after sorting. The error we observed
+        can be about 10^-22 nano seconds.
+
+        Currently this issue exists only for the TOF-column, as the other columns are integer-based values.
+        """
+        label_index, cluster_size = np.unique(labels, return_counts=True)
+        tot_max = np.array(
+            nd.maximum_position(tot, labels=labels, index=label_index)
+        ).flatten()
+
+        tot_sum = nd.sum(tot, labels=labels, index=label_index)
+        tot_mean = nd.mean(tot, labels=labels, index=label_index)
+        cluster_x = np.array(
+            nd.sum(x * tot, labels=labels, index=label_index) / tot_sum
+        ).flatten()
+        cluster_y = np.array(
+            nd.sum(y * tot, labels=labels, index=label_index) / tot_sum
+        ).flatten()
+        cluster_tof = np.array(
+            nd.sum(tof * tot, labels=labels, index=label_index) / tot_sum
+        ).flatten()
+        cluster_totMax = tot[tot_max]
+        cluster_totAvg = tot_mean
+        cluster_shot = shot[tot_max]
+
+        if self._cent_timewalk_lut is not None:
+            # cluster_tof -= self._timewalk_lut[(cluster_tot / 25).astype(np.int) - 1]
+            # cluster_tof *= 1e6
+            cluster_tof -= self._cent_timewalk_lut[(cluster_totMax // 25).astype(int) - 1] * 1e3
+            # TODO: should totAvg not also be timewalk corrected?!
+            # cluster_tof *= 1e-6
+
+        return cluster_shot, cluster_x, cluster_y, cluster_tof, cluster_totAvg, cluster_totMax, cluster_size
+
+
+class CentroidCalculatorPooled(CentroidCalculator):
+
+    def __init__(self, number_of_processes=4, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._number_of_processes = number_of_processes
+
+    def perform_centroiding(self, chunks):
+        return self._pool.map(self.calculate_centroids, chunks)
+
+    def pre_process(self):
+        self._pool = mp.Pool(self._number_of_processes)
+        return super().pre_process()
+
+    def post_process(self):
+        self._pool.close()
+        self._pool.join()
+        return super().post_process()
+
+    def __getstate__(self):
+        self_dict = self.__dict__.copy()
+        del self_dict['_pool']
+        return self_dict
\ No newline at end of file
diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py
new file mode 100644
index 0000000..30ff72b
--- /dev/null
+++ b/pymepix/processing/logic/packet_processor.py
@@ -0,0 +1,267 @@
+from enum import IntEnum
+
+import numpy as np
+from pymepix.core.log import Logger
+
+from pymepix.processing.logic.processing_step import ProcessingStep
+
+
+class PixelOrientation(IntEnum):
+    """Defines how row and col are interpreted in the output"""
+
+    Up = 0
+    """Up is the default, x=column,y=row"""
+    Left = 1
+    """x=row, y=-column"""
+    Down = 2
+    """x=-column, y = -row """
+    Right = 3
+    """x=-row, y=column"""
+
+
+class PacketProcessor(ProcessingStep):
+    def __init__(self, handle_events=True, event_window=(0.0, 10000.0), position_offset=(0, 0),
+                 orientation=PixelOrientation.Up, start_time=0, timewalk_lut=None):
+
+        super().__init__("PacketProcessor")
+        self._handle_events = handle_events
+        self._min_window, self._max_window = event_window
+        self._orientation = orientation
+        self._x_offset, self._y_offset = position_offset
+        self._start_time = start_time
+        self._timewalk_lut = timewalk_lut
+
+        self._trigger_counter = 0
+
+        self.clearBuffers()
+
+    def process(self, data):
+        packet_view = memoryview(data)
+        packet = np.frombuffer(packet_view[:-8], dtype=np.uint64)
+        # needs to be an integer or "(ltime >> 28) & 0x3" fails
+        longtime = int(np.frombuffer(packet_view[-8:], dtype=np.uint64)[0])
+
+        if len(packet) > 0:
+
+            header = ((packet & 0xF000000000000000) >> 60) & 0xF
+            subheader = ((packet & 0x0F00000000000000) >> 56) & 0xF
+
+            pixels = packet[np.logical_or(header == 0xA, header == 0xB)]
+            triggers = packet[
+                np.logical_and(
+                    np.logical_or(header == 0x4, header == 0x6), subheader == 0xF
+                )
+            ]
+
+            if pixels.size > 0:
+                self.process_pixels(np.int64(pixels), longtime)
+
+            if triggers.size > 0:
+                self.process_triggers(np.int64(triggers), longtime)
+
+            if self._handle_events:
+                return self.find_events_fast()
+
+        return None
+
+    def post_process(self):
+        return self.find_events_fast_post()
+
+    def updateBuffers(self, val_filter):
+        self._x = self._x[val_filter]
+        self._y = self._y[val_filter]
+        self._toa = self._toa[val_filter]
+        self._tot = self._tot[val_filter]
+
+    def getBuffers(self, val_filter=None):
+        if val_filter is None:
+            return (
+                np.copy(self._x),
+                np.copy(self._y),
+                np.copy(self._toa),
+                np.copy(self._tot),
+            )
+        else:
+            return (
+                np.copy(self._x[val_filter]),
+                np.copy(self._y[val_filter]),
+                np.copy(self._toa[val_filter]),
+                np.copy(self._tot[val_filter]),
+            )
+
+    def clearBuffers(self):
+        self._x = None
+        self._y = None
+        self._tot = None
+        self._toa = None
+        self._triggers = None
+
+    def process_triggers(self, pixdata, longtime):
+        coarsetime = pixdata >> 12 & 
0xFFFFFFFF + coarsetime = self.correct_global_time(coarsetime, longtime) + tmpfine = (pixdata >> 5) & 0xF + tmpfine = ((tmpfine - 1) << 9) // 12 + trigtime_fine = (pixdata & 0x0000000000000E00) | (tmpfine & 0x00000000000001FF) + time_unit = 25.0 / 4096 + tdc_time = coarsetime * 25e-9 + trigtime_fine * time_unit * 1e-9 + + m_trigTime = tdc_time + + if self._handle_events: + if self._triggers is None: + self._triggers = m_trigTime + else: + self._triggers = np.append(self._triggers, m_trigTime) + + def orientPixels(self, col, row): + """ Orient the pixels based on Timepix orientation """ + if self._orientation is PixelOrientation.Up: + return col, row + elif self._orientation is PixelOrientation.Left: + return row, 255 - col + elif self._orientation is PixelOrientation.Down: + return 255 - col, 255 - row + elif self._orientation is PixelOrientation.Right: + return 255 - row, col + + def process_pixels(self, pixdata, longtime): + + dcol = (pixdata & 0x0FE0000000000000) >> 52 + spix = (pixdata & 0x001F800000000000) >> 45 + pix = (pixdata & 0x0000700000000000) >> 44 + col = dcol + pix // 4 + row = spix + (pix & 0x3) + + data = (pixdata & 0x00000FFFFFFF0000) >> 16 + spidr_time = pixdata & 0x000000000000FFFF + ToA = (data & 0x0FFFC000) >> 14 + FToA = data & 0xF + ToT = ((data & 0x00003FF0) >> 4) * 25 + time_unit = 25.0 / 4096 + + ToA_coarse = ( + self.correct_global_time((spidr_time << 14) | ToA, longtime) + & 0xFFFFFFFFFFFF + ) + globalToA = (ToA_coarse << 12) - (FToA << 8) + globalToA += ((col // 2) % 16) << 8 + globalToA[((col // 2) % 16) == 0] += 16 << 8 + finalToA = globalToA * time_unit * 1e-9 + + if self._timewalk_lut is not None: + finalToA -= self._timewalk_lut[np.int_(ToT // 25) - 1] * 1e3 + + x, y = self.orientPixels(col, row) + + x += self._x_offset + y += self._y_offset + + # TODO: don't clatter queue with unnecessary stuff for now + # self.pushOutput(MessageType.PixelData, (x, y, finalToA, ToT)) + + if self._handle_events: + if self._x is None: + self._x = x + self._y = y + self._toa = finalToA + self._tot = ToT + else: + self._x = np.append(self._x, x) + self._y = np.append(self._y, y) + self._toa = np.append(self._toa, finalToA) + self._tot = np.append(self._tot, ToT) + + def correct_global_time(self, arr, ltime): + pixelbits = (arr >> 28) & 0x3 + ltimebits = (ltime >> 28) & 0x3 + # diff = (ltimebits - pixelbits).astype(np.int64) + # neg = (diff == 1) | (diff == -3) + # pos = (diff == -1) | (diff == 3) + # zero = (diff == 0) | (diff == 2) + + # res = ( (ltime) & 0xFFFFC0000000) | (arr & 0x3FFFFFFF) + diff = (ltimebits - pixelbits).astype(np.int64) + globaltime = (ltime & 0xFFFFC0000000) | (arr & 0x3FFFFFFF) + neg_diff = (diff == 1) | (diff == -3) + globaltime[neg_diff] = ((ltime - 0x10000000) & 0xFFFFC0000000) | ( + arr[neg_diff] & 0x3FFFFFFF + ) + pos_diff = (diff == -1) | (diff == 3) + globaltime[pos_diff] = ((ltime + 0x10000000) & 0xFFFFC0000000) | ( + arr[pos_diff] & 0x3FFFFFFF + ) + # res[neg] = ( (ltime - 0x10000000) & 0xFFFFC0000000) | (arr[neg] & 0x3FFFFFFF) + # res[pos] = ( (ltime + 0x10000000) & 0xFFFFC0000000) | (arr[pos] & 0x3FFFFFFF) + # arr[zero] = ( (ltime) & 0xFFFFC0000000) | (arr[zero] & 0x3FFFFFFF) + # arr[zero] = ( (ltime) & 0xFFFFC0000000) | (arr[zero] & 0x3FFFFFFF) + + return globaltime + + def find_events_fast(self): + if self.__exist_enough_triggers(): + self._triggers = self._triggers[np.argmin(self._triggers) :] + + if self.__toa_is_not_empty(): + # Get our start/end triggers to bin events accordingly + start = self._triggers[0:-1:] + if start.size > 0: + 
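+                # Number the triggers consecutively across calls: event numbers
+                # continue from self._trigger_counter, so they stay unique over
+                # the whole acquisition.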
trigger_counter = np.arange( + self._trigger_counter, self._trigger_counter + start.size - 1, dtype=int + ) + self._trigger_counter = trigger_counter[-1] + 1 + + # end = self._triggers[1:-1:] + # Get the first and last triggers in pile + first_trigger = start[0] + last_trigger = start[-1] + # Delete useless pixels before the first trigger + self.updateBuffers(self._toa >= first_trigger) + # grab only pixels we care about + x, y, toa, tot = self.getBuffers(self._toa < last_trigger) + self.updateBuffers(self._toa >= last_trigger) + try: + event_mapping = np.digitize(toa, start) - 1 + except Exception as e: + self.error("Exception has occured {} due to ", str(e)) + self.error("Writing output TOA {}".format(toa)) + self.error("Writing triggers {}".format(start)) + self.error("Flushing triggers!!!") + self._triggers = self._triggers[-2:] + return None + self._triggers = self._triggers[-2:] + + tof = toa - start[event_mapping] + event_number = trigger_counter[event_mapping] + + exp_filter = (tof >= self._min_window) & (tof <= self._max_window) + + result = ( + event_number[exp_filter], + x[exp_filter], + y[exp_filter], + tof[exp_filter], + tot[exp_filter], + ) + + if result[0].size > 0: + timeStamps = np.uint64( + start[np.unique(event_mapping)] * 1e9 + self._start_time + ) # timestamp in ns for trigger event + return result, (np.unique(result[0]), timeStamps) + + return None # Clear out the triggers since they have nothing + + def __exist_enough_triggers(self): + return self._triggers is not None and self._triggers.size >= 4 + + def __toa_is_not_empty(self): + return self._toa is not None and self._toa.size > 0 + + def find_events_fast_post(self): + """Call this function at the very end of to also have the last two trigger events processed""" + # add an imaginary last trigger event after last pixel event for np.digitize to work + if self._toa is not None: + self._triggers = np.concatenate( + (self._triggers, np.array([self._toa.max() + 1, self._toa.max() + 2])) + ) + return self.find_events_fast() \ No newline at end of file diff --git a/pymepix/processing/logic/processing_step.py b/pymepix/processing/logic/processing_step.py new file mode 100644 index 0000000..10414c1 --- /dev/null +++ b/pymepix/processing/logic/processing_step.py @@ -0,0 +1,18 @@ +from abc import abstractmethod, ABC + +from pymepix.core.log import Logger + +class ProcessingStep(Logger, ABC): + + def __init__(self, name): + super().__init__(name) + + def pre_process(self): + pass + + def post_process(self): + pass + + @abstractmethod + def process(self, data): + pass \ No newline at end of file diff --git a/pymepix/processing/packetprocessor.py b/pymepix/processing/packetprocessor.py deleted file mode 100644 index 2dae1af..0000000 --- a/pymepix/processing/packetprocessor.py +++ /dev/null @@ -1,368 +0,0 @@ -# This file is part of Pymepix -# -# In all scientific work using Pymepix, please reference it as -# -# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: -# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) -# https://doi.org/10.1088/1748-0221/14/10/P10003 -# https://arxiv.org/abs/1905.07999 -# -# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU -# General Public License as published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with this program. If not, -# see . - -from enum import IntEnum -from multiprocessing.sharedctypes import Value - -import numpy as np -import zmq - -from .basepipeline import BasePipelineObject -from .datatypes import MessageType - - -class PixelOrientation(IntEnum): - """Defines how row and col are intepreted in the output""" - - Up = 0 - """Up is the default, x=column,y=row""" - Left = 1 - """x=row, y=-column""" - Down = 2 - """x=-column, y = -row """ - Right = 3 - """x=-row, y=column""" - - -class PacketProcessor(BasePipelineObject): - """Processes Pixel packets for ToA, ToT,triggers and events - - This class, creates a UDP socket connection to SPIDR and recivies the UDP packets from Timepix - It then pre-processes them and sends them off for more processing - """ - - def __init__( - self, - handle_events=False, - event_window=(0.0, 10000.0), - position_offset=(0, 0), - orientation=PixelOrientation.Up, - input_queue=None, - create_output=True, - num_outputs=1, - shared_output=None, - ): - # set input_queue to None for now, or baseaqusition.build would have to be modified - # input_queue is replace by zmq - BasePipelineObject.__init__( - self, - PacketProcessor.__name__, - input_queue=None, - create_output=create_output, - num_outputs=num_outputs, - shared_output=shared_output, - ) - - self.clearBuffers() - self._orientation = orientation - self._x_offset, self._y_offset = position_offset - - self._trigger_counter = 0 - - self._handle_events = handle_events - min_window = event_window[0] - max_window = event_window[1] - self._min_event_window = Value("d", min_window) - self._max_event_window = Value("d", max_window) - - def updateBuffers(self, val_filter): - self._x = self._x[val_filter] - self._y = self._y[val_filter] - self._toa = self._toa[val_filter] - self._tot = self._tot[val_filter] - - def getBuffers(self, val_filter=None): - if val_filter is None: - return ( - np.copy(self._x), - np.copy(self._y), - np.copy(self._toa), - np.copy(self._tot), - ) - else: - return ( - np.copy(self._x[val_filter]), - np.copy(self._y[val_filter]), - np.copy(self._toa[val_filter]), - np.copy(self._tot[val_filter]), - ) - - def clearBuffers(self): - self._x = None - self._y = None - self._tot = None - self._toa = None - self._triggers = None - - @property - def minWindow(self): - return self._min_event_window.value - - @minWindow.setter - def minWindow(self, value): - self._min_event_window.value = value - - @property - def maxWindow(self): - return self._max_event_window.value - - @maxWindow.setter - def maxWindow(self, value): - self._max_event_window.value = value - - @property - def _eventWindow(self): - return self._min_event_window.value, self._max_event_window.value - - def init_new_process(self): - """create connections and initialize variables in new process""" - self.debug("create ZMQ socket") - ctx = zmq.Context.instance() - self._packet_sock = ctx.socket(zmq.PULL) - self._packet_sock.connect("ipc:///tmp/packetProcessor") - - def pre_run(self): - self.info("Running with triggers? 
{}".format(self._handle_events)) - self.init_new_process() - - def post_run(self): - self._packet_sock.close() - return None, None - - def process(self, data_type=None, data=None): - packet_view = memoryview(self._packet_sock.recv(copy=False)) - packet = np.frombuffer(packet_view[:-8], dtype=np.uint64) - # needs to be an integer or "(ltime >> 28) & 0x3" fails - longtime = int(np.frombuffer(packet_view[-8:], dtype=np.uint64)[0]) - - if len(packet) == 0: - return None, None - - # packets, longtime = data - # packet = packets - - header = ((packet & 0xF000000000000000) >> 60) & 0xF - subheader = ((packet & 0x0F00000000000000) >> 56) & 0xF - - pixels = packet[np.logical_or(header == 0xA, header == 0xB)] - triggers = packet[ - np.logical_and( - np.logical_or(header == 0x4, header == 0x6), subheader == 0xF - ) - ] - - if pixels.size > 0: - self.process_pixels(np.int64(pixels), longtime) - - if triggers.size > 0: - # print('triggers', triggers, longtime) - self.process_triggers(np.int64(triggers), longtime) - - if self._handle_events: - - events = self.find_events_fast() - - if events is not None: - return MessageType.EventData, events - - return None, None - - def filterBadTriggers(self): - self._triggers = self._triggers[np.argmin(self._triggers) :] - - def find_events_fast_post(self): - """Call this function at the very end of to also have the last two trigger events processed""" - # add an imaginary last trigger event after last pixel event for np.digitize to work - self._triggers = np.concatenate( - (self._triggers, np.array([self._toa.max() + 1, self._toa.max() + 2])) - ) - return self.find_events_fast() - - def find_events_fast(self): - if self._triggers is None: - return None - if self._triggers.size < 4: - return None - self._triggers = self._triggers[np.argmin(self._triggers) :] - if self._toa is None: - return None - if self._toa.size == 0: - # Clear out the triggers since they have nothing - return None - - # Get our start/end triggers to bin events accordingly - start = self._triggers[0:-1:] - if start.size == 0: - return None - - min_window, max_window = self._eventWindow - - trigger_counter = np.arange( - self._trigger_counter, self._trigger_counter + start.size - 1, dtype=np.int - ) - self._trigger_counter = trigger_counter[-1] + 1 - - # end = self._triggers[1:-1:] - # Get the first and last triggers in pile - first_trigger = start[0] - last_trigger = start[-1] - # Delete useless pixels before the first trigger - self.updateBuffers(self._toa >= first_trigger) - # grab only pixels we care about - x, y, toa, tot = self.getBuffers(self._toa < last_trigger) - self.updateBuffers(self._toa >= last_trigger) - try: - event_mapping = np.digitize(toa, start) - 1 - except Exception as e: - self.error("Exception has occured {} due to ", str(e)) - self.error("Writing output TOA {}".format(toa)) - self.error("Writing triggers {}".format(start)) - self.error("Flushing triggers!!!") - self._triggers = self._triggers[-2:] - return None - self._triggers = self._triggers[-2:] - - tof = toa - start[event_mapping] - event_number = trigger_counter[event_mapping] - - exp_filter = (tof >= min_window) & (tof <= max_window) - - result = ( - event_number[exp_filter], - x[exp_filter], - y[exp_filter], - tof[exp_filter], - tot[exp_filter], - ) - - if result[0].size > 0: - return result - else: - return None - - def correct_global_time(self, arr, ltime): - pixelbits = (arr >> 28) & 0x3 - ltimebits = (ltime >> 28) & 0x3 - # diff = (ltimebits - pixelbits).astype(np.int64) - # neg = (diff == 1) | (diff == -3) - # 
pos = (diff == -1) | (diff == 3) - # zero = (diff == 0) | (diff == 2) - - # res = ( (ltime) & 0xFFFFC0000000) | (arr & 0x3FFFFFFF) - diff = (ltimebits - pixelbits).astype(np.int64) - globaltime = (ltime & 0xFFFFC0000000) | (arr & 0x3FFFFFFF) - neg_diff = (diff == 1) | (diff == -3) - globaltime[neg_diff] = ((ltime - 0x10000000) & 0xFFFFC0000000) | ( - arr[neg_diff] & 0x3FFFFFFF - ) - pos_diff = (diff == -1) | (diff == 3) - globaltime[pos_diff] = ((ltime + 0x10000000) & 0xFFFFC0000000) | ( - arr[pos_diff] & 0x3FFFFFFF - ) - # res[neg] = ( (ltime - 0x10000000) & 0xFFFFC0000000) | (arr[neg] & 0x3FFFFFFF) - # res[pos] = ( (ltime + 0x10000000) & 0xFFFFC0000000) | (arr[pos] & 0x3FFFFFFF) - # arr[zero] = ( (ltime) & 0xFFFFC0000000) | (arr[zero] & 0x3FFFFFFF) - # arr[zero] = ( (ltime) & 0xFFFFC0000000) | (arr[zero] & 0x3FFFFFFF) - - return globaltime - - def process_triggers(self, pixdata, longtime): - coarsetime = pixdata >> 12 & 0xFFFFFFFF - coarsetime = self.correct_global_time(coarsetime, longtime) - tmpfine = (pixdata >> 5) & 0xF - tmpfine = ((tmpfine - 1) << 9) // 12 - trigtime_fine = (pixdata & 0x0000000000000E00) | (tmpfine & 0x00000000000001FF) - time_unit = 25.0 / 4096 - tdc_time = coarsetime * 25e-9 + trigtime_fine * time_unit * 1e-9 - - m_trigTime = tdc_time - - # TODO: don't clatter queue with unnecessary stuff for now - # self.pushOutput(MessageType.TriggerData, m_trigTime) - # print(m_trigTime) - if self._handle_events: - if self._triggers is None: - self._triggers = m_trigTime - else: - self._triggers = np.append(self._triggers, m_trigTime) - - def orientPixels(self, col, row): - if self._orientation is PixelOrientation.Up: - return col, row - elif self._orientation is PixelOrientation.Left: - return row, 255 - col - elif self._orientation is PixelOrientation.Down: - return 255 - col, 255 - row - elif self._orientation is PixelOrientation.Right: - return 255 - row, col - - def process_pixels(self, pixdata, longtime): - - dcol = (pixdata & 0x0FE0000000000000) >> 52 - spix = (pixdata & 0x001F800000000000) >> 45 - pix = (pixdata & 0x0000700000000000) >> 44 - col = dcol + pix // 4 - row = spix + (pix & 0x3) - - data = (pixdata & 0x00000FFFFFFF0000) >> 16 - spidr_time = pixdata & 0x000000000000FFFF - ToA = (data & 0x0FFFC000) >> 14 - FToA = data & 0xF - ToT = ((data & 0x00003FF0) >> 4) * 25 - time_unit = 25.0 / 4096 - - # print('LONGTIME',longtime*25E-9) - # print('SpidrTime',(spidr_time << 14)*25E-9) - # print('TOA before global',((spidr_time << 14) |ToA)*25*1E-9) - - ToA_coarse = ( - self.correct_global_time((spidr_time << 14) | ToA, longtime) - & 0xFFFFFFFFFFFF - ) - # print('TOA after global',ToA_coarse*25*1E-9,longtime) - globalToA = (ToA_coarse << 12) - (FToA << 8) - # print('TOA after FTOa',globalToA*time_unit*1E-9) - globalToA += ((col // 2) % 16) << 8 - globalToA[((col // 2) % 16) == 0] += 16 << 8 - finalToA = globalToA * time_unit * 1e-9 - - # print('finalToa',finalToA) - # Orient the pixels based on Timepix orientation - x, y = self.orientPixels(col, row) - - # # - x += self._x_offset - y += self._y_offset - - # TODO: don't clatter queue with unnecessary stuff for now - # self.pushOutput(MessageType.PixelData, (x, y, finalToA, ToT)) - - # print('PIXEL',finalToA,longtime) - if self._handle_events: - if self._x is None: - self._x = x - self._y = y - self._toa = finalToA - self._tot = ToT - else: - self._x = np.append(self._x, x) - self._y = np.append(self._y, y) - self._toa = np.append(self._toa, finalToA) - self._tot = np.append(self._tot, ToT) diff --git 
a/pymepix/processing/pipeline_centroid_calculator.py b/pymepix/processing/pipeline_centroid_calculator.py
new file mode 100644
index 0000000..c57da4d
--- /dev/null
+++ b/pymepix/processing/pipeline_centroid_calculator.py
@@ -0,0 +1,49 @@
+# This file is part of Pymepix
+#
+# In all scientific work using Pymepix, please reference it as
+#
+# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper:
+# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019)
+# https://doi.org/10.1088/1748-0221/14/10/P10003
+# https://arxiv.org/abs/1905.07999
+#
+# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU
+# General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with this program. If not,
+# see <http://www.gnu.org/licenses/>.
+
+"""Processors relating to centroiding"""
+from pymepix.processing.logic.centroid_calculator import CentroidCalculator
+
+from .basepipeline import BasePipelineObject
+
+
+class PipelineCentroidCalculator(BasePipelineObject):
+    """Performs centroiding on EventData received from the packet processor"""
+
+    def __init__(
+        self,
+        centroid_calculator: CentroidCalculator = CentroidCalculator(),
+        input_queue=None,
+        create_output=True,
+        num_outputs=1,
+        shared_output=None,
+    ):
+        super().__init__(
+            self,
+            PipelineCentroidCalculator.__name__,
+            input_queue=input_queue,
+            create_output=create_output,
+            num_outputs=num_outputs,
+            shared_output=shared_output,
+        )
+        self.centroid_calculator = centroid_calculator
+
+    def process(self, data_type, data):
+        return self.centroid_calculator.process((data_type, data))
\ No newline at end of file
diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py
new file mode 100644
index 0000000..2a79d00
--- /dev/null
+++ b/pymepix/processing/pipeline_packet_processor.py
@@ -0,0 +1,86 @@
+# This file is part of Pymepix
+#
+# In all scientific work using Pymepix, please reference it as
+#
+# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper:
+# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019)
+# https://doi.org/10.1088/1748-0221/14/10/P10003
+# https://arxiv.org/abs/1905.07999
+#
+# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU
+# General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with this program. If not,
+# see <http://www.gnu.org/licenses/>.
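+
+# NOTE: this pipeline stage does not read packets from an input queue; it
+# pulls them from a ZMQ PULL socket connected to "ipc:///tmp/packetProcessor".
+# The expected wire format is the raw Timepix packet words followed by the
+# current 64-bit "longtime" timestamp as the final 8 bytes (see process()
+# below). A matching feeder would look roughly like the following hypothetical
+# sketch, where `packets` stands for a NumPy uint64 array of raw packet words
+# and the real sender in the pipeline is the UDP sampler:
+#
+#     import numpy as np
+#     import zmq
+#
+#     sock = zmq.Context.instance().socket(zmq.PUSH)
+#     sock.bind("ipc:///tmp/packetProcessor")
+#     sock.send(packets.tobytes() + np.uint64(longtime).tobytes())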
+
+from enum import IntEnum
+
+import zmq
+
+from .basepipeline import BasePipelineObject
+from .logic.packet_processor import PacketProcessor
+
+
+class PixelOrientation(IntEnum):
+    """Defines how row and col are interpreted in the output"""
+
+    Up = 0
+    """Up is the default, x=column, y=row"""
+    Left = 1
+    """x=row, y=-column"""
+    Down = 2
+    """x=-column, y=-row"""
+    Right = 3
+    """x=-row, y=column"""
+
+
+class PipelinePacketProcessor(BasePipelineObject):
+    """Processes pixel packets for ToA, ToT, triggers and events
+
+    This class receives the raw UDP packets from Timepix (relayed by the UDP sampler over ZMQ),
+    pre-processes them and sends them off for more processing
+    """
+
+    def __init__(
+        self,
+        packet_processor: PacketProcessor = PacketProcessor(),
+        input_queue=None,
+        create_output=True,
+        num_outputs=1,
+        shared_output=None
+    ):
+        # set input_queue to None for now, or baseacquisition.build would have to be modified;
+        # the input queue is replaced by ZMQ
+        super().__init__(
+            self,
+            PipelinePacketProcessor.__name__,
+            input_queue=input_queue,
+            create_output=create_output,
+            num_outputs=num_outputs,
+            shared_output=shared_output,
+        )
+        self.packet_processor = packet_processor
+
+    def init_new_process(self):
+        """create connections and initialize variables in new process"""
+        self.debug("create ZMQ socket")
+        ctx = zmq.Context.instance()
+        self._packet_sock = ctx.socket(zmq.PULL)
+        self._packet_sock.connect("ipc:///tmp/packetProcessor")
+
+    def pre_run(self):
+        self.info("Running with triggers? {}".format(self._handle_events))
+        self.init_new_process()
+        self.packet_processor.pre_execution()
+
+    def post_run(self):
+        self._packet_sock.close()
+        return self.packet_processor.post_execution()
+
+    def process(self, data_type=None, data=None):
+        return self.packet_processor.process(self._packet_sock.recv(copy=False))
\ No newline at end of file
diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py
new file mode 100644
index 0000000..41c645b
--- /dev/null
+++ b/pymepix/processing/rawfilesampler.py
@@ -0,0 +1,273 @@
+# This file is part of Pymepix
+#
+# In all scientific work using Pymepix, please reference it as
+#
+# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper:
+# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019)
+# https://doi.org/10.1088/1748-0221/14/10/P10003
+# https://arxiv.org/abs/1905.07999
+#
+# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU
+# General Public License as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with this program. If not,
+# see <http://www.gnu.org/licenses/>.
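+
+# Offline counterpart of the online acquisition pipeline: the .raw byte stream
+# is decoded by a PacketProcessor, the resulting events are centroided by a
+# CentroidCalculator(Pooled), and both the raw events and the centroids are
+# written to HDF5 (see saveToHDF5 below).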
+import time
+import os
+
+import numpy as np
+import h5py
+from .logic.packet_processor import PacketProcessor
+from .logic.centroid_calculator import CentroidCalculator, CentroidCalculatorPooled
+
+
+class RawFileSampler():
+
+    def __init__(
+        self,
+        file_name,
+        output_file,
+        number_of_processes=None,
+        timewalk_file=None,
+        cent_timewalk_file=None
+    ):
+        self._filename = file_name
+        self._output_file = output_file
+        self.timewalk_file = timewalk_file
+        self.cent_timewalk_file = cent_timewalk_file
+
+        self._number_of_processes = number_of_processes
+
+    def init_new_process(self, file, startTime=0):
+        """create connections and initialize variables in new process"""
+
+        self._longtime = -1
+        self._longtime_msb = 0
+        self._longtime_lsb = 0
+        self._packet_buffer = []
+        self._last_longtime = 0
+        self._total_bytes = os.path.getsize(file)
+        self._read_bytes = 0
+        timewalk_lut = None
+        cent_timewalk_lut = None
+        if self.timewalk_file is not None:
+            timewalk_lut = np.load(self.timewalk_file)
+        if self.cent_timewalk_file is not None:
+            cent_timewalk_lut = np.load(self.cent_timewalk_file)
+
+        self.packet_processor = PacketProcessor(start_time=startTime, timewalk_lut=timewalk_lut)
+        self.centroid_calculator = CentroidCalculatorPooled(cent_timewalk_lut=cent_timewalk_lut)
+
+        self._startTime = startTime
+
+    def pre_run(self):
+        """init stuff which should only be available in new process"""
+
+        try:
+            os.remove(self._output_file)
+        except OSError:
+            pass
+
+        self._file = open(self._filename, "rb")
+
+        self.init_new_process(self._filename)
+        self._last_update = time.time()
+
+        self.packet_processor.pre_process()
+        self.centroid_calculator.pre_process()
+
+    def post_run(self):
+        result = self.packet_processor.post_process()
+
+        self._packet_buffer = []
+        if result is not None:
+            self.__calculate_and_save_centroids(*result)
+
+        self.centroid_calculator.post_process()
+        self._file.close()
+
+    def bytes_from_file(self, chunksize=8192):
+        last_progress = 0
+        print("Reading to memory", flush=True)
+        ba = np.fromfile(self._file, dtype="<u8")
+        print("Processing", flush=True)
+        for b in ba:
+            yield b
+            self._read_bytes += 8
+            progress = int(100 * self._read_bytes / self._total_bytes)
+            if progress != last_progress:
+                last_progress = progress
+                print("Progress {}%".format(progress), flush=True)
+
+    def handle_lsb_time(self, pixdata):
+        self._longtime_lsb = (pixdata & 0x0000FFFFFFFF0000) >> 16
+
+    def handle_msb_time(self, pixdata):
+        self._longtime_msb = (pixdata & 0x00000000FFFF0000) << 16
+        tmplongtime = self._longtime_msb | self._longtime_lsb
+        if ((tmplongtime + 0x10000000) < (self._longtime)) and (self._longtime > 0):
+            print(
+                "Large backward time jump {} {} ignoring".format(
+                    self._longtime * 25e-9, tmplongtime * 25e-9
+                )
+            )
+        elif (tmplongtime > (self._longtime + 0x10000000)) and (self._longtime > 0):
+            print(
+                "Large forward time jump {} {}".format(self._longtime * 25e-9, tmplongtime * 25e-9)
+            )
+            self._longtime = (self._longtime_msb - 0x10000000) | self._longtime_lsb
+        else:
+            self._longtime = tmplongtime
+        if self._last_longtime == 0:
+            self._last_longtime = self._longtime
+            return False
+
+        time_diff = (self._longtime - self._last_longtime) * 25e-9
+        # print('msb_time:', time_diff, tmplongtime)
+
+        if (time_diff) > 5.0:
+            self._last_longtime = self._longtime
+            return True
+        else:
+            return False
+
+    def handle_other(self, pixdata):
+        """discard data which arrives before the first timestamp (heartbeat) data"""
+        if self._longtime == -1:
+            return
+
+        self._packet_buffer.append(pixdata)
+
+    def push_data(self, post=False):
+        result = self.__run_packet_processor(self._packet_buffer)
+
+        self._packet_buffer = []
+        if result is not None:
+            self.__calculate_and_save_centroids(*result)
+
+    def __run_packet_processor(self, packet_buffer):
+        if len(packet_buffer) > 0:
+            packet_buffer.append(np.uint64(self._longtime))
+            return self.packet_processor.process(np.array(packet_buffer, dtype=np.uint64).tobytes())
+
+        return None
+
+    def __calculate_and_save_centroids(self, events, timestamps):
+        centroids = self.centroid_calculator.process(events)
+        self.saveToHDF5(self._output_file, events, centroids, timestamps)
+
+    def saveToHDF5(self, output_file, raw, clusters, timeStamps):
+        if output_file is not None:
+            with h5py.File(output_file, "a") as f:
+                names = ["trigger nr", "x", "y", "tof", "tot avg", "tot max", "clustersize"]
+                ###############
+                # save centroided data
+                if clusters is not None:
+                    if f.keys().__contains__("centroided"):
+                        for i, key in enumerate(names):
+                            dset = f["centroided"][key]
+                            dset.resize(dset.shape[0] + len(clusters[i]), axis=0)
+                            dset[-len(clusters[i]) :] = clusters[i]
+                    else:
+                        grp = f.create_group("centroided")
+                        grp.attrs["description"] = "centroided events"
+                        grp.attrs["nr events"] = 0
+                        for i, key in enumerate(names):
+                            grp.create_dataset(key, data=clusters[i], maxshape=(None,))
+                        f["centroided/tot max"].attrs["unit"] = "s"
+                        f["centroided/tot avg"].attrs["unit"] = "s"
+                        f["centroided/tot max"].attrs[
+                            "description"
+                        ] = "maximum of time above threshold in cluster"
+                        f["centroided/tot avg"].attrs[
+                            "description"
+                        ] = "mean of time above threshold in cluster"
+                        f["centroided/tof"].attrs["unit"] = "s"
+                        f["centroided/x"].attrs["unit"] = "pixel"
+                        f["centroided/y"].attrs["unit"] = "pixel"
+                        # out_file.flush()
+
+                ###############
+                # save raw data
+                names = ["trigger nr", "x", "y", "tof", "tot"]
+                if f.keys().__contains__("raw"):
+                    for i, key in enumerate(names):
+                        dset = f["raw"][key]
+                        dset.resize(dset.shape[0] + len(raw[i]), axis=0)
+                        dset[-len(raw[i]) :] = raw[i]
+                else:
+                    grp = f.create_group("raw")
+                    grp.attrs["description"] = "timewalk corrected raw events"
+                    grp.attrs["nr events"] = 0
+                    grp.create_dataset("trigger nr", data=raw[0].astype(np.uint64), maxshape=(None,))
+                    grp.create_dataset("x", data=raw[1].astype(np.uint8), maxshape=(None,))
+                    grp.create_dataset("y", data=raw[2].astype(np.uint8), maxshape=(None,))
+                    grp.create_dataset("tof", data=raw[3], maxshape=(None,))
+                    grp.create_dataset("tot", data=raw[4].astype(np.uint32), maxshape=(None,))
+
+                    f["raw/tof"].attrs["unit"] = "s"
+                    f["raw/tot"].attrs["unit"] = "s"
+                    f["raw/x"].attrs["unit"] = "pixel"
+                    f["raw/y"].attrs["unit"] = "pixel"
+
+                ###############
+                # save time stamp data
+                if self._startTime is not None:
+                    names = ["trigger nr", "timestamp"]
+                    if f.keys().__contains__("timing/timepix"):
+                        for i, key in enumerate(names):
+                            dset = f["timing/timepix"][key]
+                            dset.resize(dset.shape[0] + len(timeStamps[i]), axis=0)
+                            dset[-len(timeStamps[i]) :] = timeStamps[i]
+                    else:
+                        grp = f.create_group("timing")
+                        grp.attrs["description"] = "timing information from TimePix and facility"
+                        subgrp = grp.create_group("timepix")
+                        subgrp.attrs["description"] = "timing information from TimePix"
+                        subgrp.attrs["nr events"] = 0
+                        for i, key in enumerate(names):
+                            subgrp.create_dataset(
+                                key, data=timeStamps[i].astype(np.uint64), maxshape=(None,)
+                            )
+                        f["timing/timepix/timestamp"].attrs["unit"] = "ns"
+
+    def run(self):
+        """method which is executed in new process via multiprocessing.Process.start"""
+        self.pre_run()
+
+        for packet in self.bytes_from_file():
+            # if we left this as numpy we would have to write
+            # ((packet & 0xF000000000000000) >> np.uint64(60)) & np.uint64(0xF)
+            # see: https://stackoverflow.com/questions/30513741/python-bit-shifting-with-numpy
+            pixdata = int(packet)
+            header = ((pixdata & 0xF000000000000000) >> 60) & 0xF
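+            # Dispatch on the 4-bit packet header (see the branches below):
+            #   0xA or 0xB                 -> pixel data, buffered via handle_other()
+            #   0x4 or 0x6, subheader 0xF  -> TDC trigger data, buffered as well
+            #   0x4 or 0x6, subheader 0x4  -> LSB half of the global timer
+            #   0x4 or 0x6, subheader 0x5  -> MSB half of the global timer; a large
+            #                                 enough time jump flushes the buffer
+            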
should_push = False + # Read Pixel Matrix Sequential (Header=0hA0) + # Read Pixel Matrix Data-Driven (Header=0hB0) + if header == 0xA or header == 0xB: + self.handle_other(pixdata) + # 0x4X timer configuration + elif header == 0x4 or header == 0x6: + subheader = ((pixdata & 0x0F00000000000000) >> 56) & 0xF + if subheader == 0xF: + self.handle_other(pixdata) + elif subheader == 0x4: + self.handle_lsb_time(pixdata) + elif subheader == 0x5: + should_push = self.handle_msb_time(pixdata) + + if should_push: + self.push_data() + + if len(self._packet_buffer) > 0: + self.push_data() + + self.post_run() + print(f'removed by dbscan: {self.centroid_calculator.removed_by_dbscan}') \ No newline at end of file diff --git a/tests/test_centroid_calculator.py b/tests/test_centroid_calculator.py new file mode 100644 index 0000000..83e5f9a --- /dev/null +++ b/tests/test_centroid_calculator.py @@ -0,0 +1,177 @@ +import numpy as np + +from pymepix.processing.logic.centroid_calculator import CentroidCalculator + +def test_calculate_centroid_properties_1(): + centroid_calculator = CentroidCalculator() + shot = np.array([1, 1, 1, 1, 1]) + x = np.array([0, 0, 1, 0, -1]) + y = np.array([0, 1, 0, -1, 0]) + tof = np.array([1, 1, 1, 1, 1]) + tot = np.array([1, 1, 1, 1, 1]) + label = np.array([0, 0, 0, 0, 0]) + # cluster_shot, cluster_x, cluster_y, cluster_tof, cluster_totAvg, cluster_totMax, cluster_size + expected_result = [1], [0], [0], [1], [1], [1], [5] + assertCentroidsEqual(expected_result, centroid_calculator.calculate_centroids_properties(shot, x, y, tof, tot, label)) + +def test_calculate_centroid_properties_2(): + centroid_calculator = CentroidCalculator() + shot = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2]) + x = np.array([0, 0, 1, 0, -1, 1, 1, 2, 1, 0]) + y = np.array([0, 1, 0, -1, 0, 1, 2, 1, 0, 1]) + tof = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + tot = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + label = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + # cluster_shot, cluster_x, cluster_y, cluster_tof, cluster_totAvg, cluster_totMax, cluster_size + expected_result = [1, 2], [0, 1], [0, 1], [1, 1], [1, 1], [1, 1], [5, 5] + assertCentroidsEqual(expected_result, centroid_calculator.calculate_centroids_properties(shot, x, y, tof, tot, label)) + +def test_calculate_centroid_properties_3(): + centroid_calculator = CentroidCalculator() + shot = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2]) + x = np.array([0, 0, 1, 0, -1, 1, 1, 2, 1, 0]) + y = np.array([0, 1, 0, -1, 0, 1, 2, 1, 0, 1]) + tof = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 1]) + tot = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) + label = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1]) + # cluster_shot, cluster_x, cluster_y, cluster_tof, cluster_totAvg, cluster_totMax, cluster_size + expected_result = [1, 2], [0, 1], [0, 1], [1/5, 1/5], [1, 1], [1, 1], [5, 5] + assertCentroidsEqual(expected_result, centroid_calculator.calculate_centroids_properties(shot, x, y, tof, tot, label)) + +def test_divide_into_chunks_1(): + centroid_calculator = CentroidCalculator() + + factor = 6_500 + + data = [3, 1, 2, 4, 5, 6] + + shot = np.repeat(data, factor) + x = np.repeat(data, factor) + y = np.repeat(data, factor) + tof = np.repeat(data, factor) + tot = np.repeat(data, factor) + + chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) + + for elem in data: + for arr in chunks[elem - 1]: + assert factor == len(arr) + +def test_divide_into_chunks_2(): + centroid_calculator = CentroidCalculator() + + factor = 1 + + shot = np.repeat([1, 2], factor) + x = 
np.repeat([1, 2], factor)
+    y = np.repeat([1, 2], factor)
+    tof = np.repeat([1, 2], factor)
+    tot = np.repeat([1, 2], factor)
+
+    np.testing.assert_array_equal([1, 2],
+        centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)[0][1])
+
+def test_divide_into_chunks_3():
+    centroid_calculator = CentroidCalculator()
+
+    factor = 1
+
+    shot = np.repeat([1, 2], factor)
+    x = np.repeat([1, 2], factor)
+    y = np.repeat([1, 2], factor)
+    tof = np.repeat([1, 2], factor)
+    tot = np.repeat([1, 2], factor)
+
+    chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)
+    sum = 0
+    found_triggers = []
+    for chunk in chunks:
+        sum += chunk[0].shape[0]
+        assert 0 == chunk[0].shape[0] % factor
+        found_triggers += np.unique(chunk[0]).tolist()
+    assert shot.shape[0] == sum
+    assert np.all(np.unique(found_triggers, return_counts=True)[1] == 1)
+
+def test_divide_into_chunks_4():
+    centroid_calculator = CentroidCalculator()
+
+    factor = 2_500
+
+    data = [3, 1, 2, 4, 5, 6, 4, 4, 4]
+
+    shot = np.repeat(data, factor)
+    x = np.repeat(data, factor)
+    y = np.repeat(data, factor)
+    tof = np.repeat(data, factor)
+    tot = np.repeat(data, factor)
+
+    chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)
+    sum = 0
+    found_triggers = []
+    for chunk in chunks:
+        sum += chunk[0].shape[0]
+        assert 0 == chunk[0].shape[0] % factor
+        found_triggers += np.unique(chunk[0]).tolist()
+    assert shot.shape[0] == sum
+    assert np.all(np.unique(found_triggers, return_counts=True)[1] == 1)
+
+def test_divide_into_chunks_5():
+    centroid_calculator = CentroidCalculator()
+
+    factor = 3_500
+
+    data = [3, 1, 2, 4, 5, 6, 1, 1]
+
+    shot = np.repeat(data, factor)
+    x = np.repeat(data, factor)
+    y = np.repeat(data, factor)
+    tof = np.repeat(data, factor)
+    tot = np.repeat(data, factor)
+
+    chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)
+    sum = 0
+    found_triggers = []
+    for chunk in chunks:
+        sum += chunk[0].shape[0]
+        assert 0 == chunk[0].shape[0] % factor
+        found_triggers += np.unique(chunk[0]).tolist()
+    assert shot.shape[0] == sum
+    assert np.all(np.unique(found_triggers, return_counts=True)[1] == 1)
+
+def test_divide_into_chunks_6():
+    centroid_calculator = CentroidCalculator()
+
+    factor = 1
+
+    data = range(0, 10_000)
+
+    shot = np.repeat(data, factor)
+    x = np.repeat(data, factor)
+    y = np.repeat(data, factor)
+    tof = np.repeat(data, factor)
+    tot = np.repeat(data, factor)
+
+    chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)
+    sum = 0
+    found_triggers = []
+    for chunk in chunks:
+        sum += chunk[0].shape[0]
+        assert 0 == chunk[0].shape[0] % factor
+        found_triggers += np.unique(chunk[0]).tolist()
+    assert shot.shape[0] == sum
+    assert np.all(np.unique(found_triggers, return_counts=True)[1] == 1)
+
+def test_process():
+    centroid_calculator = CentroidCalculator()
+    shot = np.array([1, 1, 1, 1, 1, 1] + [2, 2, 2, 2, 2, 2])
+    x = np.concatenate(([0, 0, 0, 1, 0, -1], np.array([1, 1, 1, 2, 1, 0]) + 5))
+    y = np.concatenate(([0, 0, 1, 0, -1, 0], np.array([1, 1, 2, 1, 0, 1]) + 5))
+    tof = np.array([0, 0, 0, 0, 0, 0] + [0, 0, 0, 0, 0, 0])
+    tot = np.array([1, 1, 1, 1, 1, 1] + [1, 1, 1, 1, 1, 1])
+
+    expected_result = [1, 2], [0, 6], [0, 6], [0, 0], [1, 1], [1, 1], [6, 6]
+    assertCentroidsEqual(expected_result, centroid_calculator.process((shot, x, y, tof, tot)))
+
+def assertCentroidsEqual(expected, actual):
+    for i in range(len(expected)):
+        np.testing.assert_array_equal(expected[i], actual[i])
\ No newline at end of file
diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py
new file mode 100644
index 0000000..2efd198
--- /dev/null
+++ b/tests/test_post_processing.py
@@ -0,0 +1,42 @@
+import numpy as np
+import h5py
+
+from pymepix.processing.rawfilesampler import RawFileSampler
+
+
+def test_run_17_no_time_walk_correction():
+    tmp_file_name = "tmp_run_0017_20191211-0013.hdf5"
+    file_sampler = RawFileSampler("run_0017_20191211-0013.raw", tmp_file_name)
+    file_sampler.run()
+
+    with h5py.File(tmp_file_name) as new_file:
+        print(new_file['centroided'].keys())
+        print(len(new_file['raw/x'][:]))
+        print(len(new_file['centroided/x'][:]))
+
+        # The RawConverter has been slightly adjusted for this comparison. The pixels were sorted by the shot number to match the order which is required
+        # for the processing in chunks. This is required as the DBSCAN algorithm can be non-deterministic regarding the order in some rare scenarios.
+        # Martin Ester, Hans-Peter Kriegel, Jörg Sander, Xiaowei Xu: A Density-Based Algorithm for Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf)
+        # https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic
+        with h5py.File('run_0017_20191211-0013.hdf5') as old_file:
+            order_new, order_old = new_file['centroided/x'][:].argsort(), old_file['centroided/x'][:].argsort()
+            shot_new, shot_old = new_file['centroided/trigger nr'][:][order_new], old_file['centroided/trigger nr'][:][order_old]
+            x_new, x_old = new_file['centroided/x'][:][order_new], old_file['centroided/x'][:][order_old]
+            y_new, y_old = new_file['centroided/y'][:][order_new], old_file['centroided/y'][:][order_old]
+            tof_new, tof_old = new_file['centroided/tof'][:][order_new], old_file['centroided/tof'][:][order_old]
+            tot_new, tot_old = new_file['centroided/tot max'][:][order_new], old_file['centroided/tot max'][:][order_old]
+            tot_avg_new, tot_avg_old = new_file['centroided/tot avg'][:][order_new], old_file['centroided/tot avg'][:][order_old]
+            size_new, size_old = new_file['centroided/clustersize'][:][order_new], old_file['centroided/clustersize'][:][order_old]
+
+
+    assertCentroidsAlmostEqual((x_new, y_new, tof_new, tot_new, tot_avg_new, size_new), (x_old, y_old, tof_old, tot_old, tot_avg_old, size_old))
+
+def assertCentroidsAlmostEqual(expected, actual):
+    np.testing.assert_array_equal(expected[0], actual[0])
+    np.testing.assert_array_equal(expected[1], actual[1])
+    # The centroids (TOF) can only be almost equal due to errors in floating point arithmetic.
+    # A more detailed explanation can be found in the documentation of CentroidCalculator.calculate_centroids_properties
+    np.testing.assert_array_almost_equal(expected[2], actual[2], 15)
+    np.testing.assert_array_equal(expected[3], actual[3])
+    np.testing.assert_array_equal(expected[4], actual[4])
+    np.testing.assert_array_equal(expected[5], actual[5])
\ No newline at end of file

From 19e49700e1fbc9c67a145637b5b323cbbbd2f015 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3=B6hling?=
Date: Mon, 6 Sep 2021 15:34:30 +0200
Subject: [PATCH 07/29] Fix constructor call for pipeline objects

---
 pymepix/processing/acquisition.py     | 15 +++++++++------
 pymepix/processing/baseacquisition.py |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py
index 2dec24a..e90be0c 100644
--- a/pymepix/processing/acquisition.py
+++ b/pymepix/processing/acquisition.py
@@ -20,6 +20,8 @@
 
 """Module that contains predefined acquisition pipelines for the user to use"""
+from pymepix.processing.logic.centroid_calculator import CentroidCalculator
+from pymepix.processing.logic.packet_processor import PacketProcessor
 from .baseacquisition import AcquisitionPipeline
 from .pipeline_centroid_calculator import PipelineCentroidCalculator
 from .pipeline_packet_processor import PipelinePacketProcessor
@@ -51,8 +53,7 @@ def _reconfigureProcessor(self):
         )
         self.getStage(2).configureStage(
             PipelinePacketProcessor,
-            handle_events=self._use_events,
-            event_window=self._event_window,
+            packet_processor=PacketProcessor(self._use_events, self._event_window)
         )
 
     @property
@@ -146,10 +147,12 @@ def _reconfigureCentroid(self):
         self._reconfigureProcessor()
         p = self.getStage(4).configureStage(
             PipelineCentroidCalculator,
-            skip_data=self._skip_centroid,
-            tot_filter=self._tot_threshold,
-            epsilon=self._epsilon,
-            samples=self._samples,
+            centroid_calculator=CentroidCalculator(
+                # skip_data=self._skip_centroid, TODO: Currently not implemented
+                tot_threshold=self._tot_threshold,
+                epsilon=self._epsilon,
+                min_samples=self._samples
+            )
         )
 
     @property
diff --git a/pymepix/processing/baseacquisition.py b/pymepix/processing/baseacquisition.py
index 45b5cb4..c8a4106 100644
--- a/pymepix/processing/baseacquisition.py
+++ b/pymepix/processing/baseacquisition.py
@@ -309,7 +309,7 @@ def main():
     import time
     from multiprocessing.sharedctypes import Value
 
-    from pymepix.processing.packetprocessor import PacketProcessor
+    from pymepix.processing.pipeline_packet_processor import PipelinePacketProcessor
     from pymepix.processing.udpsampler import UdpSampler
 
     # Create the logger
@@ -324,7 +324,7 @@ def main():
     test_value = Value("I", 0)
 
     acqpipline.addStage(0, UdpSampler, ("127.0.0.1", 50000), test_value)
-    acqpipline.addStage(2, PacketProcessor, num_processes=4)
+    acqpipline.addStage(2, PipelinePacketProcessor, num_processes=4)
 
     def get_queue_thread(queue):
         while True:

From dbba6f777f0ccfc0e3648a0b2e5f6afe930e7bde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3=B6hling?=
Date: Tue, 7 Sep 2021 13:45:51 +0200
Subject: [PATCH 08/29] Fix and enhance pipeline packet processor
 implementation

---
 pymepix/processing/logic/packet_processor.py  |  3 ++
 .../processing/pipeline_packet_processor.py   | 31 +++++++------------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py
index 30ff72b..09ed648 100644
--- a/pymepix/processing/logic/packet_processor.py
+++ b/pymepix/processing/logic/packet_processor.py
@@ -64,6 +64,9 @@ def process(self, data):
 
         return None
 
+    def pre_process(self):
+        self.info("Running with triggers? {}".format(self._handle_events))
+
     def post_process(self):
         return self.find_events_fast_post()
 
diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py
index 2a79d00..0dcf74b 100644
--- a/pymepix/processing/pipeline_packet_processor.py
+++ b/pymepix/processing/pipeline_packet_processor.py
@@ -19,26 +19,13 @@
 # see <http://www.gnu.org/licenses/>.
 
 from enum import IntEnum
+from pymepix.processing.datatypes import MessageType
 
 import zmq
 
 from .basepipeline import BasePipelineObject
 from .logic.packet_processor import PacketProcessor
 
-
-class PixelOrientation(IntEnum):
-    """Defines how row and col are interpreted in the output"""
-
-    Up = 0
-    """Up is the default, x=column, y=row"""
-    Left = 1
-    """x=row, y=-column"""
-    Down = 2
-    """x=-column, y=-row"""
-    Right = 3
-    """x=-row, y=column"""
-
-
 class PipelinePacketProcessor(BasePipelineObject):
     """Processes pixel packets for ToA, ToT, triggers and events
 
@@ -57,9 +44,8 @@ def __init__(
         # set input_queue to None for now, or baseacquisition.build would have to be modified;
         # the input queue is replaced by ZMQ
         super().__init__(
-            self,
             PipelinePacketProcessor.__name__,
-            input_queue=input_queue,
+            input_queue=None,
             create_output=create_output,
             num_outputs=num_outputs,
             shared_output=shared_output,
@@ -74,13 +60,18 @@ def init_new_process(self):
         self._packet_sock.connect("ipc:///tmp/packetProcessor")
 
     def pre_run(self):
-        self.info("Running with triggers? {}".format(self._handle_events))
         self.init_new_process()
-        self.packet_processor.pre_execution()
+        self.packet_processor.pre_process()
 
     def post_run(self):
         self._packet_sock.close()
-        return self.packet_processor.post_execution()
+        return None, self.packet_processor.post_process()
 
     def process(self, data_type=None, data=None):
-        return self.packet_processor.process(self._packet_sock.recv(copy=False))
\ No newline at end of file
+        # timestamps are not required for online processing
+        result = self.packet_processor.process(self._packet_sock.recv(copy=False))
+        if result is not None:
+            events, _timestamps = result
+            if events is not None:
+                return MessageType.EventData, events
+        return None, None
\ No newline at end of file

From b808a2ddc4d674df2ecab9c73914faff032d25d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3%B6hling?=
Date: Tue, 7 Sep 2021 13:46:37 +0200
Subject: [PATCH 09/29] Fix pipeline centroid calculator implementation

---
 pymepix/processing/logic/centroid_calculator.py    | 7 ++++---
 pymepix/processing/pipeline_centroid_calculator.py | 9 ++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py
index fa4fdce..c5da64c 100644
--- a/pymepix/processing/logic/centroid_calculator.py
+++ b/pymepix/processing/logic/centroid_calculator.py
@@ -73,9 +73,10 @@ def __calc_trig_chunks_split_indices(self, shot):
 
     def __centroid_chunks_to_centroids(self, chunks):
         centroids = [[] for i in range(7)]
-        for chunk in chunks:
-            for index, coordinate in enumerate(chunk):
-                centroids[index].append(coordinate)
+        for chunk in list(chunks):
+            if chunk is not None:
+                for index, coordinate in enumerate(chunk):
+                    centroids[index].append(coordinate)
 
         return [np.concatenate(coordinate) for coordinate in centroids]
 
diff --git a/pymepix/processing/pipeline_centroid_calculator.py b/pymepix/processing/pipeline_centroid_calculator.py
index c57da4d..989d90a 100644
--- a/pymepix/processing/pipeline_centroid_calculator.py
+++ b/pymepix/processing/pipeline_centroid_calculator.py
@@ -19,6 +19,7 @@
 # see <http://www.gnu.org/licenses/>.
 
 """Processors relating to centroiding"""
+from pymepix.processing.datatypes import MessageType
 from pymepix.processing.logic.centroid_calculator import CentroidCalculator
 
 from .basepipeline import BasePipelineObject
@@ -36,7 +37,6 @@ def __init__(
         shared_output=None,
     ):
         super().__init__(
-            self,
             PipelineCentroidCalculator.__name__,
             input_queue=input_queue,
             create_output=create_output,
@@ -45,5 +45,8 @@
         )
         self.centroid_calculator = centroid_calculator
 
-    def process(self, data_type, data):
-        return self.centroid_calculator.process((data_type, data))
\ No newline at end of file
+    def process(self, data_type=None, data=None):
+        if data_type == MessageType.EventData:
+            return MessageType.CentroidData, self.centroid_calculator.process(data)
+
+        return None, None
\ No newline at end of file

From 7ec765e7a2c6a828bad5df859e5ed4bfd482c22d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3%B6hling?=
Date: Wed, 8 Sep 2021 13:12:19 +0200
Subject: [PATCH 10/29] #15 Fix centroiding parameters

- Parameters: epsilon, min_samples, threshold, skip
- Introduced shared memory to enable changes from pymepix-viewer
---
 pymepix/processing/acquisition.py               | 111 +-----------------
 .../processing/logic/centroid_calculator.py     |  74 ++++++++----
 2 files changed, 57 insertions(+), 128 deletions(-)

diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py
index e90be0c..e604a99 100644
--- a/pymepix/processing/acquisition.py
+++ b/pymepix/processing/acquisition.py
@@ -134,128 +134,25 @@ def __init__(self, data_queue, address, longtime):
             self, data_queue, address, longtime, use_event=True, name="Centroid"
         )
         self.info("Initializing Centroid pipeline")
-        self._skip_centroid = 1
-        self._tot_threshold = 0
-        self._samples = 5
-        self._epsilon = 2.0
+        self.centroid_calculator = CentroidCalculator()
 
-        self.addStage(4, PipelineCentroidCalculator, num_processes=25)
+        self.addStage(4, PipelineCentroidCalculator, num_processes=6)
 
         self._reconfigureCentroid()
 
     def _reconfigureCentroid(self):
         self._reconfigureProcessor()
-        p = self.getStage(4).configureStage(
+        self.getStage(4).configureStage(
             PipelineCentroidCalculator,
-            centroid_calculator=CentroidCalculator(
-                # skip_data=self._skip_centroid, TODO: Currently not implemented
-                tot_threshold=self._tot_threshold,
-                epsilon=self._epsilon,
-                min_samples=self._samples
-            )
+            centroid_calculator=self.centroid_calculator
         )
 
-    @property
-    def centroidSkip(self):
-        """Perform centroiding on every nth packet
-
-        Parameters
-        -----------
-        value: int
-
-
-        """
-        return self._skip_centroid
-
-    @centroidSkip.setter
-    def centroidSkip(self, value):
-        self.info("Setting Centroid skip to {}".format(value))
-        self._skip_centroid = value
-        self._reconfigureCentroid()
-        if self.isRunning:
-            skip = self._skip_centroid
-            for p in self.getStage(4).processes:
-                p.centroidSkip = skip
-
-    @property
-    def epsilon(self):
-        """Perform centroiding on every nth packet
-
-        Parameters
-        -----------
-        value: int
-
-
-        """
-        return self._epsilon
-
-    @epsilon.setter
-    def epsilon(self, value):
-        self._epsilon = value
-        self._reconfigureCentroid()
-        self.info("Setting epsilon skip to {}".format(value))
-        if self.isRunning:
-            skip = self._epsilon
-            for p in self.getStage(4).processes:
-                p.epsilon = skip
-
-    @property
-    def samples(self):
-        """Perform centroiding on every nth packet
-
-        Parameters
-        -----------
-        value: int
- - - """ - return self._samples - - @samples.setter - def samples(self, value): - self._samples = value - self._reconfigureCentroid() - if self.isRunning: - skip = self._samples - for p in self.getStage(4).processes: - p.samples = skip - - @property - def totThreshold(self): - """Determines which time over threhsold values to filter before centroiding - - This is useful in reducing the computational time in centroiding and can filter out - noise. Changes take effect immediately - - Parameters - ----------- - value: int - - - Returns - ----------- - int - - """ - return self._tot_threshold - - @totThreshold.setter - def totThreshold(self, value): - self._tot_threshold = value - self._reconfigureCentroid() - if self.isRunning: - skip = self._tot_threshold - for p in self.getStage(4).processes: - p.totThreshold = skip - @property def numBlobProcesses(self): """Number of python processes to spawn for centroiding Setting this will spawn the appropriate number of processes to perform centroiding. Changes take effect on next acquisition. - - """ return self.getStage(4).numProcess diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py index c5da64c..68a5391 100644 --- a/pymepix/processing/logic/centroid_calculator.py +++ b/pymepix/processing/logic/centroid_calculator.py @@ -8,38 +8,72 @@ class CentroidCalculator(ProcessingStep): - def __init__(self, tot_threshold=0, epsilon=2, min_samples=5, chunk_size_limit=6_500, + def __init__(self, tot_threshold=0, epsilon=2, min_samples=5, triggers_processed=1, chunk_size_limit=6_500, cent_timewalk_lut=None): super().__init__("CentroidCalculator") - self._epsilon = epsilon - self._min_samples = min_samples + self._epsilon = mp.Value('d', epsilon) + self._min_samples = mp.Value('i', min_samples) + self._tot_threshold = mp.Value('i', tot_threshold) + self._triggers_processed = mp.Value('i', triggers_processed) + + self._chunk_size_limit = chunk_size_limit self._tof_scale = 1e7 - self._tot_threshold = tot_threshold self._cent_timewalk_lut = cent_timewalk_lut - self._chunk_size_limit = chunk_size_limit - self.removed_by_dbscan = 0 + @property + def epsilon(self): + return self._epsilon.value + + @epsilon.setter + def epsilon(self, epsilon): + self._epsilon.value = epsilon + + @property + def min_samples(self): + return self._min_samples.value + + @min_samples.setter + def min_samples(self, min_samples): + self._min_samples.value = min_samples + + @property + def tot_threshold(self): + """Determines which time over threshold values to filter before centroiding + + This is useful in reducing the computational time in centroiding and can filter out + noise. """ + return self._tot_threshold.value + + @tot_threshold.setter + def tot_threshold(self, tot_threshold): + self._tot_threshold.value = tot_threshold + + @property + def triggers_processed(self): + """ Setting for the number of packets skiped during processing. Every packet_skip packet is processed. + This means for a value of 1 every packet is processed. For 2 only every 2nd packet is processed. 
""" + return self._triggers_processed.value + + @triggers_processed.setter + def triggers_processed(self, triggers_processed): + self._triggers_processed.value = triggers_processed def process(self, data): if data is not None: - shot, x, y, tof, tot = data - + shot, x, y, tof, tot = self.__skip_triggers(*data) chunks = self.__divide_into_chunks(shot, x, y, tof, tot) - # chunks = [data] centroids_in_chunks = self.perform_centroiding(chunks) return self.__centroid_chunks_to_centroids(centroids_in_chunks) else: return None - def debug_condition(self, chunks, size): - sum = 0 - found_triggers = [] - for chunk in chunks: - sum += chunk[0].shape[0] - found_triggers += np.unique(chunk[0]).tolist() - return sum != size or np.all(np.unique(found_triggers, return_counts=True)[1] > 1) + def __skip_triggers(self, shot, x, y, tof, tot): + unique_shots = np.unique(shot) + selected_shots = unique_shots[::self.triggers_processed] + mask = np.isin(shot, selected_shots) + return shot[mask], x[mask], y[mask], tof[mask], tot[mask] def __divide_into_chunks(self, shot, x, y, tof, tot): # Reordering the voxels can have an impact on the clusterings result. See CentroidCalculator.perform_clustering string doc for further information! @@ -86,7 +120,7 @@ def perform_centroiding(self, chunks): def calculate_centroids(self, chunk): shot, x, y, tof, tot = chunk - tot_filter = tot > self._tot_threshold + tot_filter = tot > self.tot_threshold # Filter out pixels shot = shot[tot_filter] x = x[tot_filter] @@ -98,8 +132,6 @@ def calculate_centroids(self, chunk): label_filter = labels != 0 - self.removed_by_dbscan += np.where(label_filter == False)[0].shape[0] - if labels is not None and labels[label_filter].size > 0: return self.calculate_centroids_properties( shot[label_filter], @@ -119,10 +151,10 @@ def perform_clustering(self, shot, x, y, tof): Martin Ester, Hans-Peter Kriegel, Jiirg Sander, Xiaowei Xu: A Density Based Algorith for Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf) A more specific explaination can be found here: https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic""" if x.size >= 0: - X = np.column_stack((shot * self._epsilon * 1_000, x, y, tof * self._tof_scale)) + X = np.column_stack((shot * self.epsilon * 1_000, x, y, tof * self._tof_scale)) dist = DBSCAN( - eps=self._epsilon, min_samples=self._min_samples, metric="euclidean", n_jobs=1 + eps=self.epsilon, min_samples=self.min_samples, metric="euclidean", n_jobs=1 ).fit(X) return dist.labels_ + 1 From de2c427583a81b1514e2a47a780f05c46993ffe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Thu, 9 Sep 2021 08:33:31 +0200 Subject: [PATCH 11/29] #14 Fix event_window and handle_events konfiguration. See also Changes in pymepix viewer --- pymepix/processing/acquisition.py | 75 ++------------------ pymepix/processing/logic/packet_processor.py | 37 ++++++++-- 2 files changed, 34 insertions(+), 78 deletions(-) diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py index e604a99..8fd4ec2 100644 --- a/pymepix/processing/acquisition.py +++ b/pymepix/processing/acquisition.py @@ -38,87 +38,20 @@ class PixelPipeline(AcquisitionPipeline): def __init__(self, data_queue, address, longtime, use_event=False, name="Pixel"): AcquisitionPipeline.__init__(self, name, data_queue) self.info("Initializing Pixel pipeline") - self._use_events = use_event - self._event_window = (0, 10000) + # TODO: Check event_window init value, which order should this be? 
+        #       Probably need to change to 10_000 / 10e6.
+        # Check also the PacketProcessor init method default value and adjust in processingconfig if required!
+        self.packet_processor = PacketProcessor(handle_events=use_event, event_window=(0, 10000))
 
         self.addStage(0, UdpSampler, address, longtime)
         self.addStage(2, PipelinePacketProcessor, num_processes=2)
 
         self._reconfigureProcessor()
 
     def _reconfigureProcessor(self):
-        self.debug(
-            "Configuring packet processor handle_events={} event_window={}".format(
-                self._use_events, self._event_window
-            )
-        )
         self.getStage(2).configureStage(
             PipelinePacketProcessor,
-            packet_processor=PacketProcessor(self._use_events, self._event_window)
+            packet_processor=self.packet_processor
         )
 
-    @property
-    def enableEvents(self):
-        """This either enables or disables TOF (Time of Flight) calculation
-
-        Enabling this will ask the packet processor to process both triggers and pixels
-        and compute time of flight rather than time of arrival. Changes take effect on the next
-        acquisition
-
-        Parameters
-        ------------
-        value : bool
-
-
-        Returns
-        ------------
-        bool
-
-        """
-        return self._use_events
-
-    @enableEvents.setter
-    def enableEvents(self, value):
-        self.info("Setting event to {}".format(value))
-        self._use_events = value
-        self._reconfigureProcessor()
-
-    @property
-    def eventWindow(self):
-        """In TOF mode (useEvents is true) the time window in seconds to output further down the pipeline
-
-        When in TOF mode, sets up the packet processor to only output events within a certain time window relative
-        to a trigger. Changes happen immediately.
-        For example to only consider events within a time window between 3 us and 8 us after the trigger do
-
-        >>> pixelpipeline.eventWindow = (3E-6,8E-6)
-
-
-
-        Parameters
-        ------------
-        value: :obj:`tuple` of 2 :obj:`float`
-            A tuple of two floats that represents the (min,max) time of flight window in seconds
-            This is useful to filter out a particular region
-
-
-        Returns
-        ----------
-        :obj:`tuple` of 2 floats
-            Currently set event window
-
-        """
-        return self._event_window
-
-    @eventWindow.setter
-    def eventWindow(self, value):
-        self._event_window = value
-        self._reconfigureProcessor()
-        if self.isRunning:
-            min_win, max_win = self._event_window
-            for p in self.getStage(2).processes:
-                p.minWindow = min_win
-                p.maxWindow = max_win
-
 
 class CentroidPipeline(PixelPipeline):
     """A Pixel pipeline that includes centroiding
diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py
index 09ed648..f50d328 100644
--- a/pymepix/processing/logic/packet_processor.py
+++ b/pymepix/processing/logic/packet_processor.py
@@ -1,4 +1,6 @@
 from enum import IntEnum
+from multiprocessing import Value
+from ctypes import c_bool
 
 import numpy as np
 
 from pymepix.core.log import Logger
@@ -24,8 +26,10 @@ class PacketProcessor(ProcessingStep):
 
     def __init__(self, handle_events=True, event_window=(0.0, 10000.0), position_offset=(0, 0),
                  orientation=PixelOrientation.Up, start_time=0, timewalk_lut=None):
         super().__init__("PacketProcessor")
-        self._handle_events = handle_events
-        self._min_window, self._max_window = event_window
+        self._handle_events = Value(c_bool, handle_events)
+        event_window_min, event_window_max = event_window
+        self._event_window_min = Value('d', event_window_min)
+        self._event_window_max = Value('d', event_window_max)
         self._orientation = orientation
         self._x_offset, self._y_offset = position_offset
         self._start_time = start_time
@@ -35,6 +39,24 @@
 
         self.clearBuffers()
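+
+    # Note: the Value-backed fields created in __init__ live in shared memory,
+    # so the properties below can be changed from the parent process (e.g.
+    # from pymepix-viewer) while the worker processes are running, for example:
+    #     packet_processor.event_window = (3e-6, 8e-6)  # keep only 3-8 us after the trigger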
+    @property
+    def event_window(self):
+        return (self._event_window_min.value, self._event_window_max.value)
+
+    @event_window.setter
+    def event_window(self, event_window):
+        event_window_min, event_window_max = event_window
+        self._event_window_min.value = event_window_min
+        self._event_window_max.value = event_window_max
+
+    @property
+    def handle_events(self):
+        return self._handle_events.value
+
+    @handle_events.setter
+    def handle_events(self, handle_events):
+        self._handle_events.value = handle_events
+
     def process(self, data):
         packet_view = memoryview(data)
         packet = np.frombuffer(packet_view[:-8], dtype=np.uint64)
@@ -59,13 +81,13 @@
         if triggers.size > 0:
             self.process_triggers(np.int64(triggers), longtime)
 
-        if self._handle_events:
+        if self.handle_events:
             return self.find_events_fast()
 
         return None
 
     def pre_process(self):
-        self.info("Running with triggers? {}".format(self._handle_events))
+        self.info("Running with triggers? {}".format(self.handle_events))
 
     def post_process(self):
         return self.find_events_fast_post()
@@ -110,7 +132,7 @@ def process_triggers(self, pixdata, longtime):
 
         m_trigTime = tdc_time
 
-        if self._handle_events:
+        if self.handle_events:
             if self._triggers is None:
                 self._triggers = m_trigTime
             else:
@@ -162,7 +184,7 @@ def process_pixels(self, pixdata, longtime):
 
         # TODO: don't clutter queue with unnecessary stuff for now
         # self.pushOutput(MessageType.PixelData, (x, y, finalToA, ToT))
 
-        if self._handle_events:
+        if self.handle_events:
             if self._x is None:
                 self._x = x
                 self._y = y
@@ -236,7 +258,8 @@ def find_events_fast(self):
         tof = toa - start[event_mapping]
         event_number = trigger_counter[event_mapping]
 
-        exp_filter = (tof >= self._min_window) & (tof <= self._max_window)
+        event_window_min, event_window_max = self.event_window
+        exp_filter = (tof >= event_window_min) & (tof <= event_window_max)
 
         result = (
             event_number[exp_filter],

From be69b8f1d3bc552220394dcedd9490a90f843a86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3%B6hling?=
Date: Thu, 9 Sep 2021 08:56:39 +0200
Subject: [PATCH 12/29] #14 Fix initialization of event_window, initialize
 with smaller min_samples value

---
 pymepix/processing/acquisition.py               | 4 +---
 pymepix/processing/logic/centroid_calculator.py | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py
index 8fd4ec2..7299d02 100644
--- a/pymepix/processing/acquisition.py
+++ b/pymepix/processing/acquisition.py
@@ -38,9 +38,7 @@ class PixelPipeline(AcquisitionPipeline):
     def __init__(self, data_queue, address, longtime, use_event=False, name="Pixel"):
         AcquisitionPipeline.__init__(self, name, data_queue)
         self.info("Initializing Pixel pipeline")
-        # TODO: Check event_window init value, which order should this be?
-        #       Probably need to change to 10_000 / 10e6.
-        # Check also the PacketProcessor init method default value and adjust in processingconfig if required!
- self.packet_processor = PacketProcessor(handle_events=use_event, event_window=(0, 10000)) + self.packet_processor = PacketProcessor(handle_events=use_event, event_window=(0, 1E-3)) self.addStage(0, UdpSampler, address, longtime) self.addStage(2, PipelinePacketProcessor, num_processes=2) diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py index 68a5391..18e26f2 100644 --- a/pymepix/processing/logic/centroid_calculator.py +++ b/pymepix/processing/logic/centroid_calculator.py @@ -8,7 +8,7 @@ class CentroidCalculator(ProcessingStep): - def __init__(self, tot_threshold=0, epsilon=2, min_samples=5, triggers_processed=1, chunk_size_limit=6_500, + def __init__(self, tot_threshold=0, epsilon=2, min_samples=3, triggers_processed=1, chunk_size_limit=6_500, cent_timewalk_lut=None): super().__init__("CentroidCalculator") From 72ef5311b05498c4a78229d89cb94eeadeb12226 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 13 Sep 2021 08:12:36 +0200 Subject: [PATCH 13/29] Implement changes to show a progressbar in pymepix_viewer --- pymepix/__init__.py | 1 + pymepix/main.py | 10 ++------ pymepix/post_processing.py | 5 ++++ pymepix/processing/logic/packet_processor.py | 2 +- pymepix/processing/rawfilesampler.py | 25 ++++++++++---------- 5 files changed, 21 insertions(+), 22 deletions(-) create mode 100644 pymepix/post_processing.py diff --git a/pymepix/__init__.py b/pymepix/__init__.py index 6ae2e9c..1e61fe5 100644 --- a/pymepix/__init__.py +++ b/pymepix/__init__.py @@ -21,3 +21,4 @@ from pymepix.processing import MessageType from pymepix.pymepix_connection import PollBufferEmpty, PymepixConnection from pymepix.timepixdef import * +from pymepix.post_processing import run_post_processing diff --git a/pymepix/main.py b/pymepix/main.py index 849d00a..b714967 100644 --- a/pymepix/main.py +++ b/pymepix/main.py @@ -26,7 +26,7 @@ import time import pymepix.config.load_config as cfg -from pymepix.processing.rawfilesampler import RawFileSampler +from pymepix.post_processing import run_post_processing from pymepix.processing.datatypes import MessageType from pymepix.pymepix_connection import PollBufferEmpty, PymepixConnection @@ -106,14 +106,8 @@ def connect_timepix(args): pymepix.stop() def post_process(args): - - file_sampler = RawFileSampler(args.file.name, args.output_file, args.number_of_processes, + run_post_processing(args.file.name, args.output_file, args.number_of_processes, args.timewalk_file, args.cent_timewalk_file) - start_time = time.time() - file_sampler.run() - stop_time = time.time() - - print(f'took: {stop_time - start_time}s') def main(): diff --git a/pymepix/post_processing.py b/pymepix/post_processing.py new file mode 100644 index 0000000..fab1424 --- /dev/null +++ b/pymepix/post_processing.py @@ -0,0 +1,5 @@ +from pymepix.processing.rawfilesampler import RawFileSampler + +def run_post_processing(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_callback=None): + file_sampler = RawFileSampler(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_callback) + file_sampler.run() \ No newline at end of file diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py index f50d328..dd8e175 100644 --- a/pymepix/processing/logic/packet_processor.py +++ b/pymepix/processing/logic/packet_processor.py @@ -286,7 +286,7 @@ def __toa_is_not_empty(self): def find_events_fast_post(self): """Call this 
function at the very end to also have the last two trigger events processed"""
         # add an imaginary last trigger event after last pixel event for np.digitize to work
-        if self._toa is not None:
+        if self._toa is not None and self._toa.shape[0] > 0:
             self._triggers = np.concatenate(
                 (self._triggers, np.array([self._toa.max() + 1, self._toa.max() + 2]))
             )
diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py
index 41c645b..f421559 100644
--- a/pymepix/processing/rawfilesampler.py
+++ b/pymepix/processing/rawfilesampler.py
@@ -34,7 +34,8 @@ def __init__(
         output_file,
         number_of_processes=None,
         timewalk_file=None,
-        cent_timewalk_file=None
+        cent_timewalk_file=None,
+        progress_callback=None
     ):
         self._filename = file_name
         self._output_file = output_file
@@ -42,6 +43,7 @@ def __init__(
         self.cent_timewalk_file = cent_timewalk_file
 
         self._number_of_processes = number_of_processes
+        self._progress_callback = progress_callback
 
     def init_new_process(self, file, startTime=0):
         """create connections and initialize variables in new process"""
@@ -51,8 +53,6 @@ def init_new_process(self, file, startTime=0):
         self._longtime_lsb = 0
         self._packet_buffer = []
         self._last_longtime = 0
-        self._total_bytes = os.path.getsize(file)
-        self._read_bytes = 0
         timewalk_lut = None
         cent_timewalk_lut = None
         if self.timewalk_file is not None:
@@ -62,6 +62,7 @@ def init_new_process(self, file, startTime=0):
 
         self.packet_processor = PacketProcessor(start_time=startTime, timewalk_lut=timewalk_lut)
         self.centroid_calculator = CentroidCalculatorPooled(cent_timewalk_lut=cent_timewalk_lut)
+        # TODO: There was an error with the Pooled version!!!!
 
         self._startTime = startTime
 
@@ -92,19 +93,18 @@ def post_run(self):
         self._file.close()
 
     def bytes_from_file(self, chunksize=8192):
-        last_progress = 0
         print("Reading to memory", flush=True)
-        ba = np.fromfile(self._file, dtype="<u8")
-        print("Processing", flush=True)
-        for b in ba:
-            yield b
-            self._read_bytes += 8
-            progress = int(100 * self._read_bytes / self._total_bytes)
-            if progress != last_progress:
-                last_progress = progress
-                print("Progress {}%".format(progress), flush=True)
+        ba = np.fromfile(self._file, dtype="<u8")
+        print("Processing", flush=True)
+        for i, b in enumerate(ba):
+            yield b
+            if self._progress_callback is not None:
+                self._progress_callback(float(i + 1) / len(ba))
 
     def handle_lsb_time(self, pixdata):
         self._longtime_lsb = (pixdata & 0x0000FFFFFFFF0000) >> 16
@@ -269,5 +269,4 @@ def run(self):
         if len(self._packet_buffer) > 0:
             self.push_data()
 
-        self.post_run()
-        print(f'removed by dbscan: {self.centroid_calculator.removed_by_dbscan}')
\ No newline at end of file
+        self.post_run()
\ No newline at end of file

From 53164616efabf39b03ef7c5405cdc505ddf18c04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20R=C3%B6hling?=
Date: Mon, 13 Sep 2021 13:11:22 +0200
Subject: [PATCH 14/29] Extract wrapper class to support multiprocessing in
 pipeline and post processing (Pool)

---
 pymepix/processing/logic/centroid_calculator.py  | 11 ++++++-----
 pymepix/processing/logic/packet_processor.py     |  9 +++++----
 pymepix/processing/logic/processing_parameter.py | 12 ++++++++++++
 .../logic/shared_processing_parameter.py         | 14 ++++++++++++++
 4 files changed, 37 insertions(+), 9 deletions(-)
 create mode 100644 pymepix/processing/logic/processing_parameter.py
 create mode 100644 pymepix/processing/logic/shared_processing_parameter.py

diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py
index 18e26f2..b68a480 100644
--- a/pymepix/processing/logic/centroid_calculator.py
+++ b/pymepix/processing/logic/centroid_calculator.py
@@ -1,6 +1,7 @@
 import multiprocessing as mp
 
 import numpy as np
+from pymepix.processing.logic.processing_parameter import ProcessingParameter
 import scipy.ndimage as nd
 from sklearn.cluster import DBSCAN
 
@@ -9,13 +10,13 @@
 
 class CentroidCalculator(ProcessingStep):
 
-    def __init__(self, tot_threshold=0, epsilon=2, min_samples=3, triggers_processed=1, chunk_size_limit=6_500,
-                 cent_timewalk_lut=None):
+    def __init__(self, tot_threshold=0, epsilon=2, min_samples=3, triggers_processed=1, chunk_size_limit=6_500,
+                 cent_timewalk_lut=None,
parameter_wrapper_class=ProcessingParameter): super().__init__("CentroidCalculator") - self._epsilon = mp.Value('d', epsilon) - self._min_samples = mp.Value('i', min_samples) - self._tot_threshold = mp.Value('i', tot_threshold) - self._triggers_processed = mp.Value('i', triggers_processed) + self._epsilon = parameter_wrapper_class(epsilon) + self._min_samples = parameter_wrapper_class(min_samples) + self._tot_threshold = parameter_wrapper_class(tot_threshold) + self._triggers_processed = parameter_wrapper_class(triggers_processed) self._chunk_size_limit = chunk_size_limit self._tof_scale = 1e7 diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py index dd8e175..0e0ce3e 100644 --- a/pymepix/processing/logic/packet_processor.py +++ b/pymepix/processing/logic/packet_processor.py @@ -4,6 +4,7 @@ import numpy as np from pymepix.core.log import Logger +from pymepix.processing.logic.processing_parameter import ProcessingParameter from pymepix.processing.logic.processing_step import ProcessingStep @@ -23,13 +24,13 @@ class PixelOrientation(IntEnum): class PacketProcessor(ProcessingStep): def __init__(self, handle_events=True, event_window=(0.0, 10000.0), position_offset=(0, 0), - orientation=PixelOrientation.Up, start_time=0, timewalk_lut=None): + orientation=PixelOrientation.Up, start_time=0, timewalk_lut=None, parameter_wrapper_class=ProcessingParameter): super().__init__("PacketProcessor") - self._handle_events = Value(c_bool, handle_events) + self._handle_events = parameter_wrapper_class(handle_events) event_window_min, event_window_max = event_window - self._event_window_min = Value('d', event_window_min) - self._event_window_max = Value('d', event_window_max) + self._event_window_min = parameter_wrapper_class(event_window_min) + self._event_window_max = parameter_wrapper_class(event_window_max) self._orientation = orientation self._x_offset, self._y_offset = position_offset self._start_time = start_time diff --git a/pymepix/processing/logic/processing_parameter.py b/pymepix/processing/logic/processing_parameter.py new file mode 100644 index 0000000..7c8541c --- /dev/null +++ b/pymepix/processing/logic/processing_parameter.py @@ -0,0 +1,12 @@ +class ProcessingParameter: + + def __init__(self, value) : + self._value = value + + @property + def value(self): + return self._value + + @value.setter + def value(self, value): + self._value = value \ No newline at end of file diff --git a/pymepix/processing/logic/shared_processing_parameter.py b/pymepix/processing/logic/shared_processing_parameter.py new file mode 100644 index 0000000..ec809e7 --- /dev/null +++ b/pymepix/processing/logic/shared_processing_parameter.py @@ -0,0 +1,14 @@ +from multiprocessing import Value + +class SharedProcessingParameter: + + def __init__(self, value) : + super().__init__(Value(type(value), value)) + + @property + def value(self): + return self._value.value + + @value.setter + def value(self, value): + self._value.value = value \ No newline at end of file From 5c2215f584bbef66a0fcd149e28b44e3c330deab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 13 Sep 2021 14:12:53 +0200 Subject: [PATCH 15/29] Use wrapper for the settings to enable parameters to work with online pipeline and raw postprocessing --- pymepix/processing/logic/centroid_calculator.py | 2 +- .../logic/shared_processing_parameter.py | 14 ++++++++++++-- pymepix/processing/pipeline_centroid_calculator.py | 5 +++-- pymepix/processing/pipeline_packet_processor.py | 3 ++- 
pymepix/processing/rawfilesampler.py | 4 ++-- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py index b68a480..07d8739 100644 --- a/pymepix/processing/logic/centroid_calculator.py +++ b/pymepix/processing/logic/centroid_calculator.py @@ -213,7 +213,7 @@ def calculate_centroids_properties(self, shot, x, y, tof, tot, labels): class CentroidCalculatorPooled(CentroidCalculator): - def __init__(self, number_of_processes=4, *args, **kwargs): + def __init__(self, number_of_processes=None, *args, **kwargs): super().__init__(*args, **kwargs) self._number_of_processes = number_of_processes diff --git a/pymepix/processing/logic/shared_processing_parameter.py b/pymepix/processing/logic/shared_processing_parameter.py index ec809e7..6ea21c8 100644 --- a/pymepix/processing/logic/shared_processing_parameter.py +++ b/pymepix/processing/logic/shared_processing_parameter.py @@ -1,9 +1,19 @@ from multiprocessing import Value -class SharedProcessingParameter: +from pymepix.processing.logic.processing_parameter import ProcessingParameter + +class UnknownParameterTypeException(Exception): + pass + +class SharedProcessingParameter(ProcessingParameter): def __init__(self, value) : - super().__init__(Value(type(value), value)) + if isinstance(value, int): + super().__init__(Value('i', value, lock=False)) + elif isinstance(value, float): + super().__init__(Value('d', value, lock=False)) + else: + raise UnknownParameterTypeException() @property def value(self): diff --git a/pymepix/processing/pipeline_centroid_calculator.py b/pymepix/processing/pipeline_centroid_calculator.py index 989d90a..0086282 100644 --- a/pymepix/processing/pipeline_centroid_calculator.py +++ b/pymepix/processing/pipeline_centroid_calculator.py @@ -21,6 +21,7 @@ """Processors relating to centroiding""" from pymepix.processing.datatypes import MessageType from pymepix.processing.logic.centroid_calculator import CentroidCalculator +from pymepix.processing.logic.shared_processing_parameter import SharedProcessingParameter from .basepipeline import BasePipelineObject @@ -30,7 +31,7 @@ class PipelineCentroidCalculator(BasePipelineObject): def __init__( self, - centroid_calculator: CentroidCalculator = CentroidCalculator(), + centroid_calculator: CentroidCalculator = CentroidCalculator(parameter_wrapper_class=SharedProcessingParameter), input_queue=None, create_output=True, num_outputs=1, @@ -41,7 +42,7 @@ def __init__( input_queue=input_queue, create_output=create_output, num_outputs=num_outputs, - shared_output=shared_output, + shared_output=shared_output ) self.centroid_calculator = centroid_calculator diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py index 0dcf74b..7d1bba7 100644 --- a/pymepix/processing/pipeline_packet_processor.py +++ b/pymepix/processing/pipeline_packet_processor.py @@ -20,6 +20,7 @@ from enum import IntEnum from pymepix.processing.datatypes import MessageType +from pymepix.processing.logic.shared_processing_parameter import SharedProcessingParameter import zmq @@ -35,7 +36,7 @@ class PipelinePacketProcessor(BasePipelineObject): def __init__( self, - packet_processor: PacketProcessor = PacketProcessor(), + packet_processor: PacketProcessor = PacketProcessor(parameter_wrapper_class=SharedProcessingParameter), input_queue=None, create_output=True, num_outputs=1, diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py index 
f421559..fbb74c0 100644 --- a/pymepix/processing/rawfilesampler.py +++ b/pymepix/processing/rawfilesampler.py @@ -52,6 +52,7 @@ def init_new_process(self, file, startTime=0): self._longtime_msb = 0 self._longtime_lsb = 0 self._packet_buffer = [] + self._last_longtime = 0 timewalk_lut = None cent_timewalk_lut = None @@ -61,8 +62,7 @@ def init_new_process(self, file, startTime=0): cent_timewalk_lut = np.load(self.cent_timewalk_file) self.packet_processor = PacketProcessor(start_time=startTime, timewalk_lut=timewalk_lut) - self.centroid_calculator = CentroidCalculatorPooled(cent_timewalk_lut=cent_timewalk_lut) - # TODO: There was an error with the Pooled version!!!! + self.centroid_calculator = CentroidCalculator(cent_timewalk_lut=cent_timewalk_lut) self._startTime = startTime From 166dce5a1b64e25e6a0731e1da23c88b30090cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Tue, 14 Sep 2021 09:39:26 +0200 Subject: [PATCH 16/29] Activating TOA processing again - Write TOA data to queue - Fix post processing to ignore included toa data --- pymepix/processing/basepipeline.py | 5 +---- pymepix/processing/logic/packet_processor.py | 14 ++++++++------ pymepix/processing/pipeline_packet_processor.py | 10 +++++++--- pymepix/processing/rawfilesampler.py | 6 +++--- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/pymepix/processing/basepipeline.py b/pymepix/processing/basepipeline.py index 01236b0..538f015 100644 --- a/pymepix/processing/basepipeline.py +++ b/pymepix/processing/basepipeline.py @@ -201,11 +201,8 @@ def run(self): self.error(traceback.format_exc()) break output_type, result = self.post_run() - if ( - output_type is not None and result is not None - ): # TODO: not quite sure what happens without "enabled" + if output_type is not None and result is not None: self.pushOutput(output_type, result) - # print(f'iterations {self.loop_count}') self.info("Job complete") diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py index 0e0ce3e..ff9ca4d 100644 --- a/pymepix/processing/logic/packet_processor.py +++ b/pymepix/processing/logic/packet_processor.py @@ -64,6 +64,7 @@ def process(self, data): # needs to be an integer or "(ltime >> 28) & 0x3" fails longtime = int(np.frombuffer(packet_view[-8:], dtype=np.uint64)[0]) + event_data, pixel_data, timestamps = None, None, None if len(packet) > 0: header = ((packet & 0xF000000000000000) >> 60) & 0xF @@ -77,15 +78,17 @@ def process(self, data): ] if pixels.size > 0: - self.process_pixels(np.int64(pixels), longtime) + pixel_data = self.process_pixels(np.int64(pixels), longtime) if triggers.size > 0: self.process_triggers(np.int64(triggers), longtime) if self.handle_events: - return self.find_events_fast() + result = self.find_events_fast() + if result is not None: + event_data, timestamps = result - return None + return event_data, pixel_data, timestamps def pre_process(self): self.info("Running with triggers? 
{}".format(self.handle_events)) @@ -182,9 +185,6 @@ def process_pixels(self, pixdata, longtime): x += self._x_offset y += self._y_offset - # TODO: don't clatter queue with unnecessary stuff for now - # self.pushOutput(MessageType.PixelData, (x, y, finalToA, ToT)) - if self.handle_events: if self._x is None: self._x = x @@ -197,6 +197,8 @@ def process_pixels(self, pixdata, longtime): self._toa = np.append(self._toa, finalToA) self._tot = np.append(self._tot, ToT) + return x, y, finalToA, ToT + def correct_global_time(self, arr, ltime): pixelbits = (arr >> 28) & 0x3 ltimebits = (ltime >> 28) & 0x3 diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py index 7d1bba7..8f75e84 100644 --- a/pymepix/processing/pipeline_packet_processor.py +++ b/pymepix/processing/pipeline_packet_processor.py @@ -72,7 +72,11 @@ def process(self, data_type=None, data=None): # timestamps are not required for online processing result = self.packet_processor.process(self._packet_sock.recv(copy=False)) if result is not None: - events, _timestamps = result - if events is not None: - return MessageType.EventData, events + event_data, pixel_data, _timestamps = result + + if pixel_data is not None: + self.pushOutput(MessageType.PixelData, pixel_data) + + if event_data is not None: + return MessageType.EventData, event_data return None, None \ No newline at end of file diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py index fbb74c0..cd38df0 100644 --- a/pymepix/processing/rawfilesampler.py +++ b/pymepix/processing/rawfilesampler.py @@ -159,9 +159,9 @@ def __run_packet_processor(self, packet_buffer): return None - def __calculate_and_save_centroids(self, events, timestamps): - centroids = self.centroid_calculator.process(events) - self.saveToHDF5(self._output_file, events, centroids, timestamps) + def __calculate_and_save_centroids(self, event_data, _pixel_data, timestamps): + centroids = self.centroid_calculator.process(event_data) + self.saveToHDF5(self._output_file, event_data, centroids, timestamps) def saveToHDF5(self, output_file, raw, clusters, timeStamps): if output_file is not None: From ac253d6aa612deb27cdffd3dd140d0c5b9f18ac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Thu, 16 Sep 2021 10:31:05 +0200 Subject: [PATCH 17/29] #15 Fix save timestamps --- pymepix/processing/rawfilesampler.py | 12 +++++++----- setup.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py index cd38df0..c027282 100644 --- a/pymepix/processing/rawfilesampler.py +++ b/pymepix/processing/rawfilesampler.py @@ -19,6 +19,7 @@ # see . 
import time import os +import struct import numpy as np import h5py @@ -74,8 +75,6 @@ def pre_run(self): except OSError: pass - self._file = open(self._filename, "rb") - self.init_new_process(self._filename) self._last_update = time.time() @@ -90,11 +89,11 @@ def post_run(self): self.__calculate_and_save_centroids(*result) self.centroid_calculator.post_process() - self._file.close() def bytes_from_file(self, chunksize=8192): print("Reading to memory", flush=True) - ba = np.fromfile(self._file, dtype=" Date: Mon, 20 Sep 2021 16:25:29 +0200 Subject: [PATCH 18/29] #18 Progress bar for CLI post processing implemented --- pymepix/post_processing.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pymepix/post_processing.py b/pymepix/post_processing.py index fab1424..1901ac8 100644 --- a/pymepix/post_processing.py +++ b/pymepix/post_processing.py @@ -1,5 +1,14 @@ +from tqdm import tqdm + from pymepix.processing.rawfilesampler import RawFileSampler -def run_post_processing(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_callback=None): - file_sampler = RawFileSampler(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_callback) - file_sampler.run() \ No newline at end of file +class ProgressBar(tqdm): + + def update_to(self, progress): + return self.update(progress - self.n) + + +def run_post_processing(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file): + with ProgressBar(total=1.0, dynamic_ncols=True) as progress_bar: + file_sampler = RawFileSampler(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_bar.update_to) + file_sampler.run() \ No newline at end of file From daeabec5925cfd89d8cbd1bb72da269ce7f822d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Tue, 21 Sep 2021 07:51:08 +0200 Subject: [PATCH 19/29] Fix start time not being calculated correctly --- pymepix/processing/logic/packet_processor.py | 8 +++++++- pymepix/processing/rawfilesampler.py | 16 +++++++--------- tests/test_post_processing.py | 8 ++++---- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py index ff9ca4d..b638cc4 100644 --- a/pymepix/processing/logic/packet_processor.py +++ b/pymepix/processing/logic/packet_processor.py @@ -293,4 +293,10 @@ def find_events_fast_post(self): self._triggers = np.concatenate( (self._triggers, np.array([self._toa.max() + 1, self._toa.max() + 2])) ) - return self.find_events_fast() \ No newline at end of file + + event_data, timestamps = None, None + result = self.find_events_fast() + if result is not None: + event_data, timestamps = result + + return event_data, None, timestamps \ No newline at end of file diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py index c027282..00efad1 100644 --- a/pymepix/processing/rawfilesampler.py +++ b/pymepix/processing/rawfilesampler.py @@ -46,8 +46,11 @@ def __init__( self._number_of_processes = number_of_processes self._progress_callback = progress_callback - def init_new_process(self, file, startTime=0): + def init_new_process(self, file): """create connections and initialize variables in new process""" + self._startTime = None + with open(self._filename, 'rb') as file: + self._startTime = struct.unpack("L", file.read(8))[0] self._longtime = -1 self._longtime_msb = 0 @@ -62,11 +65,9 @@ def 
init_new_process(self, file, startTime=0): if self.cent_timewalk_file is not None: cent_timewalk_lut = np.load(self.cent_timewalk_file) - self.packet_processor = PacketProcessor(start_time=startTime, timewalk_lut=timewalk_lut) + self.packet_processor = PacketProcessor(start_time=self._startTime, timewalk_lut=timewalk_lut) self.centroid_calculator = CentroidCalculator(cent_timewalk_lut=cent_timewalk_lut) - self._startTime = startTime - def pre_run(self): """init stuff which should only be available in new process""" @@ -217,11 +218,8 @@ def saveToHDF5(self, output_file, raw, clusters, timeStamps): f["raw/y"].attrs["unit"] = "pixel" ############### - # save time stamp data - with open(self._filename, 'rb') as file: - startTime = struct.unpack("L", file.read(8))[0] - - if startTime is not None: + # save time stamp data + if self._startTime is not None: names = ["trigger nr", "timestamp"] if f.keys().__contains__("timing/timepix"): for i, key in enumerate(names): diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py index 2efd198..3178f51 100644 --- a/tests/test_post_processing.py +++ b/tests/test_post_processing.py @@ -4,9 +4,9 @@ from pymepix.processing.rawfilesampler import RawFileSampler -def test_run_17_no_time_walk_correction(): - tmp_file_name = "tmp_run_0017_20191211-0013.hdf5" - file_sampler = RawFileSampler("run_0017_20191211-0013.raw", tmp_file_name) +def test_converted_hdf5(): + tmp_file_name = "run_0685_20191217-1533-new.hdf5" + file_sampler = RawFileSampler("run_0685_20191217-1533.raw", tmp_file_name) file_sampler.run() with h5py.File(tmp_file_name) as new_file: @@ -18,7 +18,7 @@ def test_run_17_no_time_walk_correction(): # for the processing in chunks. This is required as the DBSCAN algorithm can be non-deterministic regarding the order in some rare scenarios. # Martin Ester, Hans-Peter Kriegel, Jiirg Sander, Xiaowei Xu: A Density Based Algorith for Discovering Clusters [p. 
229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf) # https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic - with h5py.File('run_0017_20191211-0013.hdf5') as old_file: + with h5py.File('run_0685_20191217-1533.hdf5') as old_file: order_new, order_old = new_file['centroided/x'][:].argsort(), old_file['centroided/x'][:].argsort() shot_new, shot_old = new_file['centroided/trigger nr'][:][order_new], old_file['centroided/trigger nr'][:][order_old] x_new, x_old = new_file['centroided/x'][:][order_new], old_file['centroided/x'][:][order_old] From 646955ec963d8e3789d04776327a3d880ff1bac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 12:28:17 +0200 Subject: [PATCH 20/29] #19 Fix post-processing test and use git lfs for storage of example files --- .gitattributes | 2 ++ tests/files/raw_test_data.raw | Bin 2560 -> 129 bytes tests/test_post_processing.py | 11 ++++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9aedbe7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.raw filter=lfs diff=lfs merge=lfs -text +*.hdf5 filter=lfs diff=lfs merge=lfs -text diff --git a/tests/files/raw_test_data.raw b/tests/files/raw_test_data.raw index 737e091f93ff47094e136daaef9c0adf3dd014e5..72fbc96a1bff023431e7392a4f192c9a12607007 100644 GIT binary patch literal 129 zcmWN}K@x)?3;@78uiyg}2!iNu5+H?{wm6M?=`-+98*jja`rJ1y0l;+vJ}8tPq_v_ N9~qSNU)A?y)gRhFC$9hi literal 2560 zcmXYye{d9M9me-^*R=jYE1etE+6;zC&q{%ao3<)yb}x5W8DkQgR+vDMPE;I=mrY|W z9+LN9t3vJM3TJUG)T3C34&%@nc zpLxFD=lkP*cW=*`nRAP4y~Ml~cHFUae%`uIbBD5^t+}stg1^w*!+qe{fO`}3&4xS3 z{DYdiwU)n$dH7Bafg26?NImj+h_+z#sII(Lxz8SIk)N<;lu}>C!>-1ED@xS1Fj3;qE#=DAB&D7rY?jZALgYI(1$4^hSGTt0?n;BPi zZZqRAoSxdscx%vYWqbjB)Av6-_U3 z-fDNsPo2r1w|brOujj#?qWlQ8Yn1Pz{(&gJO?{79K27~BNPV|i{s;A+LFy-><)2W0 zv90{ne7<6>==d?_U#QK$P@7-XaqCxC9{y*s%S&9aS{-k7I$vcL{Fd;x^nst#-a0e{ zCLHh382G60&LzO-jkz5ke({3!HaK66?_mASrq|B;6T)j}{f#Mal=VAN$NJw2PqF@l zIk$xMmyEe~)(@Co1M7P*m-Un8+-l|@5#F1uzlu86kD7B^=>IS3m|x%KZDCy~>NT+L z-==qt{-dThO8=|yU+dH6EupV|y|;wE+v1+0?^w*6r0>DtTmyYS{Ib_h-?6ASIh23j z8ac=N`=EAzAJp!zs^gZi?`~P#<|Qs#cRI50a`vM2F>S_v16(HL$YpSqBV(Jvb&d@0 z0zc}=rso(JGW0UI5_|*H9hvC??-bHbf@{EG@av|`O@Nkk-y>>&8CPy?*L$B{Po#~ual|9v4d+(QSGAod+#pHDe{jD0X~ z_V?gphF>}WKIEv}d2lo4vd``Cv(F0L7yEn-_ryL?;TQf6-xlF_vpyV^g$cwD3t5^5 zw+r8X0r9w_!aUDzoS*r7k?#TTb5u9aoe|3B-aVN29O6+SyLW;0DP@-s_YHrRd+kiA z5Pk3B`3)mKZpc~AS!b&O`o4pB5_+?v>>M=iw~2F)nzAy1_*(R39iDeLealiROyB!L z<#-QWLeBOezFeD$(T}+y#t#Zr;rYJnsNM`%#69ucIA54?++U^w;`vvmQMbYLE8G|N zS>XA#B`@O4w#oQrehk|6dgl-L)3>UBa6(1-V` zm%bJ7PlNY6YJhu9*eXQ-h){(bXbTmqfI6Np&+(CzYGQmNo-g-qF#TDcV?W+=?$y*L zGwgdmH1}(W`Z44&(_XG~cd zhHj1dBb?WaJT-rh)8LQr9FyQXwu*87$88m6|6H3aO(1^U@NM>Axktq~?|><1S^sXE zpJPAVM}_tHy)-fY&R*5T`onwWEbCW4uDaRxfvBHhpB`IvvrltMR_GsW^GodW(0Y$dgg)%iOJC>peueoR`&5|uLoq+c{4IOb0P}DDOf{8`U$R#2-i`0urP}w+uYKSA zYdUU;Wtaa~w7kS+YxI^u_^#A^S8Beh#&_`f6P04hODtMzUZQWY=3A`!sv5pe?^!H9 z=_R!Ov5f-0K!5drG0-v9gL{6o_X9AH_7Ho7P zOBvs1M3zz?)gs%N-!JqdL*TuR-q;5wh2GU!<&gyVsL&TW$Uvm=47j~6aw-d!wTP7l zN3=*6HTo>D&z}r^TOa5cy4eYS+tkO|=VBnT_Dq$H^luV+V;bBS)yG)}2KvA$Lznb# z4o23}S8wVjeZMjFrOXcpBik6i6xA2#J7DNt%nz^EP3FIPUBqJkmvKG7`1fLZ7vuZe x^rh*1uz%AP-ea(KkD=N;GO;h&TWM diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py index 3178f51..2e168c9 100644 --- a/tests/test_post_processing.py +++ b/tests/test_post_processing.py @@ -1,12 +1,15 @@ import numpy as np import h5py +import pathlib +import os from 
pymepix.processing.rawfilesampler import RawFileSampler def test_converted_hdf5(): - tmp_file_name = "run_0685_20191217-1533-new.hdf5" - file_sampler = RawFileSampler("run_0685_20191217-1533.raw", tmp_file_name) + folder_path = pathlib.Path(__file__).parent / "files" + tmp_file_name = folder_path / "run_0685_20191217-1533-temp.hdf5" + file_sampler = RawFileSampler(folder_path / "run_0685_20191217-1533.raw", tmp_file_name) file_sampler.run() with h5py.File(tmp_file_name) as new_file: @@ -18,7 +21,7 @@ def test_converted_hdf5(): # for the processing in chunks. This is required as the DBSCAN algorithm can be non-deterministic regarding the order in some rare scenarios. # Martin Ester, Hans-Peter Kriegel, Jiirg Sander, Xiaowei Xu: A Density Based Algorith for Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf) # https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic - with h5py.File('run_0685_20191217-1533.hdf5') as old_file: + with h5py.File(folder_path / 'run_0685_20191217-1533.hdf5') as old_file: order_new, order_old = new_file['centroided/x'][:].argsort(), old_file['centroided/x'][:].argsort() shot_new, shot_old = new_file['centroided/trigger nr'][:][order_new], old_file['centroided/trigger nr'][:][order_old] x_new, x_old = new_file['centroided/x'][:][order_new], old_file['centroided/x'][:][order_old] @@ -31,6 +34,8 @@ def test_converted_hdf5(): assertCentroidsAlmostEqual((x_new, y_new, tof_new, tot_new, tot_avg_new, size_new), (x_old, y_old, tof_old, tot_old, tot_avg_old, size_old)) + os.remove(tmp_file_name) + def assertCentroidsAlmostEqual(expected, actual): np.testing.assert_array_equal(expected[0], actual[0]) np.testing.assert_array_equal(expected[1], actual[1]) From 1f447a8418d8f93ae2534354c43d66dddec5df1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 12:30:57 +0200 Subject: [PATCH 21/29] removed .raw and .hdf5 files from gitignore to use git lfs for this type of files instead --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 20a2a61..5bd6242 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -*.raw -*.hdf5 *.dat *.egg *.egg-info/ From ff043f653ff0dff9383a2468e57e14dc1152317b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 12:31:41 +0200 Subject: [PATCH 22/29] #19 include large datasets raw files and the expected results generated with the old raw converter --- tests/files/run_0685_20191217-1533.hdf5 | 3 +++ tests/files/run_0685_20191217-1533.raw | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 tests/files/run_0685_20191217-1533.hdf5 create mode 100644 tests/files/run_0685_20191217-1533.raw diff --git a/tests/files/run_0685_20191217-1533.hdf5 b/tests/files/run_0685_20191217-1533.hdf5 new file mode 100644 index 0000000..c7c8d68 --- /dev/null +++ b/tests/files/run_0685_20191217-1533.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7fba34eb8bdc7aa2c1048f287f413abfdd9d4cb64cba17459ed1fa24897bd8 +size 99200882 diff --git a/tests/files/run_0685_20191217-1533.raw b/tests/files/run_0685_20191217-1533.raw new file mode 100644 index 0000000..f2361eb --- /dev/null +++ b/tests/files/run_0685_20191217-1533.raw @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe345a4cc069a6aff68ee125841d88a29d95ee1419dc729fd4086ed601859586 +size 27405600 From 2da5c228150871eef5f10a1437c621202e7ef38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= 
Date: Mon, 27 Sep 2021 13:17:23 +0200 Subject: [PATCH 23/29] #17 Update documentation for the pymepix CLI --- doc/users/pymepixacq.rst | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/doc/users/pymepixacq.rst b/doc/users/pymepixacq.rst index a4a5ba5..fdd0a07 100644 --- a/doc/users/pymepixacq.rst +++ b/doc/users/pymepixacq.rst @@ -5,32 +5,27 @@ PymepixAcq - Command line =========================== -Included with pymepix is a command line code using the pymepix library to acquire from timepix. It is run using:: - - pymepix-acq --time 10 --output my_file +Included with pymepix is a command line code using the pymepix library to acquire from timepix. The command line interface includes two different commands: + - "connect": to connect to a running timepix camera and record data + - "post-process": to post-process recorded raw data files into easier usable hdf5 files containing raw and centroided data Doing:: pymepix-acq --help - Outputs the help:: - usage: pymepix-acq [-h] [-i IP] [-p PORT] [-s SPX] [-v BIAS] -t TIME -o OUTPUT - [-d DECODE] [-T TOF] + usage: pymepix-acq [-h] {connect,post-process} ... Timepix acquisition script + positional arguments: + {connect,post-process} + connect Connect to TimePix camera and acquire data. + post-process Perform post-processing with a acquired raw data file. + optional arguments: - -h, --help show this help message and exit - -i IP, --ip IP IP address of Timepix - -p PORT, --port PORT TCP port to use for the connection - -s SPX, --spx SPX Sophy config file to load - -v BIAS, --bias BIAS Bias voltage in Volts - -t TIME, --time TIME Acquisition time in seconds - -o OUTPUT, --output OUTPUT output filename prefix - -d DECODE, --decode DECODE Store decoded values instead - -T TOF, --tof TOF Compute TOF if decode is enabled - - -TODO: MORE DOCS + -h, --help show this help message and exit + + +You can access the documentation for both commands by executing "pymepix-acq connect -h" or "pymepix-acq post-process -h" respectively. 
\ No newline at end of file From 080cf6354769ca7c88f07312863c0ef102cbc02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 14:10:41 +0200 Subject: [PATCH 24/29] Add tqdm as a dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4702036..39f0c69 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ requires = [] -install_requires = ["numpy", "sklearn", "pyyaml", "pyzmq", "pyserial", "h5py"] +install_requires = ["numpy", "sklearn", "pyyaml", "pyzmq", "pyserial", "h5py", "tqdm"] console_scripts = ["pymepix-acq=pymepix.main:main"] From 965586d24a04a163b2a236cda569a9f5aaecb960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 14:27:09 +0200 Subject: [PATCH 25/29] Adjust dependencies in requirements file --- requirements.txt | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0662730..f18a322 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pyzmq h5py pyyaml pyserial +tqdm diff --git a/setup.py b/setup.py index 39f0c69..4103c92 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ requires = [] -install_requires = ["numpy", "sklearn", "pyyaml", "pyzmq", "pyserial", "h5py", "tqdm"] +install_requires = ["numpy", "scikit-learn", "pyyaml", "pyzmq", "pyserial", "h5py", "tqdm"] console_scripts = ["pymepix-acq=pymepix.main:main"] From e2b47d04532c5f80c30ebbf07deaa1969e82b9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Mon, 27 Sep 2021 14:35:34 +0200 Subject: [PATCH 26/29] Fix import in test file --- tests/test_sophyconfig_pytest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_sophyconfig_pytest.py b/tests/test_sophyconfig_pytest.py index fd6076c..e7d066f 100644 --- a/tests/test_sophyconfig_pytest.py +++ b/tests/test_sophyconfig_pytest.py @@ -3,7 +3,7 @@ import numpy as np -from pymepix import Pymepix +from pymepix.pymepix_connection import PymepixConnection from pymepix.config.sophyconfig import SophyConfig from pymepix.SPIDR.spidrcmds import SpidrCmds from pymepix.timepixdef import DacRegisterCodes @@ -173,7 +173,7 @@ def test_send_config(): server_thread.start() ip, port = server.server_address - tpx = Pymepix((ip, port)) + tpx = PymepixConnection((ip, port)) tpx[0].loadConfig(CONFIG_PATH) shutdown_event.set() From e2311dc99a381409e7492dd96852edd19bd3851b Mon Sep 17 00:00:00 2001 From: BenMoon Date: Wed, 29 Sep 2021 10:33:17 +0200 Subject: [PATCH 27/29] Typo --- doc/users/pymepixacq.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/users/pymepixacq.rst b/doc/users/pymepixacq.rst index fdd0a07..89dae4c 100644 --- a/doc/users/pymepixacq.rst +++ b/doc/users/pymepixacq.rst @@ -22,10 +22,10 @@ Outputs the help:: positional arguments: {connect,post-process} connect Connect to TimePix camera and acquire data. - post-process Perform post-processing with a acquired raw data file. + post-process Perform post-processing for an acquired raw data file. optional arguments: -h, --help show this help message and exit -You can access the documentation for both commands by executing "pymepix-acq connect -h" or "pymepix-acq post-process -h" respectively. \ No newline at end of file +You can access the documentation for both commands by executing "pymepix-acq connect -h" or "pymepix-acq post-process -h" respectively. 
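[Editorial example] The progress reporting wired up in this series follows one small contract: RawFileSampler (patch 13) calls its progress_callback with an absolute fraction in [0, 1], and the tqdm-based ProgressBar (patch 18) translates that into the increment that tqdm.update() expects. A minimal, self-contained sketch of that contract; long_running_job is a hypothetical stand-in for RawFileSampler.run() and is not part of any patch:

    from tqdm import tqdm

    class ProgressBar(tqdm):
        def update_to(self, progress):
            # tqdm.update() takes an increment; the callback delivers an
            # absolute fraction, so subtract the current counter self.n.
            return self.update(progress - self.n)

    def long_running_job(progress_callback=None):
        # Hypothetical stand-in for RawFileSampler.run():
        # reports absolute progress after each unit of work.
        for step in range(1, 101):
            if progress_callback is not None:
                progress_callback(step / 100)

    if __name__ == "__main__":
        with ProgressBar(total=1.0, dynamic_ncols=True) as progress_bar:
            long_running_job(progress_bar.update_to)

Because the callback takes a plain float, the same RawFileSampler can drive a CLI progress bar here or, in principle, any other consumer that accepts a fraction.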
From 52e1aa4d7c688a55855467d8006791465173ad4e Mon Sep 17 00:00:00 2001 From: BenMoon Date: Wed, 29 Sep 2021 13:05:30 +0200 Subject: [PATCH 28/29] Fix typo --- pymepix/processing/pipeline_packet_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py index 8f75e84..3368119 100644 --- a/pymepix/processing/pipeline_packet_processor.py +++ b/pymepix/processing/pipeline_packet_processor.py @@ -28,7 +28,7 @@ from .logic.packet_processor import PacketProcessor class PipelinePacketProcessor(BasePipelineObject): - """Processes Pixel packets for ToA, ToT,triggers and events + """Processes Pixel packets for ToA, ToT, triggers and events This class, creates a UDP socket connection to SPIDR and recivies the UDP packets from Timepix It then pre-processes them and sends them off for more processing @@ -79,4 +79,4 @@ def process(self, data_type=None, data=None): if event_data is not None: return MessageType.EventData, event_data - return None, None \ No newline at end of file + return None, None From 8818bce0ef11de8d033ea0315b5b94f485083b0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20R=C3=B6hling?= Date: Thu, 30 Sep 2021 16:05:29 +0200 Subject: [PATCH 29/29] Include changes suggested in pull-request --- pymepix/main.py | 24 ------ pymepix/post_processing.py | 2 +- pymepix/processing/acquisition.py | 9 ++- pymepix/processing/baseacquisition.py | 4 +- .../processing/logic/centroid_calculator.py | 73 +++++++++++++++++-- pymepix/processing/logic/packet_processor.py | 44 +++++++++++ .../processing/logic/processing_parameter.py | 20 +++++ pymepix/processing/logic/processing_step.py | 31 +++++++- .../logic/shared_processing_parameter.py | 21 +++++- .../pipeline_centroid_calculator.py | 2 +- .../processing/pipeline_packet_processor.py | 3 +- pymepix/processing/rawfilesampler.py | 45 ++++++------ requirements.txt | 1 + tests/test_centroid_calculator.py | 56 ++++++-------- tests/test_post_processing.py | 11 ++- 15 files changed, 245 insertions(+), 101 deletions(-) diff --git a/pymepix/main.py b/pymepix/main.py index b714967..6f690ec 100644 --- a/pymepix/main.py +++ b/pymepix/main.py @@ -28,9 +28,7 @@ import pymepix.config.load_config as cfg from pymepix.post_processing import run_post_processing -from pymepix.processing.datatypes import MessageType from pymepix.pymepix_connection import PollBufferEmpty, PymepixConnection -from pymepix.util.storage import open_output_file, store_raw, store_toa, store_tof logging.basicConfig( level=logging.INFO, @@ -58,19 +56,6 @@ def connect_timepix(args): # Set the bias voltage pymepix.biasVoltage = args.bias - ext = "raw" - if args.decode: - logging.info("Decoding data enabled") - if args.tof: - logging.info("Tof calculation enabled") - ext = "tof" - else: - ext = "toa" - else: - logging.info("No decoding selected") - - output_file = open_output_file(args.output, ext) - total_time = args.time # self._timepix._spidr.resetTimers() @@ -93,15 +78,6 @@ def connect_timepix(args): except PollBufferEmpty: continue logging.debug("Datatype: {} Data:{}".format(data_type, data)) - if data_type is MessageType.RawData: - if not args.decode: - store_raw(output_file, data) - elif data_type is MessageType.PixelData: - if args.decode and not args.tof: - store_toa(output_file, data) - elif data_type is MessageType.PixelData: - if args.decode and args.tof: - store_tof(output_file, data) pymepix.stop() diff --git a/pymepix/post_processing.py b/pymepix/post_processing.py 
index 1901ac8..42662ba 100644 --- a/pymepix/post_processing.py +++ b/pymepix/post_processing.py @@ -11,4 +11,4 @@ def update_to(self, progress): def run_post_processing(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file): with ProgressBar(total=1.0, dynamic_ncols=True) as progress_bar: file_sampler = RawFileSampler(input_file_name, output_file, number_processes, timewalk_file, cent_timewalk_file, progress_bar.update_to) - file_sampler.run() \ No newline at end of file + file_sampler.run() diff --git a/pymepix/processing/acquisition.py b/pymepix/processing/acquisition.py index 7299d02..0a95529 100644 --- a/pymepix/processing/acquisition.py +++ b/pymepix/processing/acquisition.py @@ -35,10 +35,13 @@ class PixelPipeline(AcquisitionPipeline): This class can be used as a base for all acqusition pipelines. """ - def __init__(self, data_queue, address, longtime, use_event=False, name="Pixel"): + def __init__(self, data_queue, address, longtime, use_event=False, name="Pixel", event_window=(0, 1E-3)): + """ + Parameters: + use_event (boolean): if True, packets are forwarded to the centroiding and centroids are calculated.""" AcquisitionPipeline.__init__(self, name, data_queue) self.info("Initializing Pixel pipeline") - self.packet_processor = PacketProcessor(handle_events=use_event, event_window=(0, 1E-3)) + self.packet_processor = PacketProcessor(handle_events=use_event, event_window=event_window) self.addStage(0, UdpSampler, address, longtime) self.addStage(2, PipelinePacketProcessor, num_processes=2) @@ -56,8 +59,6 @@ class CentroidPipeline(PixelPipeline): Same as the pixel pipeline but also includes centroid processing, note that this can be extremely slow when dealing with a huge number of objects - - """ def __init__(self, data_queue, address, longtime): diff --git a/pymepix/processing/baseacquisition.py b/pymepix/processing/baseacquisition.py index c8a4106..46d64ce 100644 --- a/pymepix/processing/baseacquisition.py +++ b/pymepix/processing/baseacquisition.py @@ -321,9 +321,9 @@ def main(): acqpipline = AcquisitionPipeline("Test", end_queue) - test_value = Value("I", 0) + longtime = Value("I", 0) - acqpipline.addStage(0, UdpSampler, ("127.0.0.1", 50000), test_value) + acqpipline.addStage(0, UdpSampler, ("127.0.0.1", 50000), longtime) acqpipline.addStage(2, PipelinePacketProcessor, num_processes=4) def get_queue_thread(queue): diff --git a/pymepix/processing/logic/centroid_calculator.py b/pymepix/processing/logic/centroid_calculator.py index 07d8739..aa6a3e4 100644 --- a/pymepix/processing/logic/centroid_calculator.py +++ b/pymepix/processing/logic/centroid_calculator.py @@ -1,3 +1,22 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see <https://www.gnu.org/licenses/>. import multiprocessing as mp import numpy as np @@ -8,6 +27,35 @@ from pymepix.processing.logic.processing_step import ProcessingStep class CentroidCalculator(ProcessingStep): + """ + Class responsible for calculating centroids in timepix data. This includes the calculation + of the clusters first and the centroids. The data processed is not the direct raw data but the + data that has been processed by the PacketProcessor before (x, y, tof, tot). + + Attributes + ---------- + tot_threshold : int + Threshold applied before calculating the clusters. A higher threshold can lead to more clusters found in some cases. + epsilon : float + see DBSCAN + min_samples : int + see DBSCAN + triggers_processed : int + Every triggers_processed-th trigger is used for the calculation. Increasing the value can speed up online processing if the + data rate is too high to process all triggers directly. + chunk_size_limit : int + Maximum size of the chunks to increase the performance of DBSCAN. Higher and lower values might increase the runtime. + cent_timewalk_lut + Data for correction of the time-walk + parameter_wrapper_class : ProcessingParameter + Class used to wrap the processing parameters to make them changeable while processing is running (useful for online optimization) + + Methods + ------- + process(data): + Process data and return the result. Only this method should be used from outside! Use the other methods only for testing or + if you are sure about what you are doing + """ def __init__(self, tot_threshold=0, epsilon=2, min_samples=3, triggers_processed=1, chunk_size_limit=6_500, cent_timewalk_lut=None, parameter_wrapper_class=ProcessingParameter): @@ -77,7 +125,8 @@ def __skip_triggers(self, shot, x, y, tof, tot): return shot[mask], x[mask], y[mask], tof[mask], tot[mask] def __divide_into_chunks(self, shot, x, y, tof, tot): - # Reordering the voxels can have an impact on the clusterings result. See CentroidCalculator.perform_clustering string doc for further information! + """ Reordering the voxels can have an impact on the clustering's result. See the CentroidCalculator.perform_clustering + docstring for further information! """ order = shot.argsort() shot, x, y, tof, tot = shot[order], x[order], y[order], tof[order], tot[order] split_indices = self.__calc_trig_chunks_split_indices(shot) @@ -107,13 +156,14 @@ def __calc_trig_chunks_split_indices(self, shot): return trigger_chunks def __centroid_chunks_to_centroids(self, chunks): - centroids = [[] for i in range(7)] + # range(7) because the centroids have 7 dimensions: shot, x, y, tof, tot avg, tot max, cluster size + """centroids = [[] for i in range(7)] for chunk in list(chunks): if chunk != None: for index, coordinate in enumerate(chunk): - centroids[index].append(coordinate) + centroids[index].append(coordinate)""" - return [np.concatenate(coordinate) for coordinate in centroids] + return np.concatenate(list(chunks), axis=1) def perform_centroiding(self, chunks): return map(self.calculate_centroids, chunks) @@ -146,11 +196,14 @@ def calculate_centroids(self, chunk): return None def perform_clustering(self, shot, x, y, tof): - """ The clustering with DBSCAN, which is performed in this function is dependent on the order of the data in rare cases. Therefore reordering in any means can + """ The clustering with DBSCAN, which is performed in this function, is dependent on the + order of the data in rare cases. Therefore reordering in any means can lead to slightly changed results, which should not be an issue. - Martin Ester, Hans-Peter Kriegel, Jiirg Sander, Xiaowei Xu: A Density Based Algorith for Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf) - A more specific explaination can be found here: https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic""" + Martin Ester, Hans-Peter Kriegel, Jörg Sander, Xiaowei Xu: A Density-Based Algorithm for + Discovering Clusters [p. 229-230] (https://www.aaai.org/Papers/KDD/1996/KDD96-037.pdf) + A more specific explanation can be found here: + https://stats.stackexchange.com/questions/306829/why-is-dbscan-deterministic""" if x.size >= 0: X = np.column_stack((shot * self.epsilon * 1_000, x, y, tof * self._tof_scale)) @@ -213,6 +266,10 @@ def calculate_centroids_properties(self, shot, x, y, tof, tot, labels): class CentroidCalculatorPooled(CentroidCalculator): + """ + Parallelized implementation of CentroidCalculator, based on an mp.Pool of worker processes. + """ + def __init__(self, number_of_processes=None, *args, **kwargs): super().__init__(*args, **kwargs) self._number_of_processes = number_of_processes @@ -232,4 +289,4 @@ def post_process(self): def __getstate__(self): self_dict = self.__dict__.copy() del self_dict['_pool'] - return self_dict \ No newline at end of file + return self_dict diff --git a/pymepix/processing/logic/packet_processor.py b/pymepix/processing/logic/packet_processor.py index b638cc4..b7167f2 100644 --- a/pymepix/processing/logic/packet_processor.py +++ b/pymepix/processing/logic/packet_processor.py @@ -1,3 +1,22 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see <https://www.gnu.org/licenses/>. from enum import IntEnum from multiprocessing import Value from ctypes import c_bool @@ -23,6 +42,31 @@ class PixelOrientation(IntEnum): class PacketProcessor(ProcessingStep): + """ Class responsible for transforming the raw data coming from the timepix directly into a more + easily processable data format. Takes into account the pixel- and trigger data to calculate toa and tof + dimensions. + + Attributes + ---------- + handle_events : boolean + Calculate events (tof) only if handle_events is True. Otherwise only pixel-data (toa only) is provided. + event_window : (float, float) + The range of tof used for processing data; data outside of this range is discarded. + position_offset : (float, float) + Offset/shift of the x- and y-position + orientation : int + start_time : int + timewalk_lut + Data for correction of the time-walk + parameter_wrapper_class : ProcessingParameter + Class used to wrap the processing parameters to make them changeable while processing is running (useful for online optimization) + + Methods + ------- + process(data): + Process data and return the result. Only this method should be used from outside! Use the other methods only for testing or + if you are sure about what you are doing + """ def __init__(self, handle_events=True, event_window=(0.0, 10000.0), position_offset=(0, 0), orientation=PixelOrientation.Up, start_time=0, timewalk_lut=None, parameter_wrapper_class=ProcessingParameter): diff --git a/pymepix/processing/logic/processing_parameter.py b/pymepix/processing/logic/processing_parameter.py index 7c8541c..575cd83 100644 --- a/pymepix/processing/logic/processing_parameter.py +++ b/pymepix/processing/logic/processing_parameter.py @@ -1,3 +1,23 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see <https://www.gnu.org/licenses/>. + class ProcessingParameter: def __init__(self, value) : diff --git a/pymepix/processing/logic/processing_step.py b/pymepix/processing/logic/processing_step.py index 10414c1..9038da9 100644 --- a/pymepix/processing/logic/processing_step.py +++ b/pymepix/processing/logic/processing_step.py @@ -1,8 +1,37 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see <https://www.gnu.org/licenses/>. from abc import abstractmethod, ABC from pymepix.core.log import Logger class ProcessingStep(Logger, ABC): + """Representation of one processing step in the pipeline for processing timepix raw data.
+ Implementations are provided by PacketProcessor and CentroidCalculator. To combine those (and possibly other) classes + into a pipeline, they have to implement this interface. + Also provides pre- and post-process implementations which are required for integration in the online processing pipeline + (see PipelineCentroidCalculator and PipelinePacketProcessor). + + Currently the picture is the following: + - For post-processing, the CentroidCalculator and the PacketProcessor are used directly + - PipelineCentroidCalculator and PipelinePacketProcessor build on top of CentroidCalculator and PacketProcessor + to provide an integration in the existing online processing pipeline for online analysis.""" def __init__(self, name): super().__init__(name) @@ -15,4 +44,4 @@ def post_process(self): @abstractmethod def process(self, data): - pass \ No newline at end of file + pass diff --git a/pymepix/processing/logic/shared_processing_parameter.py b/pymepix/processing/logic/shared_processing_parameter.py index 6ea21c8..171e5fb 100644 --- a/pymepix/processing/logic/shared_processing_parameter.py +++ b/pymepix/processing/logic/shared_processing_parameter.py @@ -1,3 +1,22 @@ +# This file is part of Pymepix +# +# In all scientific work using Pymepix, please reference it as +# +# A. F. Al-Refaie, M. Johny, J. Correa, D. Pennicard, P. Svihra, A. Nomerotski, S. Trippel, and J. Küpper: +# "PymePix: a python library for SPIDR readout of Timepix3", J. Inst. 14, P10003 (2019) +# https://doi.org/10.1088/1748-0221/14/10/P10003 +# https://arxiv.org/abs/1905.07999 +# +# Pymepix is free software: you can redistribute it and/or modify it under the terms of the GNU +# General Public License as published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with this program. If not, +# see <https://www.gnu.org/licenses/>.
from multiprocessing import Value from pymepix.processing.logic.processing_parameter import ProcessingParameter @@ -21,4 +40,4 @@ def value(self): @value.setter def value(self, value): - self._value.value = value \ No newline at end of file + self._value.value = value diff --git a/pymepix/processing/pipeline_centroid_calculator.py b/pymepix/processing/pipeline_centroid_calculator.py index 0086282..cb401d9 100644 --- a/pymepix/processing/pipeline_centroid_calculator.py +++ b/pymepix/processing/pipeline_centroid_calculator.py @@ -50,4 +50,4 @@ def process(self, data_type=None, data=None): if data_type == MessageType.EventData: return MessageType.CentroidData, self.centroid_calculator.process(data) - return None, None \ No newline at end of file + return None, None diff --git a/pymepix/processing/pipeline_packet_processor.py b/pymepix/processing/pipeline_packet_processor.py index 8f75e84..f902588 100644 --- a/pymepix/processing/pipeline_packet_processor.py +++ b/pymepix/processing/pipeline_packet_processor.py @@ -54,7 +54,6 @@ def __init__( self.packet_processor = packet_processor def init_new_process(self): - """create connections and initialize variables in new process""" self.debug("create ZMQ socket") ctx = zmq.Context.instance() self._packet_sock = ctx.socket(zmq.PULL) @@ -79,4 +78,4 @@ def process(self, data_type=None, data=None): if event_data is not None: return MessageType.EventData, event_data - return None, None \ No newline at end of file + return None, None diff --git a/pymepix/processing/rawfilesampler.py b/pymepix/processing/rawfilesampler.py index 00efad1..f222555 100644 --- a/pymepix/processing/rawfilesampler.py +++ b/pymepix/processing/rawfilesampler.py @@ -196,30 +196,31 @@ def saveToHDF5(self, output_file, raw, clusters, timeStamps): ############### # save raw data - names = ["trigger nr", "x", "y", "tof", "tot"] - if f.keys().__contains__("raw"): - for i, key in enumerate(names): - dset = f["raw"][key] - dset.resize(dset.shape[0] + len(raw[i]), axis=0) - dset[-len(raw[i]) :] = raw[i] - else: - grp = f.create_group("raw") - grp.attrs["description"] = "timewalk correted raw events" - grp.attrs["nr events"] = 0 - grp.create_dataset("trigger nr", data=raw[0].astype(np.uint64), maxshape=(None,)) - grp.create_dataset("x", data=raw[1].astype(np.uint8), maxshape=(None,)) - grp.create_dataset("y", data=raw[2].astype(np.uint8), maxshape=(None,)) - grp.create_dataset("tof", data=raw[3], maxshape=(None,)) - grp.create_dataset("tot", data=raw[4].astype(np.uint32), maxshape=(None,)) - - f["raw/tof"].attrs["unit"] = "s" - f["raw/tot"].attrs["unit"] = "s" - f["raw/x"].attrs["unit"] = "pixel" - f["raw/y"].attrs["unit"] = "pixel" + if raw is not None: + names = ["trigger nr", "x", "y", "tof", "tot"] + if f.keys().__contains__("raw"): + for i, key in enumerate(names): + dset = f["raw"][key] + dset.resize(dset.shape[0] + len(raw[i]), axis=0) + dset[-len(raw[i]) :] = raw[i] + else: + grp = f.create_group("raw") + grp.attrs["description"] = "timewalk corrected raw events" + grp.attrs["nr events"] = 0 + grp.create_dataset("trigger nr", data=raw[0].astype(np.uint64), maxshape=(None,)) + grp.create_dataset("x", data=raw[1].astype(np.uint8), maxshape=(None,)) + grp.create_dataset("y", data=raw[2].astype(np.uint8), maxshape=(None,)) + grp.create_dataset("tof", data=raw[3], maxshape=(None,)) + grp.create_dataset("tot", data=raw[4].astype(np.uint32), maxshape=(None,)) + + f["raw/tof"].attrs["unit"] = "s" + f["raw/tot"].attrs["unit"] = "s" + f["raw/x"].attrs["unit"] = "pixel" + f["raw/y"].attrs["unit"]
= "pixel" ############### # save time stamp data - if self._startTime is not None: + if timeStamps is not None and self._startTime is not None: names = ["trigger nr", "timestamp"] if f.keys().__contains__("timing/timepix"): for i, key in enumerate(names): @@ -269,4 +270,4 @@ def run(self): if len(self._packet_buffer) > 0: self.push_data() - self.post_run() \ No newline at end of file + self.post_run() diff --git a/requirements.txt b/requirements.txt index f18a322..8d1adce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +# This is only used for github actions. numpy scikit-learn pyzmq diff --git a/tests/test_centroid_calculator.py b/tests/test_centroid_calculator.py index 83e5f9a..d7da62e 100644 --- a/tests/test_centroid_calculator.py +++ b/tests/test_centroid_calculator.py @@ -2,6 +2,13 @@ from pymepix.processing.logic.centroid_calculator import CentroidCalculator +""" +The purpose of this test is the validation of the implemented calculation of centroids. The implemented +centroiding consists of slightly more functionality than DBSCAN + calculation of centroids. In addition +the data is split into chunks to optimise the performance of DBSCAN. The following tests have the purpose to +verify this splitting procedure. +""" + def test_calculate_centroid_properties_1(): centroid_calculator = CentroidCalculator() shot = np.array([1, 1, 1, 1, 1]) @@ -45,11 +52,7 @@ def test_divide_into_chunks_1(): data = [3, 1, 2, 4, 5, 6] - shot = np.repeat(data, factor) - x = np.repeat(data, factor) - y = np.repeat(data, factor) - tof = np.repeat(data, factor) - tot = np.repeat(data, factor) + shot, x, y, tof, tot = __create_timepix_data(data, factor) chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) @@ -61,12 +64,7 @@ def test_divide_into_chunks_2(): centroid_calculator = CentroidCalculator() factor = 1 - - shot = np.repeat([1, 2], factor) - x = np.repeat([1, 2], factor) - y = np.repeat([1, 2], factor) - tof = np.repeat([1, 2], factor) - tot = np.repeat([1, 2], factor) + shot, x, y, tof, tot = __create_timepix_data([1, 2], factor) np.testing.assert_array_equal([1, 2], centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot)[0][1]) @@ -75,12 +73,7 @@ def test_divide_into_chunks_3(): centroid_calculator = CentroidCalculator() factor = 1 - - shot = np.repeat([1, 2], factor) - x = np.repeat([1, 2], factor) - y = np.repeat([1, 2], factor) - tof = np.repeat([1, 2], factor) - tot = np.repeat([1, 2], factor) + shot, x, y, tof, tot = __create_timepix_data([1, 2], factor) chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) sum = 0 @@ -99,11 +92,7 @@ def test_divide_into_chunks_4(): data = [3, 1, 2, 4, 5, 6, 4, 4, 4] - shot = np.repeat(data, factor) - x = np.repeat(data, factor) - y = np.repeat(data, factor) - tof = np.repeat(data, factor) - tot = np.repeat(data, factor) + shot, x, y, tof, tot = __create_timepix_data(data, factor) chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) sum = 0 @@ -122,11 +111,7 @@ def test_divide_into_chunks_5(): data = [3, 1, 2, 4, 5, 6, 1, 1] - shot = np.repeat(data, factor) - x = np.repeat(data, factor) - y = np.repeat(data, factor) - tof = np.repeat(data, factor) - tot = np.repeat(data, factor) + shot, x, y, tof, tot = __create_timepix_data(data, factor) chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) sum = 0 @@ -144,12 +129,7 @@ def test_divide_into_chunks_6(): factor = 1 data = 
range(0, 10_000) - - shot = np.repeat(data, factor) - x = np.repeat(data, factor) - y = np.repeat(data, factor) - tof = np.repeat(data, factor) - tot = np.repeat(data, factor) + shot, x, y, tof, tot = __create_timepix_data(data, factor) chunks = centroid_calculator._CentroidCalculator__divide_into_chunks(shot, x, y, tof, tot) sum = 0 @@ -174,4 +154,12 @@ def test_process(): def assertCentroidsEqual(expected, actual): for i in range(len(expected)): - np.testing.assert_array_equal(expected[i], actual[i]) \ No newline at end of file + np.testing.assert_array_equal(expected[i], actual[i]) + +def __create_timepix_data(data, factor): + shot = np.repeat(data, factor) + x = np.repeat(data, factor) + y = np.repeat(data, factor) + tof = np.repeat(data, factor) + tot = np.repeat(data, factor) + return shot, x, y, tof, tot diff --git a/tests/test_post_processing.py b/tests/test_post_processing.py index 2e168c9..bc389ae 100644 --- a/tests/test_post_processing.py +++ b/tests/test_post_processing.py @@ -5,6 +5,15 @@ from pymepix.processing.rawfilesampler import RawFileSampler +"""Perform the calculation of centroids (post-processing) for a complete, real-world dataset. +The result is verified against data processed with the previously, internally used RawConverter. +The results of the RawConverter are therefore assumed to be absolute Truth. + +If changes are expected due to adjustments in Clustering or Centroiding, the ground truth data has +to be replaced or adjusted! If there are issues with this test do not assume the code to be wrong in any case +as this test is only based on the RawConverters results. Different results are not necessarily bad! But check +your results very carefully if they differ!""" + def test_converted_hdf5(): folder_path = pathlib.Path(__file__).parent / "files" @@ -44,4 +53,4 @@ def assertCentroidsAlmostEqual(expected, actual): np.testing.assert_array_almost_equal(expected[2], actual[2], 15) np.testing.assert_array_equal(expected[3], actual[3]) np.testing.assert_array_equal(expected[4], actual[4]) - np.testing.assert_array_equal(expected[5], actual[5]) \ No newline at end of file + np.testing.assert_array_equal(expected[5], actual[5])
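[Editorial example] The central mechanism of this series is the parameter wrapper pair from patches 14 and 15: a plain ProcessingParameter is simply copied into worker processes, while SharedProcessingParameter is backed by multiprocessing.Value and therefore stays visible across process boundaries, which is what allows the online pipeline to adjust settings such as epsilon while acquisition is running. A short illustrative sketch of that difference, assuming the two modules added in those patches are importable; it is not part of the test suite:

    from multiprocessing import Process

    from pymepix.processing.logic.processing_parameter import ProcessingParameter
    from pymepix.processing.logic.shared_processing_parameter import SharedProcessingParameter

    def set_value(parameter, value):
        # Runs in a child process and mutates whichever wrapper it was handed.
        parameter.value = value

    if __name__ == "__main__":
        plain = ProcessingParameter(1)
        shared = SharedProcessingParameter(1)
        for parameter in (plain, shared):
            child = Process(target=set_value, args=(parameter, 2))
            child.start()
            child.join()
        print(plain.value)   # still 1: the child only changed its own copy
        print(shared.value)  # 2: multiprocessing.Value lives in shared memory

Note that SharedProcessingParameter accepts only int and float values and raises UnknownParameterTypeException for anything else (patch 15), while the plain wrapper used for offline post-processing carries no such restriction.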