feat: customisable duration timer functions (#10)
By default, `run_durations` will use `time.perf_counter`, but
this is now customisable to use any timing function, e.g.
`time.perf_counter_ns` or `time.monotonic` for specialist use
cases.
alubbock authored Aug 17, 2024
1 parent 07dafa4 commit e51208a
Showing 3 changed files with 59 additions and 11 deletions.
38 changes: 31 additions & 7 deletions README.md
@@ -79,7 +79,12 @@ results = pd.read_json(basic_bench.outfile, lines=True)
```

The above example captures the fields `start_time`, `finish_time`,
`run_durations` and `function_name`. Microbench can capture many
`run_durations` (of each function call, in seconds by default), `function_name`,
`timestamp_tz` (timezone name, see Timezones section of this README), and
`duration_counter` (the name of the function used to calculate
durations, see Duration timings section of this README).

Microbench can capture many
other types of metadata from the environment, resource usage, and
hardware, which are covered below.

@@ -90,20 +95,22 @@
names) to extend functionality. Note that keyword arguments can be supplied
to the constructor (in this case `some_info=123`) to specify additional
information to capture. We also specify `iterations=3`, which means that the
called function will be executed 3 times (the returned result will always
be from the final run) with timings captured for each run.
be from the final run) with timings captured for each run. We specify a custom
duration counter, `time.monotonic` instead of the default `time.perf_counter`
(see Duration timings section later in this README for explanation).
This example also specifies the `outfile` option,
which appends metadata to a file on disk.

```python
from microbench import *
import numpy, pandas
import numpy, pandas, time

class MyBench(MicroBench, MBFunctionCall, MBPythonVersion, MBHostInfo):
outfile = '/home/user/my-benchmarks'
capture_versions = (numpy, pandas) # Or use MBGlobalPackages/MBInstalledPackages
env_vars = ('SLURM_ARRAY_TASK_ID', )

benchmark = MyBench(some_info=123, iterations=3)
benchmark = MyBench(some_info=123, iterations=3, duration_counter=time.monotonic)
```

The `env_vars` option from the example above specifies a list of environment
@@ -162,9 +169,16 @@
separate line in the file. The output from the minimal example above for a
single run will look similar to the following:

```json
{"start_time": "2018-08-06T10:28:24.806493+00:00", "finish_time": "2018-08-06T10:28:24.867456+00:00", "run_durations": [0.60857599999999999], "function_name": "my_function"}
{"start_time": "2018-08-06T10:28:24.806493+00:00", "finish_time": "2018-08-06T10:28:24.867456+00:00", "run_durations": [0.60857599999999999], "function_name": "my_function", "timestamp_tz": "UTC", "duration_counter": "perf_counter"}
```

Start and finish times are given as timestamps in ISO-8601 format, in the UTC
timezone by default (see Timezones section of this README).

`run_durations` values are given in seconds, captured using the `time.perf_counter`
function by default, but this can be overridden (see the Duration timings section
of this README).

The simplest way to examine results in detail is to load them into a
[pandas](https://pandas.pydata.org/) dataframe:

@@ -451,11 +465,21 @@ although the latter is a one-off per invocation and typically less than one second
Telemetry capture intervals should be kept relatively infrequent (e.g., every minute
or two, rather than every second) to avoid significant runtime impacts.

### Duration timings

By default, `run_durations` are given in seconds using the `time.perf_counter` function,
which should be sufficient for most use cases. You can use any timer function that
returns the current time as a float or an integer. For example, use `time.perf_counter_ns`
for durations in nanoseconds, or `time.monotonic` for a monotonic clock that is impervious
to system clock adjustments (useful for very long-running code). Pass the chosen function
with the `duration_counter=...` argument when creating a benchmark suite object, as shown
in the Extended examples section of this README, above.
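
A minimal sketch of using a different timer (the class name here is illustrative;
`duration_counter` is the option described above):

```python
import time
from microbench import MicroBench

class NanosecondBench(MicroBench):
    pass

# run_durations entries will be integers (nanoseconds), because
# time.perf_counter_ns returns an int rather than a float
bench_ns = NanosecondBench(duration_counter=time.perf_counter_ns)

@bench_ns
def my_function():
    return sum(range(10_000))

my_function()
```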

### Timezones

Microbench captures `start_time` and `finish_time` in the UTC timezone by default.
This can be overriden by passing a `timezone=tz` argument when creating a benchmark
class, where `tz` is a timezone object (e.g. created using the `pytz` library).
This can be overridden by passing a `tz=...` argument when creating a benchmark
suite object, where the value is a timezone object (e.g. created using the `pytz` library).
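
A minimal sketch, assuming `pytz` is installed (the class name is illustrative):

```python
import pytz
from microbench import MicroBench

class EasternBench(MicroBench):
    pass

# start_time and finish_time will be recorded in US/Eastern rather than UTC
benchmark = EasternBench(tz=pytz.timezone('US/Eastern'))
```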

## Feedback

Expand Down
28 changes: 25 additions & 3 deletions microbench/__init__.py
@@ -14,6 +14,7 @@
import subprocess
import io
import threading
import time
import signal
import warnings
try:
@@ -37,7 +38,6 @@
import pandas
except ImportError:
pandas = None
from .diff import envdiff


from ._version import get_versions
@@ -75,7 +75,26 @@ class JSONEncodeWarning(Warning):
class MicroBench(object):
def __init__(self, outfile=None, json_encoder=JSONEncoder,
tz=timezone.utc, iterations=1,
duration_counter=time.perf_counter,
*args, **kwargs):
"""Benchmark and metadata capture suite.
Args:
outfile (file or buf, optional): Output file or buffer.
Defaults to None, which captures data using a StringIO
buffer.
json_encoder (json.JSONEncoder, optional): JSONEncoder for
benchmark results. Defaults to JSONEncoder.
tz (timezone, optional): Timezone for start_time and finish_time.
Defaults to timezone.utc.
iterations (int, optional): Number of iterations to run function.
Defaults to 1.
duration_counter (callable, optional): Timer function to use for
run_durations. Defaults to time.perf_counter.

Raises:
ValueError: If positional arguments are supplied.
"""
if args:
raise ValueError('Only keyword arguments are allowed')
self._bm_static = kwargs
@@ -84,12 +103,15 @@ def __init__(self, outfile=None, json_encoder=JSONEncoder,
elif not hasattr(self, 'outfile'):
self.outfile = io.StringIO()
self._json_encoder = json_encoder
self._duration_counter = duration_counter
self.tz = tz
self.iterations = iterations

def pre_start_triggers(self, bm_data):
# Store timezone
bm_data['timestamp_tz'] = str(self.tz)
# Store duration counter function name
bm_data['duration_counter'] = self._duration_counter.__name__

# Capture environment variables
if hasattr(self, 'env_vars'):
@@ -144,10 +166,10 @@ def post_finish_triggers(self, bm_data):
method(bm_data)

def pre_run_triggers(self, bm_data):
bm_data['_run_start'] = datetime.now(self.tz)
bm_data['_run_start'] = self._duration_counter()

def post_run_triggers(self, bm_data):
bm_data['run_durations'].append(datetime.now(self.tz) - bm_data['_run_start'])
bm_data['run_durations'].append(self._duration_counter() - bm_data['_run_start'])

def capture_function_name(self, bm_data):
bm_data['function_name'] = bm_data['_func'].__name__
4 changes: 3 additions & 1 deletion microbench/tests/test_base.py
@@ -34,6 +34,9 @@ def my_function():
runtimes = results['finish_time'] - results['start_time']
assert (runtimes > datetime.timedelta(0)).all()

assert results['timestamp_tz'][0] == 'UTC'
assert results['duration_counter'][0] == 'perf_counter'


def test_multi_iterations():
class MyBench(MicroBench):
@@ -58,7 +61,6 @@ def my_function():

assert len(results['run_durations'][0]) == iterations
assert all(dur >= 0 for dur in results['run_durations'][0])
assert sum(results['run_durations'][0]) <= runtimes[0].total_seconds()


def test_capture_global_packages():