Added a __main__.py so it can be used as a script
MSeifert04 committed Dec 4, 2020
1 parent b285ebb commit 12b2557
Showing 6 changed files with 274 additions and 0 deletions.
32 changes: 32 additions & 0 deletions README.rst
@@ -90,3 +90,35 @@ To save the plotted benchmark as PNG file::
>>> plt.savefig('sum_example.png')

.. image:: ./docs/source/sum_example.png

Command-Line Interface
----------------------

.. warning::
    The command-line interface is highly experimental and its API is very
    likely to change.

It is an experimental way of running benchmarks as a command-line tool, which is
especially useful if you want to run benchmarks on multiple files without writing
the usual boilerplate.

File ``sum.py``::

import numpy as np

def bench_sum(l, func=sum): # <-- function name needs to start with "bench_"
return func(l)

def bench_numpy_sum(l, func=np.sum): # <-- using func parameter with the actual function helps
return np.sum(l)

def args_list_length(): # <-- function providing the argument starts with "args_"
for i in [1, 10, 100, 1000, 10000, 100000]:
yield i, [1] * i

Then run::

$ python -m simple_benchmark sum.py sum.png

This produces a similar result:

.. image:: ./docs/source/sum_example_cli.png
3 changes: 3 additions & 0 deletions docs/source/changes.rst
@@ -4,6 +4,9 @@ Changelog
0.1.0 (not released)
--------------------

- Added an experimental command-line interface for running benchmarks on suitably
  structured Python files.

- Added the functions ``assert_same_results`` and ``assert_not_mutating_input``.

- The argument ``time_per_benchmark`` of ``benchmark`` and ``BenchmarkBuilder`` now expects
54 changes: 54 additions & 0 deletions docs/source/command_line.rst
@@ -0,0 +1,54 @@
Command Line
============

Using the Command Line
----------------------

.. warning::
    The command-line interface is highly experimental and its API is very
    likely to change.

When all optional dependencies are installed, you can also run ``simple_benchmark``
as a command-line tool. In its most basic form::

$ python -m simple_benchmark INPUT_FILE OUTPUT_FILE

This processes the ``INPUT_FILE`` and writes a plot to ``OUTPUT_FILE``.

However, in order to work correctly, the ``INPUT_FILE`` has to fulfill several
criteria:

- It must be a valid Python file.
- All functions that should be benchmarked need a name starting with ``bench_``;
  everything after the prefix is used as the label.
- The function generating the arguments for the benchmark needs a name starting
  with ``args_``; everything after the prefix is used as the label of the x-axis.

Also, if the benchmarked function has a ``func`` parameter with a default value,
that default is used to determine the ``alias`` (the name displayed in the table
and plot).
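
For example, an input file that follows these conventions could look like this
(a minimal sketch mirroring the ``sum.py`` example from the README; the sizes and
names are purely illustrative)::

    import numpy as np

    def bench_sum(l, func=sum):           # benchmarked; the name after "bench_" (or the func default) gives the label
        return func(l)

    def bench_numpy_sum(l, func=np.sum):  # the func default helps determine the displayed alias
        return np.sum(l)

    def args_list_length():               # provides the arguments; the x-axis is labeled "list_length"
        for i in [1, 10, 100, 1000, 10000, 100000]:
            yield i, [1] * i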


Parameters
----------

The first two parameters are the input and the output file. However, there are a
few more parameters, which can also be seen when running::

$ python -m simple_benchmark -h
usage: __main__.py [-h] [-s FIGSIZE] [--time-per-benchmark TIME_PER_BENCHMARK] [-v] [--write-csv] filename out

Benchmark a file

positional arguments:
filename the file to run the benchmark on.
out Specifies the output file for the plot

optional arguments:
-h, --help show this help message and exit
-s FIGSIZE, --figsize FIGSIZE
Specify the output size in inches, needs to be wrapped in quotes on most shells, e.g. "15, 9" (default: 15, 9)
--time-per-benchmark TIME_PER_BENCHMARK
The target time for each individual benchmark in seconds (default: 0.1)
-v, --verbose prints additional information on stdout (default: False)
--write-csv Writes an additional CSV file of the results (default: False)
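
For example, an invocation that increases the figure size, spends more time on each
individual benchmark, and also writes a CSV file could look like this (the file
names are just placeholders)::

    $ python -m simple_benchmark sum.py sum.png -s "20, 12" --time-per-benchmark 0.5 --write-csv -v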
34 changes: 34 additions & 0 deletions docs/source/index.rst
@@ -64,11 +64,45 @@ Or with ``matplotlib`` (has to be installed too)::

.. image:: ./sum_example.png

Command-Line Interface
----------------------

.. warning::
    The command-line interface is highly experimental and its API is very
    likely to change.

It is an experimental way of running benchmarks as a command-line tool, which is
especially useful if you want to run benchmarks on multiple files without writing
the usual boilerplate.

File ``sum.py``::

    import numpy as np

    def bench_sum(l, func=sum):  # <-- function name needs to start with "bench_"
        return func(l)

    def bench_numpy_sum(l, func=np.sum):  # <-- a func default with the actual function improves the displayed name
        return np.sum(l)

    def args_list_length():  # <-- function providing the arguments starts with "args_"
        for i in [1, 10, 100, 1000, 10000, 100000]:
            yield i, [1] * i

Then run::

$ python -m simple_benchmark sum.py sum.png

This produces a similar result in ``sum.png``:

.. image:: ./sum_example_cli.png


.. toctree::
:maxdepth: 2
:caption: Contents:

extended
command_line
api
changes
license
Binary file added docs/source/sum_example_cli.png
151 changes: 151 additions & 0 deletions simple_benchmark/__main__.py
@@ -0,0 +1,151 @@
# Licensed under Apache License Version 2.0 - see LICENSE
import argparse
import datetime
import importlib
import importlib.util
import inspect
import pathlib

import matplotlib.pyplot as plt

from simple_benchmark import BenchmarkBuilder


def _startswith_and_remainder(string, prefix):
    """Return (True, remainder) if the string starts with the prefix, otherwise (False, '')."""
    if string.startswith(prefix):
        return True, string[len(prefix):]
    else:
        return False, ''

def _import_file(filename, filepath):
    """Load a Python module from a file path."""
    spec = importlib.util.spec_from_file_location(filename, filepath)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

def _get_version_for_module(module_name):
"""Imports the module by its name and tries to get the version. Could fail..."""
module = importlib.import_module(module_name)
return module.__version__


def _hacky_parse_sig(function):
    """Extracts a usable alias by inspecting the function signature."""
    sig = inspect.signature(function)
    # Look for a parameter that is literally named "func".
    function_parameter = sig.parameters.get('func', None)
    if function_parameter is not None:
        benchmarked_function = function_parameter.default
        if benchmarked_function is not None and benchmarked_function is not inspect.Parameter.empty:
            # __module__ will likely contain additional submodules. However
            # only the main module is (probably) of interest.
            module = benchmarked_function.__module__.split('.')[0]
            # Not every function has a __name__ attribute. But the rest of the
            # function is hacky too...
            name = benchmarked_function.__name__
            try:
                return f"{module} {name} ({_get_version_for_module(module)})"
            except Exception:
                # Something went wrong while determining the version. That's
                # okay, just omit it then...
                return f"{module} {name}"
    # No usable "func" default was found; the caller falls back to the plain name.
    return None

def main(filename, outfilename, figsize, time_per_benchmark, write_csv, verbose):
if verbose:
print("Performing a Benchmark using simple_benchmark")
print("---------------------------------------------")
print("Effective Options:")
print(f"Input-File: {filename}")
print(f"Output-File: {outfilename}")
print(f"Time per individual benchmark: {time_per_benchmark.total_seconds()} seconds")
print(f"Figure size (inches): {figsize}")
print(f"Verbose: {verbose}")

path = pathlib.Path(filename).absolute()
filename = path.name

if verbose:
print("")
print("Process file")
print("------------")

module = _import_file(filename, path)

b = BenchmarkBuilder(time_per_benchmark)

for function_name in sorted(dir(module)):
function = getattr(module, function_name)
is_benchmark, benchmark_name = _startswith_and_remainder(function_name, 'bench_')
        if is_benchmark:
            # Start from None so a failure in _hacky_parse_sig cannot leave the
            # name unbound or reuse a stale alias from a previous iteration.
            alias = None
            try:
                alias = _hacky_parse_sig(function)
            except Exception:
                pass

            if not alias:
                alias = benchmark_name

            b.add_function(alias=alias)(function)
            continue

is_args, args_name = _startswith_and_remainder(function_name, 'args_')
if is_args:
b.add_arguments(args_name)(function)
continue

if verbose:
print("successful")
print("")
print("Running Benchmark")
print("-----------------")
print("this may take a while...")

r = b.run()

if verbose:
print("successful")
print("")
print("Benchmark Result")
print("----------------")
print(r.to_pandas_dataframe())

plt.figure(figsize=figsize)
r.plot()
plt.savefig(outfilename)

out_file_path = pathlib.Path(outfilename)

if verbose:
print("")
print(f"Written benchmark plot to {out_file_path.absolute()}")

if write_csv:
csv_file_path = out_file_path.with_suffix('.csv')
        # pandas expects a %-style format string for float_format; %.9f
        # suppresses scientific notation and displays 9 decimals
        # (nanosecond resolution; more precision is probably not useful anyway).
r.to_pandas_dataframe().to_csv(str(csv_file_path), float_format='%.9f')
print(f"Written CSV to {csv_file_path.absolute()}")


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Benchmark a file', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('filename', help='the file to run the benchmark on.')
parser.add_argument('out', help='Specifies the output file for the plot')
parser.add_argument('-s', '--figsize', help='Specify the output size in inches, needs to be wrapped in quotes on most shells, e.g. "15, 9"', default='15, 9')
parser.add_argument('--time-per-benchmark', help='The target time for each individual benchmark in seconds', default='0.1')
parser.add_argument('-v', '--verbose', help='prints additional information on stdout', action="store_true")
parser.add_argument('--write-csv', help='Writes an additional CSV file of the results', action="store_true")

args = parser.parse_args()

filename = args.filename
outfilename = args.out

verbose = args.verbose
figsize = [int(value) for value in args.figsize.split(',')]
time_per_benchmark = datetime.timedelta(seconds=float(args.time_per_benchmark))
write_csv = args.write_csv
main(filename, outfilename, figsize=figsize, time_per_benchmark=time_per_benchmark, write_csv=write_csv, verbose=verbose)
