From 1a65b777b2d2781cea73ebd4d17577337d26515c Mon Sep 17 00:00:00 2001 From: Alex Lubbock Date: Sat, 17 Aug 2024 01:40:22 +0100 Subject: [PATCH] fix: update pandas.read_json syntax Pandas has deprecated read_json(string), so we now use read_json(StringIO) instead. README has been updated with new syntax. --- README.md | 4 ++-- microbench/__init__.py | 2 ++ microbench/tests/test_base.py | 14 +++++++------- microbench/tests/test_line_profiler.py | 3 ++- microbench/tests/test_nvidia.py | 2 +- microbench/tests/test_psutil.py | 2 +- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index cc49eb3..fa48df4 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ into a `io.StringIO()` buffer, which can be read as follows ```python import pandas as pd -results = pd.read_json(basic_bench.outfile.getvalue(), lines=True) +results = pd.read_json(basic_bench.outfile, lines=True) ``` The above example captures the fields `start_time`, `finish_time`, @@ -219,7 +219,7 @@ def my_function(): my_function() # Read the results into a Pandas DataFrame -results = pandas.read_json(lpbench.outfile.getvalue(), lines=True) +results = pandas.read_json(lpbench.outfile, lines=True) # Get the line profiler report as an object lp = MBLineProfiler.decode_line_profile(results['line_profiler'][0]) diff --git a/microbench/__init__.py b/microbench/__init__.py index a131c18..b7bc022 100644 --- a/microbench/__init__.py +++ b/microbench/__init__.py @@ -175,6 +175,8 @@ def output_result(self, bm_data): else: # Assume file-like object self.outfile.write(bm_str) + # Seek to beginning to allow read + self.outfile.seek(0) def __call__(self, func): def inner(*args, **kwargs): diff --git a/microbench/tests/test_base.py b/microbench/tests/test_base.py index 5b8aa7e..f61e24d 100644 --- a/microbench/tests/test_base.py +++ b/microbench/tests/test_base.py @@ -28,7 +28,7 @@ def my_function(): for _ in range(3): assert my_function() == 499999500000 - results = pandas.read_json(benchmark.outfile.getvalue(), lines=True) + results = pandas.read_json(benchmark.outfile, lines=True) assert (results['function_name'] == 'my_function').all() assert results['package_versions'][0]['pandas'] == pandas.__version__ runtimes = results['finish_time'] - results['start_time'] @@ -50,7 +50,7 @@ def my_function(): # call the function my_function() - results = pandas.read_json(benchmark.outfile.getvalue(), lines=True) + results = pandas.read_json(benchmark.outfile, lines=True) assert (results['function_name'] == 'my_function').all() runtimes = results['finish_time'] - results['start_time'] assert (runtimes >= datetime.timedelta(0)).all() @@ -68,7 +68,7 @@ def noop(): noop() - results = pandas.read_json(globals_bench.outfile.getvalue(), lines=True) + results = pandas.read_json(globals_bench.outfile, lines=True) # We should've captured microbench and pandas versions from top level # imports in this file @@ -89,7 +89,7 @@ def noop(): noop() - results = pandas.read_json(pkg_bench.outfile.getvalue(), lines=True) + results = pandas.read_json(pkg_bench.outfile, lines=True) assert pandas.__version__ == results['package_versions'][0]['pandas'] @@ -111,7 +111,7 @@ def noop(): assert not telem_bench._telemetry_thread.is_alive() # Check some telemetry was captured - results = pandas.read_json(telem_bench.outfile.getvalue(), lines=True) + results = pandas.read_json(telem_bench.outfile, lines=True) assert len(results['telemetry']) > 0 @@ -135,7 +135,7 @@ def dummy(arg1, arg2): assert all(issubclass(w_.category, JSONEncodeWarning) for w_ in w) - results = pandas.read_json(bench.outfile.getvalue(), lines=True) + results = pandas.read_json(bench.outfile, lines=True) assert results['args'][0] == [_UNENCODABLE_PLACEHOLDER_VALUE] assert results['kwargs'][0] == {'arg2': _UNENCODABLE_PLACEHOLDER_VALUE} assert results['return_value'][0] == _UNENCODABLE_PLACEHOLDER_VALUE @@ -173,5 +173,5 @@ def dummy(): dummy() - results = pandas.read_json(bench.outfile.getvalue(), lines=True) + results = pandas.read_json(bench.outfile, lines=True) assert results['return_value'][0] == str(obj) diff --git a/microbench/tests/test_line_profiler.py b/microbench/tests/test_line_profiler.py index f21410f..430b211 100644 --- a/microbench/tests/test_line_profiler.py +++ b/microbench/tests/test_line_profiler.py @@ -1,5 +1,6 @@ from microbench import MicroBench, MBLineProfiler import pandas +import io def test_line_profiler(): @@ -20,7 +21,7 @@ def my_function(): for _ in range(3): assert my_function() == 499999500000 - results = pandas.read_json(lpbench.outfile.getvalue(), lines=True) + results = pandas.read_json(lpbench.outfile, lines=True) lp = MBLineProfiler.decode_line_profile(results['line_profiler'][0]) assert lp.__class__.__name__ == 'LineStats' MBLineProfiler.print_line_profile(results['line_profiler'][0]) diff --git a/microbench/tests/test_nvidia.py b/microbench/tests/test_nvidia.py index 000aa3d..44d4b6b 100644 --- a/microbench/tests/test_nvidia.py +++ b/microbench/tests/test_nvidia.py @@ -23,6 +23,6 @@ def test(): test() - results = pandas.read_json(bench.outfile.getvalue(), lines=True) + results = pandas.read_json(bench.outfile, lines=True) assert 'nvidia_gpu_name' in results.columns assert 'nvidia_memory.total' in results.columns diff --git a/microbench/tests/test_psutil.py b/microbench/tests/test_psutil.py index 011167b..30d48c7 100644 --- a/microbench/tests/test_psutil.py +++ b/microbench/tests/test_psutil.py @@ -14,6 +14,6 @@ def test_func(): test_func() - results = pandas.read_json(mybench.outfile.getvalue(), lines=True) + results = pandas.read_json(mybench.outfile, lines=True) assert results['cpu_cores_logical'][0] >= 1 assert results['ram_total'][0] > 0