Add memoize decorator to improve performance.

The bottleneck in the `make_subplots` function was excessive lookups in the `plot_schema`. These lookups are computationally expensive, so caching their results gives us a large performance boost. Note that Python 3.2+ has [`functools.lru_cache`](https://docs.python.org/3/library/functools.html#functools.lru_cache), which could be used for this. However, we support Python 2.7+ and I didn’t see a backport for it. There are numerous `memoize` packages on PyPI — so many that I didn’t want to commit to one. It’s fairly simple to write this ourselves, and then we don’t need another dependency.
zhenfenghan · Jan 31, 2017 · dd04d95 · dd04d95
1 parent adce1bb
commit dd04d95
Show file tree

Hide file tree

Showing 4 changed files with 146 additions and 2 deletions.
diff --git a/plotly/tests/test_core/test_utils/test_utils.py b/plotly/tests/test_core/test_utils/test_utils.py
@@ -1,10 +1,12 @@
 from __future__ import absolute_import
 
+from inspect import getargspec
 from unittest import TestCase
 
 from requests.compat import json as _json
 
-from plotly.utils import PlotlyJSONEncoder, get_by_path, node_generator
+from plotly.utils import (PlotlyJSONEncoder, get_by_path, memoize,
+                          node_generator)
 
 
 class TestJSONEncoder(TestCase):
@@ -50,3 +52,98 @@ def test_node_generator(self):
         ]
         for i, item in enumerate(node_generator(node0)):
             self.assertEqual(item, expected_node_path_tuples[i])
+
+
+class TestMemoizeDecorator(TestCase):
+
+    # Python 2.x has no `nonlocal`, so the nested functions below cannot
+    # rebind a counter defined in the enclosing test method. Instead they
+    # mutate an attribute on an instance of this class, which only requires
+    # *reading* the enclosing name.
+    class Namespace(object):
+        pass
+
+    def test_memoize(self):
+        name_space = self.Namespace()
+        name_space.call_count = 0
+
+        @memoize()
+        def add(a, b):
+            name_space.call_count += 1
+            return a + b
+
+        tests = [[(1, 1), 2], [(2, 3), 5], [(3, -3), 0]]
+
+        self.assertEqual(name_space.call_count, 0)
+        for i, (inputs, result) in enumerate(tests, 1):
+            for _ in range(10):
+                self.assertEqual(add(*inputs), result)
+                self.assertEqual(name_space.call_count, i)
+
+    def test_memoize_maxsize(self):
+        name_space = self.Namespace()
+        name_space.call_count = 0
+
+        maxsize = 10
+
+        @memoize(maxsize=maxsize)
+        def identity(a):
+            name_space.call_count += 1
+            return a
+
+        # Nothing is cached yet: each of the first maxsize inputs is a miss.
+        for i in range(maxsize):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, i + 1)
+
+        # Nothing should have been discarded yet, so no additional calls.
+        for i in range(maxsize):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, maxsize)
+
+        # Make a new call...
+        self.assertEqual(identity(maxsize), maxsize)
+        self.assertEqual(name_space.call_count, maxsize + 1)
+
+        # All but the first (oldest) input should still be remembered.
+        for i in range(1, maxsize + 1):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, maxsize + 1)
+
+        # Each input was evicted by the previous miss, so every call misses.
+        for i in range(maxsize):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, maxsize + 1 + i + 1)
+
+    def test_memoize_maxsize_none(self):
+        name_space = self.Namespace()
+        name_space.call_count = 0
+
+        @memoize(maxsize=None)
+        def identity(a):
+            name_space.call_count += 1
+            return a
+
+        # Nothing is cached yet: every one of the 400 new inputs is a miss.
+        for i in range(400):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, i + 1)
+
+        # Nothing should have been discarded, so no additional calls.
+        for i in range(400):
+            self.assertEqual(identity(i), i)
+            self.assertEqual(name_space.call_count, 400)
+
+    def test_memoize_function_info(self):
+        # `memoize` is built with the `decorator` module, so the wrapped
+        # function's doc, name and argument spec must survive decoration.
+
+        @memoize()
+        def foo(a, b, c='see?'):
+            """Foo is foo."""
+            pass
+
+        self.assertEqual(foo.__doc__, 'Foo is foo.')
+        self.assertEqual(foo.__name__, 'foo')
+        self.assertEqual(getargspec(foo).args, ['a', 'b', 'c'])
+        self.assertEqual(getargspec(foo).defaults, ('see?',))
diff --git a/plotly/utils.py b/plotly/utils.py
@@ -12,8 +12,10 @@
 import sys
 import threading
 import decimal
+from collections import deque
 
 import pytz
+from decorator import decorator
 from requests.compat import json as _json
 
 from plotly.optional_imports import get_module
@@ -444,3 +446,47 @@ def set_sharing_and_world_readable(option_set):
             option_set['world_readable'] = True
         else:
             option_set['world_readable'] = False
+
+
+def _default_memoize_key_function(*args, **kwargs):
+    """Build a hashable cache key from a call's positional/keyword args."""
+    if not kwargs:
+        # positional-only calls are keyed by the args tuple itself
+        return args
+    # frozenset makes the keyword items hashable and order-independent
+    return args, frozenset(kwargs.items())
+
+
+def memoize(maxsize=128):
+    """
+    Memoize a function by its (hashable) arguments. Note, if the wrapped
+    function returns a mutable result, the caller is responsible for *not*
+    mutating the result as it will mutate the cache itself. When the cache
+    is full, the oldest *inserted* entry is evicted (FIFO, not LRU).
+
+    :param (int|None) maxsize: Limit the number of cached results. This is a
+                               simple way to prevent memory leaks. `None`
+                               remembers *all* calls; `maxsize <= 0` caches
+                               nothing. The default of 128 is used for parity
+                               with Python 3.2's `functools.lru_cache`.
+    """
+    keys = deque()
+    cache = {}
+
+    def _memoize(*all_args, **kwargs):
+        # Unpacked by hand so that `kwargs` may itself contain a "func" key.
+        func, args = all_args[0], all_args[1:]
+        key = _default_memoize_key_function(*args, **kwargs)
+        if key in cache:  # O(1) dict lookup instead of scanning the deque
+            return cache[key]
+
+        result = func(*args, **kwargs)
+        if maxsize is None or maxsize > 0:
+            # `>=` keeps the bound even if recursive calls filled the cache.
+            if maxsize is not None and len(keys) >= maxsize:
+                cache.pop(keys.pop())
+            keys.appendleft(key)
+            cache[key] = result
+        return result
+
+    return decorator(_memoize)
diff --git a/setup.py b/setup.py
@@ -45,5 +45,5 @@ def readme():
                 'plotly/matplotlylib/mplexporter',
                 'plotly/matplotlylib/mplexporter/renderers'],
       package_data={'plotly': ['package_data/*']},
-      install_requires=['requests', 'six', 'pytz'],
+      install_requires=['decorator', 'requests', 'six', 'pytz'],
       zip_safe=False)
diff --git a/tox.ini b/tox.ini
@@ -51,6 +51,7 @@ whitelist_externals=
     mkdir
 deps=
     coverage==4.3.1
+    decorator==4.0.9
     mock==2.0.0
     nose==1.3.7
     requests==2.12.4