From 34c7a25fa03503be34f67f3c8bb64da802d342d1 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 13 Dec 2023 20:10:26 +0000 Subject: [PATCH 01/18] Deactivate CI --- .github/dependabot.yml | 20 -------------------- .github/workflows/build_docs.yml | 8 ++++---- .github/workflows/release.yml | 8 ++++---- .github/workflows/testing.yml | 18 ++++++++++-------- 4 files changed, 18 insertions(+), 36 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 57fa59b..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,20 +0,0 @@ -# To get started with Dependabot version updates, you'll need to specify which -# package ecosystems to update and where the package manifests are located. -# Please see the documentation for all configuration options: -# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates - -version: 2 -updates: - - package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "daily" - reviewers: - - "jGaboardi" - - - package-ecosystem: "pip" - directory: "/" - schedule: - interval: "daily" - reviewers: - - "jGaboardi" diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index 4d42cf2..0adc82d 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -2,10 +2,10 @@ name: Build Docs on: - push: - # Sequence of patterns matched against refs/tags - tags: - - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 + # push: + # # Sequence of patterns matched against refs/tags + # tags: + # - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 workflow_dispatch: inputs: version: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b5418e6..f46c616 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,10 @@ name: Release Package on: - push: - # Sequence of patterns matched against refs/tags - tags: - - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 + # push: + # # Sequence of patterns matched against refs/tags + # tags: + # - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 workflow_dispatch: inputs: version: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 8a7cf60..94c1931 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -1,14 +1,16 @@ name: Continuous Integration on: - push: - branches: - - "*" - pull_request: - branches: - - "*" - schedule: - - cron: "59 21 * * *" + # push: + # branches: + # - "*" + # pull_request: + # branches: + # - "*" + # schedule: + # - cron: "59 21 * * *" + workflow_dispatch: + jobs: testing: From 2a85d9e56b7e2c0d0ab96f1e8ea7a4cfcf730460 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 13 Dec 2023 20:58:41 +0000 Subject: [PATCH 02/18] Add _fisher_jenks_means_without_numpy and Monkey patch code. --- README.md | 18 +++++ mapclassify/classifiers.py | 136 ++++++++++++++++++++++++++++++++++++- 2 files changed, 152 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9f73d13..41b660f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,21 @@ +# mapclassif-Iron: _fisher_jenks_means for IronPython, and other environments without numpy. + +*Warning!* Installing the main branch of this fork normally with pip, will not produce a working library. +An ImportError will be raised on import. Unless your Python finds numpy, pandas and SciKitlearn, in which +case you are most likely running CPython or Jupyter anyway, and therefore you have no need of this fork and +should use mapclassify instead. + +This project is included as a dependency in another project (sDNA_GH) by static linking the code +(copy and pasting) from a branch that has everything that won't work without +numpy, pandas and SciKitlearn etc. removed. + +Only the function, _fisher_jenks_means has been adapted (back to something similar to +Daniel J Lewis's original snippet). + +No other functionality from mapclassify has been changed, and none has been added. + +Parent project's README.md: + # mapclassify: Classification Schemes for Choropleth Maps [![Continuous Integration](https://github.com/pysal/mapclassify/actions/workflows/testing.yml/badge.svg)](https://github.com/pysal/mapclassify/actions/workflows/testing.yml) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 27d5188..638926e 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -1,14 +1,17 @@ """ A module of classification schemes for choropleth mapping. """ +import sys import copy import functools import warnings -import numpy as np import scipy.stats as stats from sklearn.cluster import KMeans + + + __author__ = "Sergio J. Rey" __all__ = [ @@ -60,6 +63,58 @@ FMT = "{:.2f}" + + + + + +class MockNumpy(object): + + def __init__(self, int_type = None, float_type = None): + if int_type is None or float_type is None: + try: + if sys.implementation.name != 'ironpython': + raise ImportError + import System + except ImportError: + class System: + Int16 = int + Single = float + + self.int32 = int_type or System.Int16 + self.float32 = float_type or System.Single + + @classmethod + def zeros(self, dims, dtype = System.Int16): + + if len(dims) == 1: + zero = dtype(0) + return [zero for __ in range(dims[0])] + + return [cls.zeros(dims[1:], dtype) for __ in range(dims[0])] + + int32 = System.Int16 + float32 = System.Single + inf = System.Single('inf') + + @staticmethod + def delete(arr, index): + return arr[:index] + arr[index+1:] + + +try: + if sys.implementation.name != 'cpython': + raise ImportError + import numpy as np + HAS_NUMPY = True +except ImportError: + HAS_NUMPY = False + + default_mock_numpy = MockNumpy() + + + + try: from numba import njit @@ -581,7 +636,7 @@ def natural_breaks(values, k=5, init=10): @njit("f8[:](f8[:], u2)") -def _fisher_jenks_means(values, classes=5): +def _fisher_jenks_means_numpy(values, classes=5): """ Jenks Optimal (Natural Breaks) algorithm implemented in Python. @@ -637,6 +692,83 @@ def _fisher_jenks_means(values, classes=5): return np.delete(kclass, 0) +def _fisher_jenks_means_without_numpy( + values, + classes=5, + np = default_mock_numpy + ): + """ + As for _fisher_jenks_means_numpy above, to keep the code as far as possible + exactly the same, except with np passable in as a dependency, and with + matrix[i, j] replaced with matrix[i][j] for speed. + + + Jenks Optimal (Natural Breaks) algorithm implemented in Python. + + Notes + ----- + + The original Python code comes from here: + http://danieljlewis.org/2010/06/07/jenks-natural-breaks-algorithm-in-python/ + and is based on a JAVA and Fortran code available here: + https://stat.ethz.ch/pipermail/r-sig-geo/2006-March/000811.html + + + + """ + values.sort() + n_data = len(values) + mat1 = np.zeros((n_data + 1, classes + 1), dtype=np.int32) + mat2 = np.zeros((n_data + 1, classes + 1), dtype=np.float32) + + # System.Array.Fill not suppported on Multi-dimensional arrays + for j in range(1, classes + 1): + mat1[1][j] = 1 + for i in range(2, n_data+1): + mat2[i][j] = np.inf + v = 0 + for _l in range(2, len(values) + 1): + s1 = 0 + s2 = 0 + w = 0 + for m in range(1, _l + 1): + i3 = _l - m + 1 + val = values[i3 - 1] + s2 += val * val + s1 += val + w += 1 + v = s2 - (s1 * s1) / np.float32(w) + i4 = i3 - 1 + if i4 != 0: + for j in range(2, classes + 1): + if mat2[_l][j] >= (v + mat2[i4][j - 1]): + mat1[_l][j] = i3 + mat2[_l][j] = v + mat2[i4][j - 1] + + mat1[_l][1] = 1 + mat2[_l][1] = v + +# for row in mat1: +# print(row) + k = len(values) + + kclass = np.zeros((classes + 1,), dtype=type(values[0])) + kclass[classes] = values[len(values) - 1] + kclass[0] = values[0] + for countNum in range(classes, 1, -1): + pivot = mat1[k][countNum] + _id = int(pivot - 2) + kclass[countNum - 1] = values[_id] + k = int(pivot - 1) + return np.delete(kclass, 0) + + +if HAS_NUMPY: + _fisher_jenks_means = _fisher_jenks_means_numpy +else: + _fisher_jenks_means = _fisher_jenks_means_without_numpy + + class MapClassifier: r""" Abstract class for all map classifications :cite:`Slocum_2009` From 6fac0d217ad6c084e120e768872a7c33dcb785d4 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 13 Dec 2023 21:07:33 +0000 Subject: [PATCH 03/18] Update README.md --- README.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 41b660f..5b863d1 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,7 @@ # mapclassif-Iron: _fisher_jenks_means for IronPython, and other environments without numpy. -*Warning!* Installing the main branch of this fork normally with pip, will not produce a working library. -An ImportError will be raised on import. Unless your Python finds numpy, pandas and SciKitlearn, in which -case you are most likely running CPython or Jupyter anyway, and therefore you have no need of this fork and -should use mapclassify instead. - -This project is included as a dependency in another project (sDNA_GH) by static linking the code -(copy and pasting) from a branch that has everything that won't work without -numpy, pandas and SciKitlearn etc. removed. +This branch does not currently work, but is intended to very soon work with, and provide PRs for mapclassify, and the +resulting installation should work when installed by directing pip to this github repository branch Only the function, _fisher_jenks_means has been adapted (back to something similar to Daniel J Lewis's original snippet). From 270d47565dbd2b271038dc5f79eff54350850199 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 00:12:09 +0000 Subject: [PATCH 04/18] Monkey patch in _fisher_jenks_means_without_numpy if no Numba (even if Numpy) --- mapclassify/classifiers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 638926e..269ead0 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -110,7 +110,6 @@ def delete(arr, index): except ImportError: HAS_NUMPY = False - default_mock_numpy = MockNumpy() @@ -132,6 +131,9 @@ def wrapper_decorator(*args, **kwargs): return decorator_njit + # Numba requires Numpy + default_mock_numpy = MockNumpy() + def _format_intervals(mc, fmt="{:.0f}"): """ @@ -763,7 +765,7 @@ def _fisher_jenks_means_without_numpy( return np.delete(kclass, 0) -if HAS_NUMPY: +if HAS_NUMBA: _fisher_jenks_means = _fisher_jenks_means_numpy else: _fisher_jenks_means = _fisher_jenks_means_without_numpy From 51a79fba4cc4a810b06ed2000d1f0b6bebc5393b Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:26:36 +0000 Subject: [PATCH 05/18] Only use CPython types. --- mapclassify/classifiers.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 269ead0..d810a28 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -71,21 +71,14 @@ class MockNumpy(object): def __init__(self, int_type = None, float_type = None): - if int_type is None or float_type is None: - try: - if sys.implementation.name != 'ironpython': - raise ImportError - import System - except ImportError: - class System: - Int16 = int - Single = float - - self.int32 = int_type or System.Int16 - self.float32 = float_type or System.Single + + self.int32 = int_type or int + self.float32 = float_type or float + + self.inf = self.float32('inf') @classmethod - def zeros(self, dims, dtype = System.Int16): + def zeros(self, dims, dtype = int): if len(dims) == 1: zero = dtype(0) @@ -93,10 +86,7 @@ def zeros(self, dims, dtype = System.Int16): return [cls.zeros(dims[1:], dtype) for __ in range(dims[0])] - int32 = System.Int16 - float32 = System.Single - inf = System.Single('inf') - + @staticmethod def delete(arr, index): return arr[:index] + arr[index+1:] From 8c44ba0edf126b9122306009e9d96012739a87db Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 10:43:24 +0000 Subject: [PATCH 06/18] FisherJenks now accepts _fisher_jenks_means as an injected dependency. Remove unused HAS_NUMPY check --- mapclassify/classifiers.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index d810a28..293d9db 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -8,7 +8,7 @@ import scipy.stats as stats from sklearn.cluster import KMeans - +import numpy as np @@ -92,13 +92,6 @@ def delete(arr, index): return arr[:index] + arr[index+1:] -try: - if sys.implementation.name != 'cpython': - raise ImportError - import numpy as np - HAS_NUMPY = True -except ImportError: - HAS_NUMPY = False @@ -2084,14 +2077,20 @@ class FisherJenks(MapClassifier): """ - def __init__(self, y, k=K): + def __init__(self, y, k=K, _fisher_jenks_means = None): if not HAS_NUMBA: warnings.warn( "Numba not installed. Using slow pure python version.", UserWarning, stacklevel=3, ) + _fisher_jenks_means = _fisher_jenks_means or _fisher_jenks_means_without_numpy + else: + + _fisher_jenks_means = _fisher_jenks_means or _fisher_jenks_means_numpy + self._fisher_jenks_means = _fisher_jenks_means + nu = len(np.unique(y)) if nu < k: raise ValueError( @@ -2101,9 +2100,11 @@ def __init__(self, y, k=K): MapClassifier.__init__(self, y) self.name = "FisherJenks" + + def _set_bins(self): x = np.sort(self.y).astype("f8") - self.bins = _fisher_jenks_means(x, classes=self.k) + self.bins = self._fisher_jenks_means(x, classes=self.k) class FisherJenksSampled(MapClassifier): From b125267d18b0263da811ebb8b10c8794595eea12 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:57:05 +0000 Subject: [PATCH 07/18] Add test --- README.md | 5 +- mapclassify/_classify_API.py | 1 + mapclassify/classifiers.py | 25 ++++----- mapclassify/tests/time_fisher_jenkss.py | 67 +++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 17 deletions(-) create mode 100644 mapclassify/tests/time_fisher_jenkss.py diff --git a/README.md b/README.md index 5b863d1..1b98bc1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ # mapclassif-Iron: _fisher_jenks_means for IronPython, and other environments without numpy. -This branch does not currently work, but is intended to very soon work with, and provide PRs for mapclassify, and the -resulting installation should work when installed by directing pip to this github repository branch +This branch does not currently work, but is intended to very soon work with, when installed by +directing pip to this github repository branch. This branch is primarily for convenient +PRs for mapclassify. Only the function, _fisher_jenks_means has been adapted (back to something similar to Daniel J Lewis's original snippet). diff --git a/mapclassify/_classify_API.py b/mapclassify/_classify_API.py index c0c0fa8..7f1c0a7 100644 --- a/mapclassify/_classify_API.py +++ b/mapclassify/_classify_API.py @@ -24,6 +24,7 @@ "boxplot": BoxPlot, "equalinterval": EqualInterval, "fisherjenks": FisherJenks, + "fisherjenks_nonumpy": FisherJenks, "fisherjenkssampled": FisherJenksSampled, "headtailbreaks": HeadTailBreaks, "jenkscaspall": JenksCaspall, diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 293d9db..02215da 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -84,7 +84,7 @@ def zeros(self, dims, dtype = int): zero = dtype(0) return [zero for __ in range(dims[0])] - return [cls.zeros(dims[1:], dtype) for __ in range(dims[0])] + return [self.zeros(dims[1:], dtype) for __ in range(dims[0])] @staticmethod @@ -621,7 +621,7 @@ def natural_breaks(values, k=5, init=10): @njit("f8[:](f8[:], u2)") -def _fisher_jenks_means_numpy(values, classes=5): +def _fisher_jenks_means(values, classes=5): """ Jenks Optimal (Natural Breaks) algorithm implemented in Python. @@ -677,13 +677,13 @@ def _fisher_jenks_means_numpy(values, classes=5): return np.delete(kclass, 0) -def _fisher_jenks_means_without_numpy( +def _fjm_without_numpy( values, classes=5, np = default_mock_numpy ): """ - As for _fisher_jenks_means_numpy above, to keep the code as far as possible + As for _fisher_jenks_means above, to keep the code as far as possible exactly the same, except with np passable in as a dependency, and with matrix[i, j] replaced with matrix[i][j] for speed. @@ -748,10 +748,6 @@ def _fisher_jenks_means_without_numpy( return np.delete(kclass, 0) -if HAS_NUMBA: - _fisher_jenks_means = _fisher_jenks_means_numpy -else: - _fisher_jenks_means = _fisher_jenks_means_without_numpy class MapClassifier: @@ -2077,19 +2073,18 @@ class FisherJenks(MapClassifier): """ - def __init__(self, y, k=K, _fisher_jenks_means = None): + def __init__(self, y, k=K): if not HAS_NUMBA: warnings.warn( "Numba not installed. Using slow pure python version.", UserWarning, stacklevel=3, ) - _fisher_jenks_means = _fisher_jenks_means or _fisher_jenks_means_without_numpy - else: - - _fisher_jenks_means = _fisher_jenks_means or _fisher_jenks_means_numpy + + + self._fjm = _fisher_jenks_means if HAS_NUMBA else _fjm_without_numpy - self._fisher_jenks_means = _fisher_jenks_means + nu = len(np.unique(y)) if nu < k: @@ -2104,7 +2099,7 @@ def __init__(self, y, k=K, _fisher_jenks_means = None): def _set_bins(self): x = np.sort(self.y).astype("f8") - self.bins = self._fisher_jenks_means(x, classes=self.k) + self.bins = self._fjm(x, classes=self.k) class FisherJenksSampled(MapClassifier): diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py new file mode 100644 index 0000000..7dc155c --- /dev/null +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -0,0 +1,67 @@ +import random +import timeit +import functools + +import matplotlib.pyplot as plt + +import mapclassify + +try: + import numba + raise Exception( + f"""This test is to compare execution times of two alternatives + to the Numba-optimised function (both of which we already + know are far slower). + + Please run {__file__} again in a venv, + in which Numba is not installed. """ + ) +except ImportError: + pass + +number_tests = 1 + + + +def test_fisher_jenks(N): + + data = [random.randint(1, 1000) for __ in range(N)] + + mapclassify.classify(y = data, scheme = 'fisherjenks', k=8) + +descriptions = ["without Numpy, proposed less slow Pure Python code", + 'with Numpy, existing "slow pure python" code', + ] + +data_sizes = [100, 300, 900, 1400, 2100, 3800, 10000] + + +fig, ax = plt.subplots(figsize=(5, 2.7), layout='constrained') + + + + +for HAS_NUMBA, description in zip([False, True], descriptions): + + times = [] + + for N in data_sizes: + + # This hack avoids changing the FisherJenks + # interface, just for this profiling code. + mapclassify.classifiers.HAS_NUMBA = HAS_NUMBA + + t = timeit.timeit(functools.partial(test_fisher_jenks, N=N), number=number_tests) + + print(f'Time: {t} seconds, data points: {N} {description}, {number_tests=}') + + times.append(t) + + ax.plot(data_sizes, times, label=description) + +ax.set_xlabel('Size of data classified') # Add an x-label to the axes. +ax.set_ylabel('Run time') # Add a y-label to the axes. +ax.set_title('Comparison of Fisher Jenks implementations. ') # Add a title to the axes. +ax.legend() # Add a legend. + +plt.show() \ No newline at end of file From eba2180403fc8098effe7baf0a86008337acee63 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:21:19 +0000 Subject: [PATCH 08/18] Use sorted, not np.sort.astype. Compare times with and without overhead. --- mapclassify/classifiers.py | 15 ++-- mapclassify/tests/time_fisher_jenkss.py | 101 ++++++++++++++++++------ 2 files changed, 85 insertions(+), 31 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 02215da..b4e4762 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -666,7 +666,8 @@ def _fisher_jenks_means(values, classes=5): k = len(values) - kclass = np.zeros(classes + 1, dtype=values.dtype) + # kclass = np.zeros(classes + 1, dtype=values.dtype) + kclass = np.zeros(classes + 1, dtype=int if isinstance(values[0], int) else float) kclass[classes] = values[len(values) - 1] kclass[0] = values[0] for countNum in range(classes, 1, -1): @@ -2076,13 +2077,11 @@ class FisherJenks(MapClassifier): def __init__(self, y, k=K): if not HAS_NUMBA: warnings.warn( - "Numba not installed. Using slow pure python version.", + "Numba not installed. Using the new, less slow, pure python version.", UserWarning, stacklevel=3, ) - - - self._fjm = _fisher_jenks_means if HAS_NUMBA else _fjm_without_numpy + self._set_bins = self._set_bins_without_numpy @@ -2099,7 +2098,11 @@ def __init__(self, y, k=K): def _set_bins(self): x = np.sort(self.y).astype("f8") - self.bins = self._fjm(x, classes=self.k) + self.bins = _fisher_jenks_means(x, classes=self.k) + + def _set_bins_without_numpy(self): + x = sorted(self.y) + self.bins = _fjm_without_numpy(x, classes=self.k) class FisherJenksSampled(MapClassifier): diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py index 7dc155c..d662a66 100644 --- a/mapclassify/tests/time_fisher_jenkss.py +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -2,9 +2,16 @@ import timeit import functools -import matplotlib.pyplot as plt +try: + import matplotlib.pyplot as plt + HAS_MATPLOTLIB = True +except ImportError: + HAS_MATPLOTLIB = False import mapclassify +import mapclassify.classifiers + +import numpy as np try: import numba @@ -19,49 +26,93 @@ except ImportError: pass + number_tests = 1 +k = 8 -def test_fisher_jenks(N): +def test_fisher_jenks_means(N, HAS_NUMBA): - data = [random.randint(1, 1000) for __ in range(N)] + data = [random.uniform(1.0, 1000.0) for __ in range(N)] - mapclassify.classify(y = data, scheme = 'fisherjenks', k=8) + if HAS_NUMBA: + func = mapclassify.classifiers._fisher_jenks_means(np.sort(data).astype("f8"), classes=k) + else: + func = mapclassify.classifiers._fjm_without_numpy(sorted(data), classes=k) + -descriptions = ["without Numpy, proposed less slow Pure Python code", - 'with Numpy, existing "slow pure python" code', - ] -data_sizes = [100, 300, 900, 1400, 2100, 3800, 10000] +def test_mapclassify_classify_fisherjenks(N, HAS_NUMBA): + data = [random.uniform(1.0, 1000.0) for __ in range(N)] -fig, ax = plt.subplots(figsize=(5, 2.7), layout='constrained') + # This hack avoids changing the interface of the + # FisherJenks class, just for this timing code. + mapclassify.classifiers.HAS_NUMBA = HAS_NUMBA + mapclassify.classify(y = data, scheme = 'fisherjenks', k=k) -for HAS_NUMBA, description in zip([False, True], descriptions): - times = [] + + + + +data_sizes = [100, 300, 1000, 2800, 8000] + + + + +def compare_times(test_runner, descriptions, title): + + + print(f'{title}\n') + + if HAS_MATPLOTLIB: + fig, ax = plt.subplots(figsize=(8.5, 5), layout='constrained') + + + + + for HAS_NUMBA, description in zip([False, True], descriptions): + + times = [] + + for N in data_sizes: + + t = timeit.timeit(functools.partial(test_runner, N=N, HAS_NUMBA = HAS_NUMBA), number=number_tests) + + print(f'Time: {t:.3f} seconds, data points: {N} {description}, {number_tests=}') + + times.append(t) - for N in data_sizes: - - # This hack avoids changing the FisherJenks - # interface, just for this profiling code. - mapclassify.classifiers.HAS_NUMBA = HAS_NUMBA - t = timeit.timeit(functools.partial(test_fisher_jenks, N=N), number=number_tests) + if HAS_MATPLOTLIB: + ax.plot(data_sizes, times, 'o-', label=description) - print(f'Time: {t} seconds, data points: {N} {description}, {number_tests=}') - times.append(t) + if HAS_MATPLOTLIB: + ax.set_xlabel('Number of random data points classified') + ax.set_ylabel('Run time (seconds)') + ax.set_title(title) + ax.legend() - ax.plot(data_sizes, times, label=description) + plt.show() -ax.set_xlabel('Size of data classified') # Add an x-label to the axes. -ax.set_ylabel('Run time') # Add a y-label to the axes. -ax.set_title('Comparison of Fisher Jenks implementations. ') # Add a title to the axes. -ax.legend() # Add a legend. +compare_times( + test_fisher_jenks_means, + title="Run times of the proposed function vs the original (excluding MapClassifier overhead)", + descriptions = [" _fjm_without_numpy", + " _fisher_jenks_means", + ] + ) -plt.show() \ No newline at end of file +compare_times( + test_mapclassify_classify_fisherjenks, + title="Run times for end user, of the proposed code vs the original (inc MapClassifier overhead)", + descriptions = ["without Numpy, much less slow, pure python code", + 'with Numpy, existing "slow pure python" code', + ], + ) \ No newline at end of file From 583ae81010d7aca4da3a22c66fa1d7dc664ca17a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:07:55 +0000 Subject: [PATCH 09/18] Adjust typing in docstring. Correct function name. --- mapclassify/classifiers.py | 10 +++++----- mapclassify/tests/time_fisher_jenkss.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index b4e4762..5cddf34 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -678,7 +678,7 @@ def _fisher_jenks_means(values, classes=5): return np.delete(kclass, 0) -def _fjm_without_numpy( +def _fisher_jenks_means_without_numpy( values, classes=5, np = default_mock_numpy @@ -2040,7 +2040,7 @@ class FisherJenks(MapClassifier): Parameters ---------- - y : numpy.array + y : collections.abc.Iterable[numbers.Real] :math:`(n,1)`, values to classify. k : int (default 5) The number of classes required. @@ -2050,7 +2050,7 @@ class FisherJenks(MapClassifier): yb : numpy.array :math:`(n,1)`, bin IDs for observations. - bins : numpy.array + bins : collections.abc.Sequence[numbers.Real] :math:`(k,1)`, the upper bounds of each class. k : int The number of classes. @@ -2077,7 +2077,7 @@ class FisherJenks(MapClassifier): def __init__(self, y, k=K): if not HAS_NUMBA: warnings.warn( - "Numba not installed. Using the new, less slow, pure python version.", + "Numba not installed. Using a less slow, pure python version.", UserWarning, stacklevel=3, ) @@ -2102,7 +2102,7 @@ def _set_bins(self): def _set_bins_without_numpy(self): x = sorted(self.y) - self.bins = _fjm_without_numpy(x, classes=self.k) + self.bins = _fisher_jenks_means_without_numpy(x, classes=self.k) class FisherJenksSampled(MapClassifier): diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py index d662a66..15ece69 100644 --- a/mapclassify/tests/time_fisher_jenkss.py +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -39,7 +39,7 @@ def test_fisher_jenks_means(N, HAS_NUMBA): if HAS_NUMBA: func = mapclassify.classifiers._fisher_jenks_means(np.sort(data).astype("f8"), classes=k) else: - func = mapclassify.classifiers._fjm_without_numpy(sorted(data), classes=k) + func = mapclassify.classifiers._fisher_jenks_means_without_numpy(sorted(data), classes=k) From 1c79a9de095a15dde39cf3a709ecdfdbe54d951a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:51:59 +0000 Subject: [PATCH 10/18] Revert "Deactivate CI" This reverts commit 34c7a25fa03503be34f67f3c8bb64da802d342d1. --- .github/dependabot.yml | 20 ++++++++++++++++++++ .github/workflows/build_docs.yml | 8 ++++---- .github/workflows/release.yml | 8 ++++---- .github/workflows/testing.yml | 18 ++++++++---------- 4 files changed, 36 insertions(+), 18 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..57fa59b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,20 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + reviewers: + - "jGaboardi" + + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + reviewers: + - "jGaboardi" diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index 0adc82d..4d42cf2 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -2,10 +2,10 @@ name: Build Docs on: - # push: - # # Sequence of patterns matched against refs/tags - # tags: - # - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 + push: + # Sequence of patterns matched against refs/tags + tags: + - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 workflow_dispatch: inputs: version: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f46c616..b5418e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,10 +1,10 @@ name: Release Package on: - # push: - # # Sequence of patterns matched against refs/tags - # tags: - # - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 + push: + # Sequence of patterns matched against refs/tags + tags: + - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 workflow_dispatch: inputs: version: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 94c1931..8a7cf60 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -1,16 +1,14 @@ name: Continuous Integration on: - # push: - # branches: - # - "*" - # pull_request: - # branches: - # - "*" - # schedule: - # - cron: "59 21 * * *" - workflow_dispatch: - + push: + branches: + - "*" + pull_request: + branches: + - "*" + schedule: + - cron: "59 21 * * *" jobs: testing: From 81501db7e12a03f088764dd3cbf6cc798cf105c0 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:56:37 +0000 Subject: [PATCH 11/18] Revert README.md to original --- README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.md b/README.md index 1b98bc1..9f73d13 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,3 @@ -# mapclassif-Iron: _fisher_jenks_means for IronPython, and other environments without numpy. - -This branch does not currently work, but is intended to very soon work with, when installed by -directing pip to this github repository branch. This branch is primarily for convenient -PRs for mapclassify. - -Only the function, _fisher_jenks_means has been adapted (back to something similar to -Daniel J Lewis's original snippet). - -No other functionality from mapclassify has been changed, and none has been added. - -Parent project's README.md: - # mapclassify: Classification Schemes for Choropleth Maps [![Continuous Integration](https://github.com/pysal/mapclassify/actions/workflows/testing.yml/badge.svg)](https://github.com/pysal/mapclassify/actions/workflows/testing.yml) From f68bd5e90df80bdc9885097179935473b8adbe70 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:59:34 +0000 Subject: [PATCH 12/18] Remove unused entry and unused import --- mapclassify/_classify_API.py | 1 - mapclassify/classifiers.py | 1 - 2 files changed, 2 deletions(-) diff --git a/mapclassify/_classify_API.py b/mapclassify/_classify_API.py index 7f1c0a7..c0c0fa8 100644 --- a/mapclassify/_classify_API.py +++ b/mapclassify/_classify_API.py @@ -24,7 +24,6 @@ "boxplot": BoxPlot, "equalinterval": EqualInterval, "fisherjenks": FisherJenks, - "fisherjenks_nonumpy": FisherJenks, "fisherjenkssampled": FisherJenksSampled, "headtailbreaks": HeadTailBreaks, "jenkscaspall": JenksCaspall, diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 5cddf34..4521ba8 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -1,7 +1,6 @@ """ A module of classification schemes for choropleth mapping. """ -import sys import copy import functools import warnings From 9784bfd185a3cf442f14a1d6188f22325ebcdf74 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:09:25 +0000 Subject: [PATCH 13/18] Tweak to reduce number of changes in PR --- mapclassify/classifiers.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 4521ba8..fae1766 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -5,9 +5,9 @@ import functools import warnings +import numpy as np import scipy.stats as stats from sklearn.cluster import KMeans -import numpy as np @@ -113,7 +113,8 @@ def wrapper_decorator(*args, **kwargs): return decorator_njit - # Numba requires Numpy + # Numba requires Numpy. This is still used when + # Numpy is present, but Numba isn't. default_mock_numpy = MockNumpy() @@ -665,8 +666,7 @@ def _fisher_jenks_means(values, classes=5): k = len(values) - # kclass = np.zeros(classes + 1, dtype=values.dtype) - kclass = np.zeros(classes + 1, dtype=int if isinstance(values[0], int) else float) + kclass = np.zeros(classes + 1, dtype=values.dtype) kclass[classes] = values[len(values) - 1] kclass[0] = values[0] for countNum in range(classes, 1, -1): @@ -706,7 +706,6 @@ def _fisher_jenks_means_without_numpy( mat1 = np.zeros((n_data + 1, classes + 1), dtype=np.int32) mat2 = np.zeros((n_data + 1, classes + 1), dtype=np.float32) - # System.Array.Fill not suppported on Multi-dimensional arrays for j in range(1, classes + 1): mat1[1][j] = 1 for i in range(2, n_data+1): @@ -733,8 +732,6 @@ def _fisher_jenks_means_without_numpy( mat1[_l][1] = 1 mat2[_l][1] = v -# for row in mat1: -# print(row) k = len(values) kclass = np.zeros((classes + 1,), dtype=type(values[0])) From 49a06a9a6350ea5a639c394c170b0924cc15d782 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:11:31 +0000 Subject: [PATCH 14/18] Remove whitespace --- mapclassify/classifiers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index fae1766..cc0ec5f 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -5,12 +5,10 @@ import functools import warnings -import numpy as np +import numpy as np import scipy.stats as stats from sklearn.cluster import KMeans - - __author__ = "Sergio J. Rey" __all__ = [ From dbb3b8d83fc36c3a3b3a238b2db2addab34752d8 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:37:40 +0000 Subject: [PATCH 15/18] Fix error when numba present. Add timing test that starts from np arrays --- mapclassify/classifiers.py | 9 ++++----- mapclassify/tests/time_fisher_jenkss.py | 22 +++++++++++++++------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index cc0ec5f..fabbeab 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -111,10 +111,6 @@ def wrapper_decorator(*args, **kwargs): return decorator_njit - # Numba requires Numpy. This is still used when - # Numpy is present, but Numba isn't. - default_mock_numpy = MockNumpy() - def _format_intervals(mc, fmt="{:.0f}"): """ @@ -678,7 +674,7 @@ def _fisher_jenks_means(values, classes=5): def _fisher_jenks_means_without_numpy( values, classes=5, - np = default_mock_numpy + np = None ): """ As for _fisher_jenks_means above, to keep the code as far as possible @@ -699,6 +695,9 @@ def _fisher_jenks_means_without_numpy( """ + if np is None: + np = MockNumpy() + values.sort() n_data = len(values) mat1 = np.zeros((n_data + 1, classes + 1), dtype=np.int32) diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py index 15ece69..4172984 100644 --- a/mapclassify/tests/time_fisher_jenkss.py +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -40,6 +40,14 @@ def test_fisher_jenks_means(N, HAS_NUMBA): func = mapclassify.classifiers._fisher_jenks_means(np.sort(data).astype("f8"), classes=k) else: func = mapclassify.classifiers._fisher_jenks_means_without_numpy(sorted(data), classes=k) + + + # data = np.sort([random.uniform(1.0, 1000.0) for __ in range(N)]).astype("f8") + + # if HAS_NUMBA: + # func = mapclassify.classifiers._fisher_jenks_means(data, classes=k) + # else: + # func = mapclassify.classifiers._fisher_jenks_means_without_numpy(data, classes=k) @@ -109,10 +117,10 @@ def compare_times(test_runner, descriptions, title): ] ) -compare_times( - test_mapclassify_classify_fisherjenks, - title="Run times for end user, of the proposed code vs the original (inc MapClassifier overhead)", - descriptions = ["without Numpy, much less slow, pure python code", - 'with Numpy, existing "slow pure python" code', - ], - ) \ No newline at end of file +# compare_times( +# test_mapclassify_classify_fisherjenks, +# title="Run times for end user, of the proposed code vs the original (inc MapClassifier overhead)", +# descriptions = ["without Numpy, much less slow, pure python code", +# 'with Numpy, existing "slow pure python" code', +# ], +# ) \ No newline at end of file From 0b4885468118edd4e215319123ebe22aa679fb32 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 19:38:27 +0000 Subject: [PATCH 16/18] Set bins to be a numpy array --- mapclassify/classifiers.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index fabbeab..5a83a2f 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -698,7 +698,6 @@ def _fisher_jenks_means_without_numpy( if np is None: np = MockNumpy() - values.sort() n_data = len(values) mat1 = np.zeros((n_data + 1, classes + 1), dtype=np.int32) mat2 = np.zeros((n_data + 1, classes + 1), dtype=np.float32) @@ -744,6 +743,7 @@ def _fisher_jenks_means_without_numpy( + class MapClassifier: r""" Abstract class for all map classifications :cite:`Slocum_2009` @@ -2069,12 +2069,12 @@ class FisherJenks(MapClassifier): def __init__(self, y, k=K): if not HAS_NUMBA: + self._set_bins = self._set_bins_without_numpy warnings.warn( "Numba not installed. Using a less slow, pure python version.", UserWarning, stacklevel=3, ) - self._set_bins = self._set_bins_without_numpy @@ -2095,7 +2095,7 @@ def _set_bins(self): def _set_bins_without_numpy(self): x = sorted(self.y) - self.bins = _fisher_jenks_means_without_numpy(x, classes=self.k) + self.bins = np.asarray(_fisher_jenks_means_without_numpy(x, classes=self.k)) class FisherJenksSampled(MapClassifier): @@ -2135,14 +2135,20 @@ class FisherJenksSampled(MapClassifier): """ + ids = None + def __init__(self, y, k=K, pct=0.10, truncate=True): + print(f'Got: {k=}, {pct=}, {truncate=}') self.k = k n = y.size if (pct * n > 1000) and truncate: pct = 1000.0 / n + # if FisherJenksSampled.ids is None: + # FisherJenksSampled.ids = np.random.randint(0, n, int(n * pct)) ids = np.random.randint(0, n, int(n * pct)) y = np.asarray(y) + # yr = y[FisherJenksSampled.ids] yr = y[ids] yr[-1] = max(y) # make sure we have the upper bound yr[0] = min(y) # make sure we have the min From 921646207b43d050fcada25fceb026336fae2770 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 14 Dec 2023 20:59:46 +0000 Subject: [PATCH 17/18] Change exception if numba present to a UserWarning --- mapclassify/tests/time_fisher_jenkss.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py index 4172984..3c26ae3 100644 --- a/mapclassify/tests/time_fisher_jenkss.py +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -1,6 +1,8 @@ import random import timeit import functools +import warnings + try: import matplotlib.pyplot as plt @@ -15,14 +17,17 @@ try: import numba - raise Exception( - f"""This test is to compare execution times of two alternatives + + warnings.warn( + f"""This test is to compare execution times of two alternatives to the Numba-optimised function (both of which we already know are far slower). Please run {__file__} again in a venv, - in which Numba is not installed. """ - ) + in which Numba is not installed. """, + UserWarning, + stacklevel=3, + ) except ImportError: pass From 268c1d5a2195ee3ae79d3a538a6b409cffcb231d Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 15 Dec 2023 10:41:56 +0000 Subject: [PATCH 18/18] Make zeros an instance method and MockNumpy a Python 3 style class --- mapclassify/classifiers.py | 51 +++++------------ mapclassify/tests/time_fisher_jenkss.py | 74 +++++++++++-------------- 2 files changed, 45 insertions(+), 80 deletions(-) diff --git a/mapclassify/classifiers.py b/mapclassify/classifiers.py index 5a83a2f..7eca9af 100644 --- a/mapclassify/classifiers.py +++ b/mapclassify/classifiers.py @@ -61,37 +61,23 @@ FMT = "{:.2f}" - - - - -class MockNumpy(object): - - def __init__(self, int_type = None, float_type = None): - +class MockNumpy: + def __init__(self, int_type=None, float_type=None): self.int32 = int_type or int self.float32 = float_type or float - self.inf = self.float32('inf') - - @classmethod - def zeros(self, dims, dtype = int): + self.inf = self.float32("inf") + def zeros(self, dims, dtype=int): if len(dims) == 1: zero = dtype(0) return [zero for __ in range(dims[0])] - - return [self.zeros(dims[1:], dtype) for __ in range(dims[0])] - + + return [self.zeros(dims[1:], dtype) for __ in range(dims[0])] @staticmethod def delete(arr, index): - return arr[:index] + arr[index+1:] - - - - - + return arr[:index] + arr[index + 1 :] try: @@ -671,11 +657,7 @@ def _fisher_jenks_means(values, classes=5): return np.delete(kclass, 0) -def _fisher_jenks_means_without_numpy( - values, - classes=5, - np = None - ): +def _fisher_jenks_means_without_numpy(values, classes=5, np=None): """ As for _fisher_jenks_means above, to keep the code as far as possible exactly the same, except with np passable in as a dependency, and with @@ -701,10 +683,10 @@ def _fisher_jenks_means_without_numpy( n_data = len(values) mat1 = np.zeros((n_data + 1, classes + 1), dtype=np.int32) mat2 = np.zeros((n_data + 1, classes + 1), dtype=np.float32) - + for j in range(1, classes + 1): mat1[1][j] = 1 - for i in range(2, n_data+1): + for i in range(2, n_data + 1): mat2[i][j] = np.inf v = 0 for _l in range(2, len(values) + 1): @@ -741,9 +723,6 @@ def _fisher_jenks_means_without_numpy( return np.delete(kclass, 0) - - - class MapClassifier: r""" Abstract class for all map classifications :cite:`Slocum_2009` @@ -2069,15 +2048,13 @@ class FisherJenks(MapClassifier): def __init__(self, y, k=K): if not HAS_NUMBA: - self._set_bins = self._set_bins_without_numpy + self._set_bins = self._set_bins_without_numpy warnings.warn( "Numba not installed. Using a less slow, pure python version.", UserWarning, stacklevel=3, ) - - nu = len(np.unique(y)) if nu < k: raise ValueError( @@ -2087,8 +2064,6 @@ def __init__(self, y, k=K): MapClassifier.__init__(self, y) self.name = "FisherJenks" - - def _set_bins(self): x = np.sort(self.y).astype("f8") self.bins = _fisher_jenks_means(x, classes=self.k) @@ -2138,14 +2113,14 @@ class FisherJenksSampled(MapClassifier): ids = None def __init__(self, y, k=K, pct=0.10, truncate=True): - print(f'Got: {k=}, {pct=}, {truncate=}') + print(f"Got: {k=}, {pct=}, {truncate=}") self.k = k n = y.size if (pct * n > 1000) and truncate: pct = 1000.0 / n # if FisherJenksSampled.ids is None: - # FisherJenksSampled.ids = np.random.randint(0, n, int(n * pct)) + # FisherJenksSampled.ids = np.random.randint(0, n, int(n * pct)) ids = np.random.randint(0, n, int(n * pct)) y = np.asarray(y) # yr = y[FisherJenksSampled.ids] diff --git a/mapclassify/tests/time_fisher_jenkss.py b/mapclassify/tests/time_fisher_jenkss.py index 3c26ae3..7d5d26c 100644 --- a/mapclassify/tests/time_fisher_jenkss.py +++ b/mapclassify/tests/time_fisher_jenkss.py @@ -6,6 +6,7 @@ try: import matplotlib.pyplot as plt + HAS_MATPLOTLIB = True except ImportError: HAS_MATPLOTLIB = False @@ -19,7 +20,7 @@ import numba warnings.warn( - f"""This test is to compare execution times of two alternatives + f"""This test is to compare execution times of two alternatives to the Numba-optimised function (both of which we already know are far slower). @@ -27,7 +28,7 @@ in which Numba is not installed. """, UserWarning, stacklevel=3, - ) + ) except ImportError: pass @@ -36,16 +37,17 @@ k = 8 - def test_fisher_jenks_means(N, HAS_NUMBA): - data = [random.uniform(1.0, 1000.0) for __ in range(N)] if HAS_NUMBA: - func = mapclassify.classifiers._fisher_jenks_means(np.sort(data).astype("f8"), classes=k) + func = mapclassify.classifiers._fisher_jenks_means( + np.sort(data).astype("f8"), classes=k + ) else: - func = mapclassify.classifiers._fisher_jenks_means_without_numpy(sorted(data), classes=k) - + func = mapclassify.classifiers._fisher_jenks_means_without_numpy( + sorted(data), classes=k + ) # data = np.sort([random.uniform(1.0, 1000.0) for __ in range(N)]).astype("f8") @@ -53,74 +55,62 @@ def test_fisher_jenks_means(N, HAS_NUMBA): # func = mapclassify.classifiers._fisher_jenks_means(data, classes=k) # else: # func = mapclassify.classifiers._fisher_jenks_means_without_numpy(data, classes=k) - def test_mapclassify_classify_fisherjenks(N, HAS_NUMBA): - data = [random.uniform(1.0, 1000.0) for __ in range(N)] - - # This hack avoids changing the interface of the + # This hack avoids changing the interface of the # FisherJenks class, just for this timing code. mapclassify.classifiers.HAS_NUMBA = HAS_NUMBA - - mapclassify.classify(y = data, scheme = 'fisherjenks', k=k) - - - - + mapclassify.classify(y=data, scheme="fisherjenks", k=k) data_sizes = [100, 300, 1000, 2800, 8000] - - def compare_times(test_runner, descriptions, title): - - - print(f'{title}\n') + print(f"{title}\n") if HAS_MATPLOTLIB: - fig, ax = plt.subplots(figsize=(8.5, 5), layout='constrained') - - - + fig, ax = plt.subplots(figsize=(8.5, 5), layout="constrained") for HAS_NUMBA, description in zip([False, True], descriptions): - times = [] for N in data_sizes: - - t = timeit.timeit(functools.partial(test_runner, N=N, HAS_NUMBA = HAS_NUMBA), number=number_tests) + t = timeit.timeit( + functools.partial(test_runner, N=N, HAS_NUMBA=HAS_NUMBA), + number=number_tests, + ) - print(f'Time: {t:.3f} seconds, data points: {N} {description}, {number_tests=}') + print( + f"Time: {t:.3f} seconds, data points: {N} {description}, {number_tests=}" + ) times.append(t) - if HAS_MATPLOTLIB: - ax.plot(data_sizes, times, 'o-', label=description) - + ax.plot(data_sizes, times, "o-", label=description) if HAS_MATPLOTLIB: - ax.set_xlabel('Number of random data points classified') - ax.set_ylabel('Run time (seconds)') - ax.set_title(title) - ax.legend() + ax.set_xlabel("Number of random data points classified") + ax.set_ylabel("Run time (seconds)") + ax.set_title(title) + ax.legend() plt.show() + compare_times( test_fisher_jenks_means, title="Run times of the proposed function vs the original (excluding MapClassifier overhead)", - descriptions = [" _fjm_without_numpy", - " _fisher_jenks_means", - ] - ) + descriptions=[ + " _fjm_without_numpy", + " _fisher_jenks_means", + ], +) # compare_times( # test_mapclassify_classify_fisherjenks, @@ -128,4 +118,4 @@ def compare_times(test_runner, descriptions, title): # descriptions = ["without Numpy, much less slow, pure python code", # 'with Numpy, existing "slow pure python" code', # ], -# ) \ No newline at end of file +# )