diff --git a/cityseer/util/mock.py b/cityseer/util/mock.py index 6c0031fe..77b8180f 100644 --- a/cityseer/util/mock.py +++ b/cityseer/util/mock.py @@ -3,7 +3,7 @@ ''' import logging from typing import Tuple -import json +import string import networkx as nx import numpy as np @@ -223,13 +223,16 @@ def mock_data_dict(G: nx.Graph, length: int = 50, random_seed: int = None) -> di return data_dict -def mock_categorical_data(length: int, random_seed: int = None) -> np.ndarray: +def mock_categorical_data(length: int, num_classes: int = 10, random_seed: int = None) -> np.ndarray: if random_seed is not None: np.random.seed(seed=random_seed) - random_class_str = 'abcdefghijk' - d = [] + random_class_str = string.ascii_lowercase + if num_classes > len(random_class_str): + raise ValueError(f'The requested {num_classes} classes exceeds the max available categorical classes: {len(random_class_str)}') + random_class_str = random_class_str[:num_classes] + d = [] for i in range(length): d.append(random_class_str[np.random.randint(0, len(random_class_str) - 1)]) diff --git a/docs/util/mock.md b/docs/util/mock.md index 4f9ba9d7..d5904de0 100644 --- a/docs/util/mock.md +++ b/docs/util/mock.md @@ -72,7 +72,7 @@ A dictionary where each entry consists of a `key` representing a distinct data p mock\_categorical\_data ----------------------- -mock_categorical_data(length, random_seed=None) +mock_categorical_data(length, num_classes=10, random_seed=None) Generates a `numpy` array containing mock categorical data for testing or experimentation purposes. @@ -84,6 +84,12 @@ The number of categorical elements to return in the array. + + +The maximum number of unique classes to return in the randomly assigned categorical data. The classes are randomly generated from a pool of unique class labels of length `num_classes`. The number of returned unique classes will be less than or equal to `num_classes`. + + + For the use of a specified random seed. 
diff --git a/setup.py b/setup.py index 1dce577f..adcb8e6b 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup ( name = 'cityseer', - version='0.8.16', + version='0.8.17', packages=['cityseer', 'cityseer.algos', 'cityseer.metrics', 'cityseer.util'], description = 'Computational tools for urban analysis', url='https://github.com/cityseer/cityseer-api', diff --git a/tests/util/test_mock.py b/tests/util/test_mock.py index 42d7efa1..2498024d 100644 --- a/tests/util/test_mock.py +++ b/tests/util/test_mock.py @@ -1,5 +1,7 @@ import networkx as nx import numpy as np +import pytest +import string from cityseer.util import mock @@ -57,10 +59,20 @@ def test_mock_data_dict(): def test_mock_categorical_data(): cat_d = mock.mock_categorical_data(50) assert len(cat_d) == 50 + # classes are generated randomly from max number of classes + # i.e. situations do exist where the number of classes will be less than the max permitted + assert len(set(cat_d)) <= 10 for c in cat_d: assert isinstance(c, str) - assert c in 'abcdefghijk' + assert c in string.ascii_lowercase + + cat_d = mock.mock_categorical_data(50, num_classes=3) + assert len(set(cat_d)) <= 3 + + # test that an error is raised when requesting more than available max classes per ascii_lowercase + with pytest.raises(ValueError): + mock.mock_categorical_data(50, num_classes=len(string.ascii_lowercase) + 1) def test_mock_numerical_data():