diff --git a/cityseer/util/mock.py b/cityseer/util/mock.py
index 6c0031fe..77b8180f 100644
--- a/cityseer/util/mock.py
+++ b/cityseer/util/mock.py
@@ -3,7 +3,7 @@
'''
import logging
from typing import Tuple
-import json
+import string
import networkx as nx
import numpy as np
@@ -223,13 +223,16 @@ def mock_data_dict(G: nx.Graph, length: int = 50, random_seed: int = None) -> di
return data_dict
-def mock_categorical_data(length: int, random_seed: int = None) -> np.ndarray:
+def mock_categorical_data(length: int, num_classes: int = 10, random_seed: int = None) -> np.ndarray:
if random_seed is not None:
np.random.seed(seed=random_seed)
- random_class_str = 'abcdefghijk'
- d = []
+ random_class_str = string.ascii_lowercase
+ if num_classes > len(random_class_str):
+ raise ValueError(f'The requested {num_classes} classes exceeds the max available categorical classes: {len(random_class_str)}')
+ random_class_str = random_class_str[:num_classes]
+ d = []
for i in range(length):
d.append(random_class_str[np.random.randint(0, len(random_class_str) - 1)])
diff --git a/docs/util/mock.md b/docs/util/mock.md
index 4f9ba9d7..d5904de0 100644
--- a/docs/util/mock.md
+++ b/docs/util/mock.md
@@ -72,7 +72,7 @@ A dictionary where each entry consists of a `key` representing a distinct data p
mock\_categorical\_data
-----------------------
-mock_categorical_data(length, random_seed=None)
+mock_categorical_data(length, num_classes=10, random_seed=None)
Generates a `numpy` array containing mock categorical data for testing or experimentation purposes.
@@ -84,6 +84,12 @@ The number of categorical elements to return in the array.
+
+
+The maximum number of unique classes to return in the randomly assigned categorical data. The classes are randomly generated from a pool of unique class labels of length `num_classes`. The number of returned unique classes will be less than or equal to `num_classes`.
+
+
+
For the use of a specified random seed.
diff --git a/setup.py b/setup.py
index 1dce577f..adcb8e6b 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
setup (
name = 'cityseer',
- version='0.8.16',
+ version='0.8.17',
packages=['cityseer', 'cityseer.algos', 'cityseer.metrics', 'cityseer.util'],
description = 'Computational tools for urban analysis',
url='https://github.com/cityseer/cityseer-api',
diff --git a/tests/util/test_mock.py b/tests/util/test_mock.py
index 42d7efa1..2498024d 100644
--- a/tests/util/test_mock.py
+++ b/tests/util/test_mock.py
@@ -1,5 +1,7 @@
import networkx as nx
import numpy as np
+import pytest
+import string
from cityseer.util import mock
@@ -57,10 +59,20 @@ def test_mock_data_dict():
def test_mock_categorical_data():
cat_d = mock.mock_categorical_data(50)
assert len(cat_d) == 50
+ # classes are generated randomly from max number of classes
+ # i.e. situations do exist where the number of classes will be less than the max permitted
+ assert len(set(cat_d)) <= 10
for c in cat_d:
assert isinstance(c, str)
- assert c in 'abcdefghijk'
+ assert c in string.ascii_lowercase
+
+ cat_d = mock.mock_categorical_data(50, num_classes=3)
+ assert len(set(cat_d)) <= 3
+
+ # test that an error is raised when requesting more than available max classes per ascii_lowercase
+ with pytest.raises(ValueError):
+ mock.mock_categorical_data(50, num_classes=len(string.ascii_lowercase) + 1)
def test_mock_numerical_data():