Skip to content

Commit

Permalink
Adds a num_classes parameter to the mock_categorical_data function.
Browse files Browse the repository at this point in the history
  • Loading branch information
songololo committed Aug 2, 2019
1 parent d874f94 commit 4506cc4
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 7 deletions.
11 changes: 7 additions & 4 deletions cityseer/util/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
'''
import logging
from typing import Tuple
import json
import string

import networkx as nx
import numpy as np
Expand Down Expand Up @@ -223,13 +223,16 @@ def mock_data_dict(G: nx.Graph, length: int = 50, random_seed: int = None) -> di
return data_dict


def mock_categorical_data(length: int, random_seed: int = None) -> np.ndarray:
def mock_categorical_data(length: int, num_classes: int = 10, random_seed: int = None) -> np.ndarray:
if random_seed is not None:
np.random.seed(seed=random_seed)

random_class_str = 'abcdefghijk'
d = []
random_class_str = string.ascii_lowercase
if num_classes > len(random_class_str):
raise ValueError(f'The requested {num_classes} classes exceeds the max available categorical classes: {len(random_class_str)}')
random_class_str = random_class_str[:num_classes]

d = []
for i in range(length):
d.append(random_class_str[np.random.randint(0, len(random_class_str) - 1)])

Expand Down
8 changes: 7 additions & 1 deletion docs/util/mock.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ A dictionary where each entry consists of a `key` representing a distinct data p

mock\_categorical\_data
-----------------------
<FuncSignature>mock_categorical_data(length, random_seed=None)</FuncSignature>
<FuncSignature>mock_categorical_data(length, num_classes=10, random_seed=None)</FuncSignature>

Generates a `numpy` array containing mock categorical data for testing or experimentation purposes.

Expand All @@ -84,6 +84,12 @@ The number of categorical elements to return in the array.

</FuncElement>

<FuncElement name="num_classes" type="int">

The maximum number of unique classes to return in the randomly assigned categorical data. Each element is drawn at random from a pool of `num_classes` unique class labels, so the number of unique classes actually returned will be less than or equal to `num_classes`. A `ValueError` is raised if `num_classes` exceeds the number of available class labels (the 26 lowercase ASCII letters).

</FuncElement>

<FuncElement name="random_seed" type="int">

For the use of a specified random seed.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setup (
name = 'cityseer',
version='0.8.16',
version='0.8.17',
packages=['cityseer', 'cityseer.algos', 'cityseer.metrics', 'cityseer.util'],
description = 'Computational tools for urban analysis',
url='https://github.com/cityseer/cityseer-api',
Expand Down
14 changes: 13 additions & 1 deletion tests/util/test_mock.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import networkx as nx
import numpy as np
import pytest
import string

from cityseer.util import mock

Expand Down Expand Up @@ -57,10 +59,20 @@ def test_mock_data_dict():
def test_mock_categorical_data():
cat_d = mock.mock_categorical_data(50)
assert len(cat_d) == 50
# classes are generated randomly from max number of classes
# i.e. situations do exist where the number of classes will be less than the max permitted
assert len(set(cat_d)) <= 10

for c in cat_d:
assert isinstance(c, str)
assert c in 'abcdefghijk'
assert c in string.ascii_lowercase

cat_d = mock.mock_categorical_data(50, num_classes=3)
assert len(set(cat_d)) <= 3

# test that an error is raised when requesting more classes than are available in string.ascii_lowercase
with pytest.raises(ValueError):
mock.mock_categorical_data(50, num_classes=len(string.ascii_lowercase) + 1)


def test_mock_numerical_data():
Expand Down

0 comments on commit 4506cc4

Please sign in to comment.