From b08f66bf56a50dbfb4db03d45d6c39afecd9f15f Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Fri, 8 Mar 2024 10:12:19 +0100 Subject: [PATCH] Improve docs --- datasets/flwr_datasets/mock_utils.py | 32 ++++++++++++---------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/datasets/flwr_datasets/mock_utils.py b/datasets/flwr_datasets/mock_utils.py index 1504da30843b..6e290e89cc75 100644 --- a/datasets/flwr_datasets/mock_utils.py +++ b/datasets/flwr_datasets/mock_utils.py @@ -19,7 +19,7 @@ import random import string from datetime import datetime, timedelta -from typing import Any, Callable, Dict, List, Set, Tuple, Union +from typing import Any, Dict, List, Set, Tuple, Union import numpy as np from PIL import Image @@ -78,11 +78,12 @@ def _generate_artificial_categories(num_rows: int, choices: List[Any]) -> List[s def _generate_random_word(length: int) -> str: - # Generate a random word of the given length + """Generate a random word of the given length.""" return "".join(random.choices(string.ascii_letters, k=length)) def _generate_random_text_column(num_rows: int, length: int) -> List[str]: + """Generate a list of random text of specified length.""" text_col = [] for _ in range(num_rows): text_col.append(_generate_random_word(length)) @@ -95,7 +96,7 @@ def _generate_random_sentence( min_sentence_length: int, max_sentence_length: int, ) -> str: - # Generate a random sentence with words of random lengths + """Generate a random sentence with words of random lengths.""" sentence_length = random.randint(min_sentence_length, max_sentence_length) sentence: List[str] = [] while len(" ".join(sentence)) < sentence_length: @@ -112,7 +113,7 @@ def _generate_random_sentences( min_sentence_length: int, max_sentence_length: int, ) -> List[str]: - # Generate a list of random sentences + """Generate a list of random sentences.""" text_col = [ _generate_random_sentence( min_word_length, max_word_length, min_sentence_length, max_sentence_length @@ -123,10 +124,12 @@ def _generate_random_sentences( def _make_num_rows_none(column: List[Any], num_none: int) -> List[Any]: - none_positions = random.sample(range(len(column)), num_none) + """Assign none num_none times to the given list.""" + column_copy = column.copy() + none_positions = random.sample(range(len(column_copy)), num_none) for pos in none_positions: - column[pos] = None - return column + column_copy[pos] = None + return column_copy def _generate_random_date( @@ -160,10 +163,12 @@ def _generate_random_date_column( def _generate_random_int_column(num_rows: int, min_int: int, max_int: int) -> List[int]: + """Generate a list of ints.""" return [random.randint(min_int, max_int) for _ in range(num_rows)] def _generate_random_bool_column(num_rows: int) -> List[bool]: + """Generate a list of bools.""" return [random.choice([True, False]) for _ in range(num_rows)] @@ -273,11 +278,11 @@ def _mock_sentiment140(num_rows: int) -> Dataset: def _mock_cifar100(num_rows: int) -> Dataset: imgs = _generate_random_image_column(num_rows, (32, 32, 3), "PNG") unique_fine_labels = _generate_artificial_strings( - num_rows=100, num_unique=100, string_length=10 + num_rows=100, num_unique=100, string_length=10, seed=42 ) fine_label = _generate_artificial_categories(num_rows, unique_fine_labels) unique_coarse_labels = _generate_artificial_strings( - num_rows=20, num_unique=20, string_length=10 + num_rows=20, num_unique=20, string_length=10, seed=42 ) coarse_label = _generate_artificial_categories(num_rows, unique_coarse_labels) @@ -350,15 +355,6 @@ def _mock_speach_commands(num_rows: int) -> Dataset: return dataset -def _mock_dict_dataset( - num_rows: List[int], split_names: List[str], function: Callable[[int], Dataset] -) -> DatasetDict: - dataset_dict = {} - for params in zip(num_rows, split_names): - dataset_dict[params[1]] = function(params[0]) - return datasets.DatasetDict(dataset_dict) - - dataset_name_to_mock_function = { "cifar100": _mock_cifar100, "sentiment140": _mock_sentiment140,