From efb2f4008b096dae28d889db22ba77b22b20fd17 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 17 Jul 2024 13:43:50 +0200 Subject: [PATCH 01/15] Extend tested_datasets list --- datasets/flwr_datasets/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 0ecb96ac9456..746dc85478f1 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -34,6 +34,7 @@ "svhn", "sentiment140", "speech_commands", + "flwrlabs/femnist", ] From 016348beff7acd9b86d7f610d4904810d3a1edb4 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 18 Jul 2024 09:59:31 +0200 Subject: [PATCH 02/15] Add femnist tests --- .../flwr_datasets/federated_dataset_test.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index f65aa6346f3a..b66874cb34e2 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -28,7 +28,7 @@ from datasets import Dataset, DatasetDict, concatenate_datasets from flwr_datasets.federated_dataset import FederatedDataset from flwr_datasets.mock_utils_test import _load_mocked_dataset -from flwr_datasets.partitioner import IidPartitioner, Partitioner +from flwr_datasets.partitioner import IidPartitioner, NaturalIdPartitioner, Partitioner mocked_datasets = ["cifar100", "svhn", "sentiment140", "speech_commands"] @@ -380,6 +380,39 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: ) +mocked_natural_id_datasets = [ + "flwrlabs/femnist", +] + + +@parameterized_class( + ("dataset_name", "test_split", "subset", "partition_by"), + [ + ("flwrlabs/femnist", "", "", "writer_id"), + ], +) +class NaturalIdPartitionerIntegrationTest(unittest.TestCase): + """General FederatedDataset tests with NaturalIdPartitioner.""" + + dataset_name = "" + test_split = "" + subset = "" + 
partition_by = "" + + def test_if_the_partitions_have_unique_values(self) -> None: + """Test if each partition has a single unique id value.""" + fds = FederatedDataset( + dataset=self.dataset_name, + partitioners={ + "train": NaturalIdPartitioner(partition_by=self.partition_by) + }, + ) + for partition_id in range(fds.partitioners["train"].num_partitions): + partition = fds.load_partition(partition_id) + unique_ids_in_partition = list(set(partition[self.partition_by])) + self.assertEqual(len(unique_ids_in_partition), 1) + + class IncorrectUsageFederatedDatasets(unittest.TestCase): """Test incorrect usages in FederatedDatasets.""" From 529292326b4cce18ee976575268850b98217839d Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 18 Jul 2024 10:52:51 +0200 Subject: [PATCH 03/15] Add tests for ucf101 dataset --- .../flwr_datasets/federated_dataset_test.py | 23 ++++++++++++++++++- datasets/flwr_datasets/mock_utils_test.py | 20 ++++++++++++++++ datasets/flwr_datasets/utils.py | 1 + 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index b66874cb34e2..aa6075d9e61a 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -380,15 +380,18 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: ) -mocked_natural_id_datasets = [ +natural_id_datasets = [ "flwrlabs/femnist", ] +mocked_natural_id_datasets = ["flwrlabs/ucf101"] + @parameterized_class( ("dataset_name", "test_split", "subset", "partition_by"), [ ("flwrlabs/femnist", "", "", "writer_id"), + ("flwrlabs/ucf101", "test", "", "video_id"), ], ) class NaturalIdPartitionerIntegrationTest(unittest.TestCase): @@ -399,6 +402,24 @@ class NaturalIdPartitionerIntegrationTest(unittest.TestCase): subset = "" partition_by = "" + def setUp(self) -> None: + """Mock the dataset download prior to each method if needed. 
+ + If the `dataset_name` is in the `mocked_datasets` list, then the dataset + download is mocked. + """ + if self.dataset_name in mocked_natural_id_datasets: + self.patcher = patch("datasets.load_dataset") + self.mock_load_dataset = self.patcher.start() + self.mock_load_dataset.return_value = _load_mocked_dataset( + self.dataset_name, [20, 10], ["train", self.test_split], self.subset + ) + + def tearDown(self) -> None: + """Clean up after the dataset mocking.""" + if self.dataset_name in mocked_datasets: + patch.stopall() + def test_if_the_partitions_have_unique_values(self) -> None: """Test if each partition has a single unique id value.""" fds = FederatedDataset( diff --git a/datasets/flwr_datasets/mock_utils_test.py b/datasets/flwr_datasets/mock_utils_test.py index bd49de8033de..f31fe17c2543 100644 --- a/datasets/flwr_datasets/mock_utils_test.py +++ b/datasets/flwr_datasets/mock_utils_test.py @@ -355,11 +355,31 @@ def _mock_speach_commands(num_rows: int) -> Dataset: return dataset +def _mock_ucf101(num_rows: int) -> Dataset: + imgs = _generate_random_image_column(num_rows, (320, 240, 3), "JPEG") + unique_video_id = ["0", "1", "2", "3", "4"] + unique_labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + label = _generate_artificial_categories(num_rows, unique_labels) + video_id = _generate_artificial_categories(num_rows, unique_video_id) + features = Features( + { + "image": datasets.Image(decode=True), + "video_id": Value(dtype="string"), + "label": ClassLabel(names=unique_labels), + } + ) + dataset = datasets.Dataset.from_dict( + {"image": imgs, "video_id": video_id, "label": label}, features=features + ) + return dataset + + dataset_name_to_mock_function = { "cifar100": _mock_cifar100, "sentiment140": _mock_sentiment140, "svhn_cropped_digits": _mock_svhn_cropped_digits, "speech_commands_v0.01": _mock_speach_commands, + "flwrlabs/ucf101": _mock_ucf101, } diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 
746dc85478f1..282a33ed10a0 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -35,6 +35,7 @@ "sentiment140", "speech_commands", "flwrlabs/femnist", + "flwrlabs/ucf101", ] From b8a8387047de0e9d58092d95b07c71eca253ac7d Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 18 Jul 2024 10:57:22 +0200 Subject: [PATCH 04/15] Add tests for ambient acoustic context dataset --- datasets/flwr_datasets/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index 282a33ed10a0..ab089a255d56 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -36,6 +36,7 @@ "speech_commands", "flwrlabs/femnist", "flwrlabs/ucf101", + "flwrlabs/ambient-acoustic-context", # Feature wise it's just like speech_commands ] From 9e46f0c8dd405c1e4b8325e68deff1af3ecc1f37 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 18 Jul 2024 10:59:12 +0200 Subject: [PATCH 05/15] Add LIUM/tedlium as tested --- datasets/flwr_datasets/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index ab089a255d56..c4a0462416ac 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -37,6 +37,7 @@ "flwrlabs/femnist", "flwrlabs/ucf101", "flwrlabs/ambient-acoustic-context", # Feature wise it's just like speech_commands + "LIUM/tedlium", # Feature wise it's just like speech_commands ] From e9498a3bf0d707fdaf1ca713b7dbd2a88d8ba0f6 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Thu, 18 Jul 2024 11:07:35 +0200 Subject: [PATCH 06/15] Update the tested dataset list --- datasets/flwr_datasets/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index c4a0462416ac..ab089a255d56 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -37,7 +37,6 @@ "flwrlabs/femnist", "flwrlabs/ucf101", 
"flwrlabs/ambient-acoustic-context", # Feature wise it's just like speech_commands - "LIUM/tedlium", # Feature wise it's just like speech_commands ] From c4aafea2b46f1e7872ff9ddbc92ab43faeb6206e Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Fri, 19 Jul 2024 11:32:20 +0200 Subject: [PATCH 07/15] Fix formatting --- datasets/flwr_datasets/federated_dataset_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index f1a84625385d..bf6315bc6a90 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -408,7 +408,6 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: mocked_natural_id_datasets = ["flwrlabs/ucf101"] - @parameterized_class( ("dataset_name", "test_split", "subset", "partition_by"), [ From 915a6f38cd4d1ef77d09e6c1c217a99fbe1dc82f Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Sat, 20 Jul 2024 13:51:20 +0200 Subject: [PATCH 08/15] Add function to perform partial download of dataset for tests --- datasets/flwr_datasets/mock_utils_test.py | 46 ++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/mock_utils_test.py b/datasets/flwr_datasets/mock_utils_test.py index bd49de8033de..fe118ff19319 100644 --- a/datasets/flwr_datasets/mock_utils_test.py +++ b/datasets/flwr_datasets/mock_utils_test.py @@ -19,7 +19,7 @@ import random import string from datetime import datetime, timedelta -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import numpy as np from PIL import Image @@ -375,3 +375,47 @@ def _load_mocked_dataset( for params in zip(num_rows, split_names): dataset_dict[params[1]] = dataset_creation_fnc(params[0]) return datasets.DatasetDict(dataset_dict) + + +def _download_partial_dataset( + dataset_name: str, + split_name: str, + skip_take_list: 
List[Tuple[int, int]], + subset_name: Optional[str] = None, +) -> Dataset: + """Download a partial dataset. + + This functionality is not supported in the datasets library. This is an informal + way of achieving this by using the `streaming=True` and creating a dataset.Dataset + from in-memory objects. + + Parameters + ---------- + dataset_name: str + Name of the dataset (passed to load_dataset). + split_name: str + Name of the split (passed to load_dataset) e.g. "train". + skip_take_list: List[Tuple[int, int]] + The streaming mode has a specific type of accessing the data, the first tuple + value is how many samples to skip, the second is how many samples to take. Due + to this mechanism, diverse samples can be taken (especially if the dataset is + sorted by the natural_id for NaturalIdPartitioner). + subset_name: Optional[str] + Name of the subset (passed to load_dataset) e.g. "v0.01" for speech_commands. + + Returns + ------- + dataset: Dataset + The dataset with the requested samples. 
+ """ + dataset = datasets.load_dataset( + dataset_name, name=subset_name, split=split_name, streaming=True + ) + dataset_list = [] + # It's a list of dict such that each dict represent a single sample of the dataset + # The sample is exactly the same as if the full dataset was downloaded and indexed + for skip, take in skip_take_list: + # dataset.skip(n).take(m) in streaming mode is equivalent (in terms of return) + # to the fully downloaded dataset index: dataset[n : n + m] + dataset_list.extend(list(dataset.skip(skip).take(take))) + return Dataset.from_list(dataset_list) From e81680d1cbed28b892041142bc75fe0ec67dcec4 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Sat, 20 Jul 2024 15:26:03 +0200 Subject: [PATCH 09/15] Add a function that handles multiple splits --- datasets/flwr_datasets/mock_utils_test.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/mock_utils_test.py b/datasets/flwr_datasets/mock_utils_test.py index fe118ff19319..93e771b5bb60 100644 --- a/datasets/flwr_datasets/mock_utils_test.py +++ b/datasets/flwr_datasets/mock_utils_test.py @@ -377,7 +377,7 @@ def _load_mocked_dataset( return datasets.DatasetDict(dataset_dict) -def _download_partial_dataset( +def _load_mocked_dataset_by_partial_download( dataset_name: str, split_name: str, skip_take_list: List[Tuple[int, int]], @@ -419,3 +419,18 @@ def _load_mocked_dataset_by_partial_download( # to the fully downloaded dataset index: dataset[n : n + m] dataset_list.extend(list(dataset.skip(skip).take(take))) return Dataset.from_list(dataset_list) + + +def _load_mocked_dataset_dict_by_partial_download( + dataset_name: str, + split_names: List[str], + skip_take_lists: List[List[Tuple[int, int]]], + subset_name: Optional[str] = None, +) -> DatasetDict: + """Like _load_mocked_dataset_by_partial_download but for many splits.""" + dataset_dict = {} + for split_name, skip_take_list in zip(split_names, skip_take_lists): + dataset_dict[split_name] = 
_load_mocked_dataset_by_partial_download( + dataset_name, split_name, skip_take_list, subset_name + ) + return DatasetDict(dataset_dict) From 2e955c98ebe6c4be3cf50d81a8368bdde7e309af Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 10:30:01 +0200 Subject: [PATCH 10/15] Mock ucf101 by partial download --- .../flwr_datasets/federated_dataset_test.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index 6dc685df4c40..87cb541a430e 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -27,7 +27,10 @@ import datasets from datasets import Dataset, DatasetDict, concatenate_datasets from flwr_datasets.federated_dataset import FederatedDataset -from flwr_datasets.mock_utils_test import _load_mocked_dataset +from flwr_datasets.mock_utils_test import ( + _load_mocked_dataset, + _load_mocked_dataset_dict_by_partial_download, +) from flwr_datasets.partitioner import IidPartitioner, NaturalIdPartitioner, Partitioner mocked_datasets = ["cifar100", "svhn", "sentiment140", "speech_commands"] @@ -411,7 +414,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: ("dataset_name", "test_split", "subset", "partition_by"), [ ("flwrlabs/femnist", "", "", "writer_id"), - ("flwrlabs/ucf101", "test", "", "video_id"), + ("flwrlabs/ucf101", "test", None, "video_id"), ], ) class NaturalIdPartitionerIntegrationTest(unittest.TestCase): @@ -429,15 +432,19 @@ def setUp(self) -> None: download is mocked. 
""" if self.dataset_name in mocked_natural_id_datasets: + mock_return_value = _load_mocked_dataset_dict_by_partial_download( + dataset_name=self.dataset_name, + split_names=["train"], + skip_take_lists=[[(0, 30), (1000, 30), (2000, 40)]], + subset_name=self.subset, + ) self.patcher = patch("datasets.load_dataset") self.mock_load_dataset = self.patcher.start() - self.mock_load_dataset.return_value = _load_mocked_dataset( - self.dataset_name, [20, 10], ["train", self.test_split], self.subset - ) + self.mock_load_dataset.return_value = mock_return_value def tearDown(self) -> None: """Clean up after the dataset mocking.""" - if self.dataset_name in mocked_datasets: + if self.dataset_name in mocked_natural_id_datasets: patch.stopall() def test_if_the_partitions_have_unique_values(self) -> None: @@ -453,6 +460,22 @@ def test_if_the_partitions_have_unique_values(self) -> None: unique_ids_in_partition = list(set(partition[self.partition_by])) self.assertEqual(len(unique_ids_in_partition), 1) + def tests_if_the_columns_are_unchanged(self) -> None: + """Test if the columns are unchanged after partitioning.""" + fds = FederatedDataset( + dataset=self.dataset_name, + partitioners={ + "train": NaturalIdPartitioner(partition_by=self.partition_by) + }, + ) + dataset = fds.load_split("train") + columns_in_dataset = set(dataset.column_names) + + for partition_id in range(fds.partitioners["train"].num_partitions): + partition = fds.load_partition(partition_id) + columns_in_partition = set(partition.column_names) + self.assertEqual(columns_in_partition, columns_in_dataset) + class IncorrectUsageFederatedDatasets(unittest.TestCase): """Test incorrect usages in FederatedDatasets.""" From 303da2ed50f63efaf3cf773e4813b37c73abb14e Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 10:54:39 +0200 Subject: [PATCH 11/15] Mock ambient-acoustic-context by partial download --- datasets/flwr_datasets/federated_dataset_test.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index 87cb541a430e..ba547efcf4d1 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -415,6 +415,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: [ ("flwrlabs/femnist", "", "", "writer_id"), ("flwrlabs/ucf101", "test", None, "video_id"), + ("flwrlabs/ambient-acoustic-context", "", None, "speaker_id"), ], ) class NaturalIdPartitionerIntegrationTest(unittest.TestCase): From bb538a1b767a2d19c12e9a0c6ce962beca35e467 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 13:11:33 +0200 Subject: [PATCH 12/15] Update tests --- datasets/flwr_datasets/federated_dataset_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index ba547efcf4d1..ccd962dbd456 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -407,7 +407,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: "flwrlabs/femnist", ] -mocked_natural_id_datasets = ["flwrlabs/ucf101"] +mocked_natural_id_datasets = ["flwrlabs/ucf101", "flwrlabs/ambient-acoustic-context"] @parameterized_class( From e9f4eb8ddd913f8bce92420ee4f3a1982eb914b5 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 13:14:09 +0200 Subject: [PATCH 13/15] Update tests for ted-lium --- datasets/flwr_datasets/federated_dataset_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/federated_dataset_test.py b/datasets/flwr_datasets/federated_dataset_test.py index ccd962dbd456..fac16b49891a 100644 --- a/datasets/flwr_datasets/federated_dataset_test.py +++ b/datasets/flwr_datasets/federated_dataset_test.py @@ -407,7 +407,11 @@ def test_mixed_type_partitioners_creates_from_int(self) -> 
None: "flwrlabs/femnist", ] -mocked_natural_id_datasets = ["flwrlabs/ucf101", "flwrlabs/ambient-acoustic-context"] +mocked_natural_id_datasets = [ + "flwrlabs/ucf101", + "flwrlabs/ambient-acoustic-context", + "LIUM/tedlium", +] @parameterized_class( @@ -416,6 +420,7 @@ def test_mixed_type_partitioners_creates_from_int(self) -> None: ("flwrlabs/femnist", "", "", "writer_id"), ("flwrlabs/ucf101", "test", None, "video_id"), ("flwrlabs/ambient-acoustic-context", "", None, "speaker_id"), + ("LIUM/tedlium", "test", "release3", "speaker_id"), ], ) class NaturalIdPartitionerIntegrationTest(unittest.TestCase): From a3cbf9aa60ed3eaea2755991ec635d70ee46171e Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 13:15:27 +0200 Subject: [PATCH 14/15] Update list --- datasets/flwr_datasets/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/flwr_datasets/utils.py b/datasets/flwr_datasets/utils.py index f2ae25eb81d7..1740c66657ef 100644 --- a/datasets/flwr_datasets/utils.py +++ b/datasets/flwr_datasets/utils.py @@ -34,7 +34,7 @@ "svhn", "sentiment140", "speech_commands", - "LIUM/tedlium", # Feature wise it's just like speech_commands + "LIUM/tedlium", "flwrlabs/femnist", "flwrlabs/ucf101", "flwrlabs/ambient-acoustic-context", From 8376d3896f53ce8dc5922e359bdf1289ed73f13a Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Mon, 22 Jul 2024 16:36:05 +0200 Subject: [PATCH 15/15] Remove unused code --- datasets/flwr_datasets/mock_utils_test.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/datasets/flwr_datasets/mock_utils_test.py b/datasets/flwr_datasets/mock_utils_test.py index 0d62fca10425..7ee3bae890ff 100644 --- a/datasets/flwr_datasets/mock_utils_test.py +++ b/datasets/flwr_datasets/mock_utils_test.py @@ -355,31 +355,11 @@ def _mock_speach_commands(num_rows: int) -> Dataset: return dataset -def _mock_ucf101(num_rows: int) -> Dataset: - imgs = _generate_random_image_column(num_rows, (320, 240, 3), "JPEG") - 
unique_video_id = ["0", "1", "2", "3", "4"] - unique_labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] - label = _generate_artificial_categories(num_rows, unique_labels) - video_id = _generate_artificial_categories(num_rows, unique_video_id) - features = Features( - { - "image": datasets.Image(decode=True), - "video_id": Value(dtype="string"), - "label": ClassLabel(names=unique_labels), - } - ) - dataset = datasets.Dataset.from_dict( - {"image": imgs, "video_id": video_id, "label": label}, features=features - ) - return dataset - - dataset_name_to_mock_function = { "cifar100": _mock_cifar100, "sentiment140": _mock_sentiment140, "svhn_cropped_digits": _mock_svhn_cropped_digits, "speech_commands_v0.01": _mock_speach_commands, - "flwrlabs/ucf101": _mock_ucf101, }