From 53d37e2c68638635ac2666782fa5838b96215191 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Tue, 12 Sep 2023 15:15:50 +0200 Subject: [PATCH 1/8] Add PyTorch integration tests with FDS --- .../integration_tests/pytorch/pyproject.toml | 17 ++++ .../integration_tests/pytorch/pytorch_test.py | 80 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 datasets/integration_tests/pytorch/pyproject.toml create mode 100644 datasets/integration_tests/pytorch/pytorch_test.py diff --git a/datasets/integration_tests/pytorch/pyproject.toml b/datasets/integration_tests/pytorch/pyproject.toml new file mode 100644 index 000000000000..e081c43fd79a --- /dev/null +++ b/datasets/integration_tests/pytorch/pyproject.toml @@ -0,0 +1,17 @@ +[build-system] +requires = ["poetry-core>=1.4.0"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "fds-integration-pytorch" +version = "0.1.0" +description = "Flower Datasets with PyTorch" +authors = ["The Flower Authors "] + +[tool.poetry.dependencies] +python = "^3.8" +flwr = "^1.5" +torch = "^1.12.0" +torchvision = "^0.14.1" +flwr_datasets = { path = "./../../", develop = true, extras = ["vision"] } +parameterized = "==0.9.0" diff --git a/datasets/integration_tests/pytorch/pytorch_test.py b/datasets/integration_tests/pytorch/pytorch_test.py new file mode 100644 index 000000000000..318690a4dfa3 --- /dev/null +++ b/datasets/integration_tests/pytorch/pytorch_test.py @@ -0,0 +1,80 @@ +import unittest + +from datasets.utils.logging import disable_progress_bar +from parameterized import parameterized_class +from torch import Tensor +from torch.utils.data import DataLoader +from torchvision.transforms import Compose, ToTensor, Normalize + +from flwr_datasets import FederatedDataset + + +# Using parameterized testing, two different sets of parameters are specified: +# 1. CIFAR10 dataset with the simple ToTensor transform. +# 2. CIFAR10 dataset with a composed transform that first converts an image to a tensor +# and then normalizes it. +@parameterized_class( + [ + {"dataset_name": "cifar10", "test_split": "test", "transforms": ToTensor()}, + {"dataset_name": "cifar10", "test_split": "test", "transforms": Compose( + [ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] + )}, + ] +) +class FDSToPyTorchCorrectUsage(unittest.TestCase): + """Test the conversion from FDS to PyTorch Dataset and Dataloader.""" + + dataset_name = "" + test_split = "" + transforms = None + trainloader = None + expected_img_shape_after_transform = [3, 32, 32] + + @classmethod + def setUpClass(cls): + """Disable progress bar to keep the log clean. + """ + disable_progress_bar() + + def _create_trainloader(self, batch_size: int) -> DataLoader: + """Create a trainloader from the federated dataset.""" + partition_id = 0 + fds = FederatedDataset(dataset=self.dataset_name, partitioners={"train": 100}) + partition = fds.load_partition(partition_id, "train") + partition_train_test = partition.train_test_split(test_size=0.2) + partition_train_test = partition_train_test.map( + lambda img: {"img": self.transforms(img)}, input_columns="img" + ) + trainloader = DataLoader( + partition_train_test["train"].with_format("torch"), batch_size=batch_size, + shuffle=True + ) + return trainloader + + def test_create_partition_dataloader_with_transforms_shape(self)-> None: + """Test if the DataLoader returns batches with the expected shape.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + images = batch["img"] + self.assertEqual(tuple(images.shape), + (batch_size, *self.expected_img_shape_after_transform)) + + def test_create_partition_dataloader_with_transforms_batch_type(self)-> None: + """Test if the DataLoader returns batches of type dictionary.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + self.assertIsInstance(batch, dict) + + def test_create_partition_dataloader_with_transforms_data_type(self)-> None: + """Test to verify if the data in the DataLoader batches are of type Tensor.""" + batch_size = 16 + trainloader = self._create_trainloader(batch_size) + batch = next(iter(trainloader)) + images = batch["img"] + self.assertIsInstance(images, Tensor) + + +if __name__ == '__main__': + unittest.main() From a11fa4f3f02a2cd868bc2bf814ebde884bcbb45f Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Fri, 15 Sep 2023 12:20:04 +0200 Subject: [PATCH 2/8] Add ML training to the tests --- .../integration_tests/pytorch/pytorch_test.py | 59 +++++++++++++++++-- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/datasets/integration_tests/pytorch/pytorch_test.py b/datasets/integration_tests/pytorch/pytorch_test.py index 318690a4dfa3..5eeb1e86737f 100644 --- a/datasets/integration_tests/pytorch/pytorch_test.py +++ b/datasets/integration_tests/pytorch/pytorch_test.py @@ -1,14 +1,38 @@ import unittest from datasets.utils.logging import disable_progress_bar -from parameterized import parameterized_class +from parameterized import parameterized_class, parameterized +import torch +import torch.nn.functional as F from torch import Tensor from torch.utils.data import DataLoader +import torch.nn as nn +import torch.optim as optim from torchvision.transforms import Compose, ToTensor, Normalize from flwr_datasets import FederatedDataset +class SimpleCNN(nn.Module): + def __init__(self): + super(SimpleCNN, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + # Using parameterized testing, two different sets of parameters are specified: # 1. CIFAR10 dataset with the simple ToTensor transform. # 2. CIFAR10 dataset with a composed transform that first converts an image to a tensor @@ -51,7 +75,7 @@ def _create_trainloader(self, batch_size: int) -> DataLoader: ) return trainloader - def test_create_partition_dataloader_with_transforms_shape(self)-> None: + def test_create_partition_dataloader_with_transforms_shape(self) -> None: """Test if the DataLoader returns batches with the expected shape.""" batch_size = 16 trainloader = self._create_trainloader(batch_size) @@ -60,14 +84,14 @@ def test_create_partition_dataloader_with_transforms_shape(self)-> None: self.assertEqual(tuple(images.shape), (batch_size, *self.expected_img_shape_after_transform)) - def test_create_partition_dataloader_with_transforms_batch_type(self)-> None: + def test_create_partition_dataloader_with_transforms_batch_type(self) -> None: """Test if the DataLoader returns batches of type dictionary.""" batch_size = 16 trainloader = self._create_trainloader(batch_size) batch = next(iter(trainloader)) self.assertIsInstance(batch, dict) - def test_create_partition_dataloader_with_transforms_data_type(self)-> None: + def test_create_partition_dataloader_with_transforms_data_type(self) -> None: """Test to verify if the data in the DataLoader batches are of type Tensor.""" batch_size = 16 trainloader = self._create_trainloader(batch_size) @@ -75,6 +99,33 @@ def test_create_partition_dataloader_with_transforms_data_type(self)-> None: images = batch["img"] self.assertIsInstance(images, Tensor) + @parameterized.expand([ + ("not_nan", torch.isnan), + ("not_inf", torch.isinf), + ]) + def test_train_model_loss_value(self, name, condition_func): + """Test if the model trains and if the loss is a correct number.""" + trainloader = self._create_trainloader(16) + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + # Create the model, criterion, and optimizer + net = SimpleCNN().to(device) + criterion = nn.CrossEntropyLoss() + optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9) + + # Training loop for one epoch + net.train() + loss = None + for i, data in enumerate(trainloader, 0): + inputs, labels = data['img'].to(device), data['label'].to(device) + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + + self.assertFalse(condition_func(loss).item()) + if __name__ == '__main__': unittest.main() From bb412829585de98d22fafaccfbff5f13278c4ce8 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Fri, 15 Sep 2023 12:25:59 +0200 Subject: [PATCH 3/8] Reformat --- datasets/integration_tests/pytorch/pytorch_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datasets/integration_tests/pytorch/pytorch_test.py b/datasets/integration_tests/pytorch/pytorch_test.py index 5eeb1e86737f..1be2234670bc 100644 --- a/datasets/integration_tests/pytorch/pytorch_test.py +++ b/datasets/integration_tests/pytorch/pytorch_test.py @@ -1,13 +1,13 @@ import unittest -from datasets.utils.logging import disable_progress_bar -from parameterized import parameterized_class, parameterized import torch +import torch.nn as nn import torch.nn.functional as F +import torch.optim as optim +from datasets.utils.logging import disable_progress_bar +from parameterized import parameterized_class, parameterized from torch import Tensor from torch.utils.data import DataLoader -import torch.nn as nn -import torch.optim as optim from torchvision.transforms import Compose, ToTensor, Normalize from flwr_datasets import FederatedDataset From 7ab43cce42a404a46fb1920382f30e798662c530 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 20 Sep 2023 11:28:36 +0200 Subject: [PATCH 4/8] Rename integration_tests to e2e --- datasets/{integration_tests => e2e}/pytorch/pyproject.toml | 0 datasets/{integration_tests => e2e}/pytorch/pytorch_test.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename datasets/{integration_tests => e2e}/pytorch/pyproject.toml (100%) rename datasets/{integration_tests => e2e}/pytorch/pytorch_test.py (100%) diff --git a/datasets/integration_tests/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml similarity index 100% rename from datasets/integration_tests/pytorch/pyproject.toml rename to datasets/e2e/pytorch/pyproject.toml diff --git a/datasets/integration_tests/pytorch/pytorch_test.py b/datasets/e2e/pytorch/pytorch_test.py similarity index 100% rename from datasets/integration_tests/pytorch/pytorch_test.py rename to datasets/e2e/pytorch/pytorch_test.py From 7efa66fcd87a7567978e165764253bf884284243 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 20 Sep 2023 11:31:11 +0200 Subject: [PATCH 5/8] Fix the Flower Dataset library name in the pyproject.toml --- datasets/e2e/pytorch/pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datasets/e2e/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml index e081c43fd79a..6a0decddd8c9 100644 --- a/datasets/e2e/pytorch/pyproject.toml +++ b/datasets/e2e/pytorch/pyproject.toml @@ -10,8 +10,7 @@ authors = ["The Flower Authors "] [tool.poetry.dependencies] python = "^3.8" -flwr = "^1.5" +flwr-datasets = { path = "./../../", develop = true, extras = ["vision"] } torch = "^1.12.0" torchvision = "^0.14.1" -flwr_datasets = { path = "./../../", develop = true, extras = ["vision"] } parameterized = "==0.9.0" From 589d880798cb8009af9ab28026ebb2ac975c0230 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 20 Sep 2023 11:31:21 +0200 Subject: [PATCH 6/8] Fix test class name --- datasets/e2e/pytorch/pytorch_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/e2e/pytorch/pytorch_test.py b/datasets/e2e/pytorch/pytorch_test.py index 1be2234670bc..5bac8f770f23 100644 --- a/datasets/e2e/pytorch/pytorch_test.py +++ b/datasets/e2e/pytorch/pytorch_test.py @@ -45,7 +45,7 @@ def forward(self, x): )}, ] ) -class FDSToPyTorchCorrectUsage(unittest.TestCase): +class FdsToPyTorch(unittest.TestCase): """Test the conversion from FDS to PyTorch Dataset and Dataloader.""" dataset_name = "" From 07399c04e8b9fb3736306192c45124d523f0bc84 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 20 Sep 2023 11:38:44 +0200 Subject: [PATCH 7/8] Fix name in tool.poetry --- datasets/e2e/pytorch/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/e2e/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml index 6a0decddd8c9..b0bb9da9a73a 100644 --- a/datasets/e2e/pytorch/pyproject.toml +++ b/datasets/e2e/pytorch/pyproject.toml @@ -3,7 +3,7 @@ requires = ["poetry-core>=1.4.0"] build-backend = "poetry.core.masonry.api" [tool.poetry] -name = "fds-integration-pytorch" +name = "fds-e2e-pytorch" version = "0.1.0" description = "Flower Datasets with PyTorch" authors = ["The Flower Authors "] From 04001a69eb11f3acb796703ded8bf9100b2cf429 Mon Sep 17 00:00:00 2001 From: Adam Narozniak Date: Wed, 20 Sep 2023 12:42:33 +0200 Subject: [PATCH 8/8] Remove develop = true in pyproject.toml for flower-datasets --- datasets/e2e/pytorch/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/e2e/pytorch/pyproject.toml b/datasets/e2e/pytorch/pyproject.toml index b0bb9da9a73a..4565cce9f828 100644 --- a/datasets/e2e/pytorch/pyproject.toml +++ b/datasets/e2e/pytorch/pyproject.toml @@ -10,7 +10,7 @@ authors = ["The Flower Authors "] [tool.poetry.dependencies] python = "^3.8" -flwr-datasets = { path = "./../../", develop = true, extras = ["vision"] } +flwr-datasets = { path = "./../../", extras = ["vision"] } torch = "^1.12.0" torchvision = "^0.14.1" parameterized = "==0.9.0"