Object Detection Support (#28)
* data loader for pascal dataset

* model for yolo

* added albumentations

* ignoring pascal images

* fixed data loaders

* added path for pascal dataset

* support for training for pascal dset

* settings for object detection and yolo model

* small fix

* adding yolo model to model utils

* exporting the model function

* export model

* changed num samples per user

* changed NUM_CLS to num_cls

* updated algo settings

* configs for yolo and pascal

* enabled support for pretrained yolo

* support for testing for pascal

* modularize test and train

* modularized

* configs for yolo

* separated the configs for object detect

---------

Co-authored-by: jyuan24 <[email protected]>
joyce-yuan and jyuan24 authored Aug 17, 2024
1 parent d5e98d0 commit 1bb595e
Showing 11 changed files with 1,044 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -3,6 +3,7 @@ expt_dump/
 expt_dump_old/
 di_test/
 imgs/
+pascal/
 data/
 notes.txt
 removeme*.png
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,3 +1,4 @@
+albumentations==1.4.13
 certifi==2024.2.2
 charset-normalizer==3.3.2
 contourpy==1.2.1
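The pinned albumentations release backs the new Pascal VOC augmentation pipeline. The actual transforms live in the new data loader, which this page did not render; the snippet below is only a sketch of the kind of detection pipeline this dependency enables, with the input size and transform choices as assumptions:

import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Illustrative detection pipeline; not the repository's actual loader code.
transform = A.Compose(
    [
        A.Resize(448, 448),  # assumed YOLO-style fixed input size
        A.HorizontalFlip(p=0.5),
        A.Normalize(),
        ToTensorV2(),
    ],
    # "pascal_voc" expects boxes as [x_min, y_min, x_max, y_max] in pixels
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"]),
)

image = np.zeros((375, 500, 3), dtype=np.uint8)  # dummy VOC-sized image
out = transform(image=image, bboxes=[[48, 60, 320, 290]], labels=[11])
print(out["image"].shape)  # torch.Size([3, 448, 448])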
34 changes: 27 additions & 7 deletions src/algos/base_class.py
@@ -31,6 +31,8 @@
 import torchvision.transforms as T
 import os

+from yolo import YOLOLoss
+
 class BaseNode(ABC):
     def __init__(self, config) -> None:
         self.comm_utils = CommUtils()
@@ -89,7 +91,7 @@ def set_model_parameters(self, config):
             optim = torch.optim.SGD
         else:
             raise ValueError("Unknown optimizer: {}.".format(optim_name))
-        num_classes = self.dset_obj.NUM_CLS
+        num_classes = self.dset_obj.num_cls
         num_channels = self.dset_obj.num_channels
         self.model = self.model_utils.get_model(
             config["model"],
@@ -105,7 +107,10 @@ def set_model_parameters(self, config):
             lr=config["model_lr"],
             weight_decay=config.get("weight_decay", 0),
         )
-        self.loss_fn = torch.nn.CrossEntropyLoss()
+        if config.get('dset') == "pascal":
+            self.loss_fn = YOLOLoss()
+        else:
+            self.loss_fn = torch.nn.CrossEntropyLoss()

     def set_shared_exp_parameters(self, config):

@@ -218,6 +223,7 @@ def set_data_parameters(self, config):

         samples_per_user = config["samples_per_user"]
         batch_size = config["batch_size"]
+        print(f"samples per user: {samples_per_user}, batch size: {batch_size}")

         # Support user specific dataset
         if isinstance(config["dset"], dict):
@@ -268,7 +274,7 @@ def is_same_dest(dset):
             cls_priors = []
             dsets = list(config["dset"].values())
             for _ in dsets:
-                n_cls = self.dset_obj.NUM_CLS
+                n_cls = self.dset_obj.num_cls
                 cls_priors.append(
                     np.random.dirichlet(
                         alpha=[alpha] * n_cls, size=len(users_with_same_dset)
@@ -360,7 +366,8 @@ def is_same_dest(dset):
             test_dset = CacheDataset(test_dset)

         self._test_loader = DataLoader(test_dset, batch_size=batch_size)
-        self.print_data_summary(train_dset, test_dset, val_dset=val_dset)
+        # TODO: fix print_data_summary
+        # self.print_data_summary(train_dset, test_dset, val_dset=val_dset)

     def local_train(self, dataset, **kwargs):
         """
@@ -387,29 +394,42 @@ def print_data_summary(self, train_test, test_dset, val_dset=None):
         """
         Print the data summary
         """

         train_sample_per_class = {}
+        i = 0
         for x, y in train_test:
             train_sample_per_class[y] = train_sample_per_class.get(y, 0) + 1
+            print("train count: ", i)
+            i += 1

+        i = 0
         if val_dset is not None:
             val_sample_per_class = {}
             for x, y in val_dset:
                 val_sample_per_class[y] = val_sample_per_class.get(y, 0) + 1

+                print("val count: ", i)
+                i += 1
+        i = 0
         test_sample_per_class = {}
         for x, y in test_dset:
             test_sample_per_class[y] = test_sample_per_class.get(y, 0) + 1
+            print("test count: ", i)
+            i += 1

         print("Node: {} data distribution summary".format(self.node_id))
+        print(type(train_sample_per_class.items()))
         print(
             "Train samples per class: {}".format(sorted(train_sample_per_class.items()))
         )
+        print(
+            "Train samples per class: {}".format(len(train_sample_per_class.items()))
+        )
         if val_dset is not None:
             print(
-                "Val samples per class: {}".format(sorted(val_sample_per_class.items()))
+                "Val samples per class: {}".format(len(val_sample_per_class.items()))
             )
         print(
-            "Test samples per class: {}".format(sorted(test_sample_per_class.items()))
+            "Test samples per class: {}".format(len(test_sample_per_class.items()))
         )
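YOLOLoss is imported from the yolo module this commit adds, which the page did not render. The diff above only assumes it behaves like any nn.Module loss, so the training loop stays loss-agnostic: both branches expose the same loss_fn(output, target) call. A minimal sketch of that interface, with the grid size, box count, and plain-MSE objective as assumptions rather than the repository's implementation:

import torch
import torch.nn as nn

class YOLOLoss(nn.Module):
    # Sketch of the interface base_class.py relies on; the real class
    # lives in this commit's yolo module. S (grid), B (boxes per cell),
    # and C (classes) follow the original YOLO paper's defaults.
    def __init__(self, S: int = 7, B: int = 2, C: int = 20) -> None:
        super().__init__()
        self.S, self.B, self.C = S, B, C
        self.mse = nn.MSELoss(reduction="sum")

    def forward(self, preds: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        # A real YOLO loss weights coordinate, objectness, and class terms
        # separately; a single MSE over the grid stands in for that here.
        preds = preds.reshape(-1, self.S, self.S, self.C + self.B * 5)
        return self.mse(preds, target)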
16 changes: 14 additions & 2 deletions src/configs/algo_config.py
@@ -2,7 +2,7 @@

 iid_dispfl_clients_new = {
     "algo": "dispfl",
-    "exp_id": 200,
+    "exp_id": 12,
     "exp_type": "iid_dispfl",
     "neighbors": 2,
     "active_rate": 0.8,
@@ -28,4 +28,16 @@
     "exp_keys": [],
 }

-current_config = traditional_fl
+fedavg_object_detect = {
+    "algo": "fedavg",
+    "exp_id": "test_modular_yolo",
+    "exp_type": "test",
+    # Learning setup
+    "epochs": 10,
+    "model": "yolo",
+    "model_lr": 1e-5,
+    "batch_size": 8,
+    "exp_keys": [],
+}
+
+current_config = fedavg_object_detect
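The fedavg_object_detect block reuses the existing fedavg algorithm and only swaps the learning setup (yolo model, batch size 8, learning rate 1e-5). Experiments are switched by reassigning current_config at the bottom of the file; a hypothetical selector, shown purely as a sketch, could avoid editing the file:

# Hypothetical helper; the repository itself just reassigns current_config.
from configs import algo_config

ALGO_CONFIGS = {
    "dispfl": algo_config.iid_dispfl_clients_new,
    "traditional_fl": algo_config.traditional_fl,
    "fedavg_object_detect": algo_config.fedavg_object_detect,
}

def get_algo_config(name: str) -> dict:
    return ALGO_CONFIGS[name]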
27 changes: 22 additions & 5 deletions src/configs/sys_config.py
@@ -7,17 +7,34 @@
     "dset": "cifar10",
     "dump_dir": "./expt_dump/",
     "dpath": "./datasets/imgs/cifar10/",
-    "seed": 2,
+    "seed": 31,
     # node_0 is a server currently
     # The device_ids dictionary depicts the GPUs on which the nodes reside.
     # For a single-GPU environment, the config will look as follows (as it follows a 0-based indexing):
     # "device_ids": {"node_0": [0], "node_1": [0],"node_2": [0], "node_3": [0]}
-    "device_ids": {"node_0": [5], "node_1": [5],"node_2": [5], "node_3": [2]},
-    "samples_per_user": 500, #TODO: To model scenarios where different users have different number of samples
+    "device_ids": {"node_0": [0], "node_1": [0],"node_2": [0], "node_3": [0]},
+    "samples_per_user": 1000, #TODO: To model scenarios where different users have different number of samples
     # we need to make this a dictionary with user_id as key and number of samples as value
     "train_label_distribution": "iid",
     "test_label_distribution": "iid",
     "folder_deletion_signal_path":"./expt_dump/folder_deletion.signal"
 }

-current_config = system_config
+object_detect_system_config = {
+    "num_users": 1,
+    "experiment_path": "./experiments/",
+    "dset": "pascal",
+    "dump_dir": "./expt_dump/",
+    "dpath": "./datasets/pascal/VOCdevkit/VOC2012/",
+    "seed": 37,
+    # node_0 is a server currently
+    # The device_ids dictionary depicts the GPUs on which the nodes reside.
+    # For a single-GPU environment, the config will look as follows (as it follows a 0-based indexing):
+    "device_ids": {"node_0": [1], "node_1": [2]},
+    "samples_per_user": 100, #TODO: To model scenarios where different users have different number of samples
+    # we need to make this a dictionary with user_id as key and number of samples as value
+    "train_label_distribution": "iid",
+    "test_label_distribution": "iid",
+    "folder_deletion_signal_path":"./expt_dump/folder_deletion.signal"
+}
+
+current_config = object_detect_system_config
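The device_ids entry maps each node to the GPU indices it may use; the object-detection config places the server (node_0) on GPU 1 and the single client on GPU 2. A small sketch of how a node might resolve its device from this mapping (the helper name is illustrative, not repository code):

import torch

def resolve_device(sys_config: dict, node_name: str) -> torch.device:
    # Pick the first GPU assigned to this node, falling back to CPU.
    gpu_ids = sys_config["device_ids"].get(node_name, [])
    if gpu_ids and torch.cuda.is_available():
        return torch.device(f"cuda:{gpu_ids[0]}")
    return torch.device("cpu")

# e.g. resolve_device(object_detect_system_config, "node_1") -> cuda:2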
2 changes: 1 addition & 1 deletion src/data_loaders/cifar.py
@@ -10,7 +10,7 @@ class CIFAR10Dataset:
     """
     def __init__(self, dpath: str, rot_angle: int = 0) -> None:
         self.image_size = 32
-        self.NUM_CLS = 10
+        self.num_cls = 10
         self.mean = np.array((0.4914, 0.4822, 0.4465))
         self.std = np.array((0.2023, 0.1994, 0.2010))
         self.num_channels = 3
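Renaming NUM_CLS to num_cls puts every dataset wrapper on one lowercase attribute convention, which is what lets base_class.py read self.dset_obj.num_cls for CIFAR-10 and Pascal alike. The Pascal wrapper itself is in the unrendered part of this diff; a sketch of the shape it would take under that convention (class counts reflect VOC2012, the input size is an assumption):

class PascalDataset:
    # Illustrative wrapper following the shared attribute convention;
    # the actual class ships in this commit's unrendered data loader.
    def __init__(self, dpath: str) -> None:
        self.num_cls = 20       # VOC2012 defines 20 object classes
        self.num_channels = 3
        self.image_size = 448   # assumed YOLO input resolution
        self.dpath = dpath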
(Diffs for the remaining 5 changed files are not shown here.)
