Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Efficient automated GitHub testing #153

Merged
merged 25 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f0192d4
added MPI Communication class
kathrynle20 Oct 19, 2024
755fc07
added send thread, merged 2 classes
kathrynle20 Oct 22, 2024
d37c35b
improved comments
kathrynle20 Oct 22, 2024
4c6d85e
Merge branch 'aidecentralized:main' into main
kathrynle20 Oct 23, 2024
2f087e1
testing mpi, model weights not acquired
kathrynle20 Oct 28, 2024
464a674
mpi works, occasional deadlock issue
kathrynle20 Nov 3, 2024
71dd9e8
merged send and listener threads
kathrynle20 Nov 6, 2024
65fd527
Merge branch 'main' into main
kathrynle20 Nov 6, 2024
c8c7cc4
Merge branch 'aidecentralized:main' into main
kathrynle20 Nov 15, 2024
302a0f0
added super init to fl_static server
kathrynle20 Dec 2, 2024
627a60e
logging dataset loading
kathrynle20 Dec 2, 2024
a7b2adc
reduced test size during workflow testing
kathrynle20 Dec 4, 2024
06c9d43
workflow debugging
kathrynle20 Dec 4, 2024
3784b69
workflow debugging
kathrynle20 Dec 4, 2024
002b1f0
workflow run on push to main only
kathrynle20 Dec 4, 2024
f9ad68a
using requirements cpu
kathrynle20 Dec 4, 2024
e24a2f8
using test_samples_per_user to reduce test set
kathrynle20 Dec 5, 2024
c05efb8
download data in server
kathrynle20 Dec 5, 2024
0ed3c87
Merge branch 'fedstatic' into optimize-tests
kathrynle20 Dec 5, 2024
4163219
Merge pull request #1 from kathrynle20/optimize-tests
kathrynle20 Dec 5, 2024
668bb42
fedstatic works with testing
kathrynle20 Dec 5, 2024
e854c6e
Merge pull request #2 from kathrynle20/fedstatic
kathrynle20 Dec 5, 2024
0720cda
change dump_dir
kathrynle20 Dec 7, 2024
d4376d8
code cleanup
kathrynle20 Dec 9, 2024
76758f7
code cleanup
kathrynle20 Dec 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions .github/workflows/train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,10 @@ jobs:
- name: Run test
run: |
cd src
# chmod +x ./configs/algo_config_test.py

echo "starting main grpc"
python main_grpc.py -n 4 -host localhost
python main_grpc.py -n 4 -host localhost -dev True
echo "starting main"
python main.py -super true -s "./configs/sys_config_test.py"
python main.py -b "./configs/algo_config_test.py" -s "./configs/sys_config_test.py" -super true
echo "done"

# further checks:
Expand Down
12 changes: 10 additions & 2 deletions src/algos/base_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ def set_shared_exp_parameters(self, config: Dict[str, ConfigType]) -> None:
)
else:
raise ValueError(f"Unknown community type: {community_type}.")
if self.node_id == 0:
self.log_utils.log_console(f"Communities: {self.communities}")
# if self.node_id == 0:
# self.log_utils.log_console(f"Communities: {self.communities}")

def local_round_done(self) -> None:
self.round += 1
Expand Down Expand Up @@ -686,6 +686,14 @@ def is_same_dest(dset):
if self.dset.startswith("domainnet"):
test_dset = CacheDataset(test_dset)

# reduce test_dset size
if config.get("test_samples_per_user", 0) != 0:
print(f"Reducing test size to {config.get('test_samples_per_user', 0)}")
reduced_test_size = config.get("test_samples_per_user", 0)
indices = np.random.choice(len(test_dset), reduced_test_size, replace=False)
test_dset = Subset(test_dset, indices)
print(f"test_dset size: {len(test_dset)}")

self._test_loader = DataLoader(test_dset, batch_size=batch_size)
# TODO: fix print_data_summary
# self.print_data_summary(train_dset, test_dset, val_dset=val_dset)
Expand Down
15 changes: 14 additions & 1 deletion src/algos/fl_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from algos.base_class import BaseFedAvgClient
from algos.topologies.collections import select_topology
from utils.data_utils import get_dataset

class FedStaticNode(BaseFedAvgClient):
"""
Expand Down Expand Up @@ -71,7 +72,19 @@ class FedStaticServer(BaseFedAvgClient):
def __init__(
self, config: Dict[str, Any], comm_utils: CommunicationManager
) -> None:
# Server-side setup: keep the communication manager, register this node with
# it, work out which dataset name applies to this node, and load that dataset.
# NOTE(review): no parent-class initializer is invoked in the visible body --
# confirm that skipping it is intentional.
# NOTE(review): indentation was lost in this view, so the exact nesting of the
# if/else below must be confirmed against the real file.
pass
self.comm_utils = comm_utils
# The node id is the rank reported by the communication manager.
self.node_id = self.comm_utils.get_rank()
self.comm_utils.register_node(self)
self.is_working = True
# config["dset"] may be a dict (indexed below by the node id as a string)
# or a single value that is used directly as the dataset name.
if isinstance(config["dset"], dict):
if self.node_id != 0:
# Mutates the caller's config in place: the "0" entry is removed.
config["dset"].pop("0") # type: ignore
self.dset = str(config["dset"][str(self.node_id)]) # type: ignore
# Mutates the caller's config in place: the "dpath" mapping is replaced by
# the single entry stored under this node's dataset name.
config["dpath"] = config["dpath"][self.dset]
else:
self.dset = config["dset"]
print(f"Node {self.node_id} getting dset at {self.dset}")
# The dataset object is created here, using the dataset name and config["dpath"].
self.dset_obj = get_dataset(self.dset, dpath=config["dpath"])

def run_protocol(self) -> None:
# Intentionally a no-op: the body performs no work and returns None.
pass
20 changes: 10 additions & 10 deletions src/configs/algo_config_test.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from utils.types import ConfigType

# fedstatic: ConfigType = {
# # Collaboration setup
# "algo": "fedstatic",
# "topology": {"name": "watts_strogatz", "k": 3, "p": 0.2}, # type: ignore
# "rounds": 1,
fedstatic: ConfigType = {
# Collaboration setup
"algo": "fedstatic",
"topology": {"name": "watts_strogatz", "k": 3, "p": 0.2}, # type: ignore
"rounds": 1,

# # Model parameters
# "model": "resnet10",
# "model_lr": 3e-4,
# "batch_size": 256,
# }
# Model parameters
"model": "resnet10",
"model_lr": 3e-4,
"batch_size": 256,
}

traditional_fl: ConfigType = {
# Collaboration setup
Expand Down
2 changes: 0 additions & 2 deletions src/configs/sys_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@ def get_digit_five_support(num_users: int, domains: List[str] = DIGIT_FIVE):
CIAR10_DPATH = "./datasets/imgs/cifar10/"

NUM_COLLABORATORS = 1
# DUMP_DIR = "../../../../../../../home/"
DUMP_DIR = "/tmp/"

num_users = 3
Expand Down Expand Up @@ -391,4 +390,3 @@ def get_digit_five_support(num_users: int, domains: List[str] = DIGIT_FIVE):

current_config = grpc_system_config
# current_config = mpi_system_config

6 changes: 3 additions & 3 deletions src/configs/sys_config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from utils.types import ConfigType

from .algo_config_test import (
traditional_fl
traditional_fl,
fedstatic
)

def get_device_ids(num_users: int, gpus_available: List[int | Literal["cpu"]]) -> Dict[str, List[int | Literal["cpu"]]]:
Expand Down Expand Up @@ -80,7 +81,6 @@ def get_algo_configs(
CIFAR10_DSET = "cifar10"
CIAR10_DPATH = "./datasets/imgs/cifar10/"

# DUMP_DIR = "../../../../../../../home/"
DUMP_DIR = "/tmp/"

NUM_COLLABORATORS = 1
Expand Down Expand Up @@ -112,7 +112,7 @@ def get_algo_configs(
"seed": 2,
"device_ids": get_device_ids(num_users, gpu_ids),
# "algos": get_algo_configs(num_users=num_users, algo_configs=default_config_list), # type: ignore
"algos": get_algo_configs(num_users=num_users, algo_configs=[traditional_fl]), # type: ignore
"algos": get_algo_configs(num_users=num_users, algo_configs=[fedstatic]), # type: ignore
# "samples_per_user": 50000 // num_users, # distributed equally
"samples_per_user": 100,
"train_label_distribution": "non_iid",
Expand Down
9 changes: 9 additions & 0 deletions src/main_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,19 @@
help=f"host address of the nodes",
)

def _parse_bool_flag(text: str) -> bool:
    """Convert the text given to a boolean command-line flag into a bool.

    argparse's ``type=bool`` is unsuitable here: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so the flag could never be
    switched off explicitly.

    Raises:
        argparse.ArgumentTypeError: if *text* is not a recognised boolean word.
    """
    normalized = text.strip().lower()
    if normalized in ("1", "true", "t", "yes", "y"):
        return True
    # The empty string stays falsy, matching what bool("") returned before.
    if normalized in ("", "0", "false", "f", "no", "n"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")


parser.add_argument(
    "-dev",
    nargs="?",
    const=True,  # a bare "-dev" (no value) enables development testing
    default=False,
    type=_parse_bool_flag,
    help="whether or not development testing",
)

args: argparse.Namespace = parser.parse_args()

# Command for opening each process. Development runs insert the lightweight
# test configs before the host argument; the resulting argument order is the
# same as before.
command_list: List[str] = ["python", "main.py"]
if args.dev:
    command_list += [
        "-b", "./configs/algo_config_test.py",
        "-s", "./configs/sys_config_test.py",
    ]
command_list += ["-host", args.host]

# Start process for each user
for i in range(args.n):
Expand Down
2 changes: 1 addition & 1 deletion src/utils/communication/mpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def initialize(self):
def send_quorum(self) -> Any:
# Placeholder: the body does nothing, so the call returns None.
# The delegation to the parent implementation below is disabled; note that it
# references `node_ids`, which is not a parameter of this method.
# return super().send_quorum(node_ids)
pass

def register_self(self, obj: "BaseNode"):
self.base_node = obj

Expand Down
Loading