aidecentralized · tremblerz · Dec 9, 2024 · Oct 19, 2024 · Oct 22, 2024 · Oct 22, 2024
diff --git a/.github/workflows/train.yml b/.github/workflows/train.yml
@@ -45,12 +45,10 @@ jobs:
       - name: Run test
         run: |
           cd src
-          # chmod +x ./configs/algo_config_test.py
-
           echo "starting main grpc"
-          python main_grpc.py -n 4 -host localhost 
+          python main_grpc.py -n 4 -host localhost -dev True
           echo "starting main"
-          python main.py -super true -s "./configs/sys_config_test.py"
+          python main.py -b "./configs/algo_config_test.py" -s "./configs/sys_config_test.py" -super true
           echo "done"
 
       # further checks:

diff --git a/src/algos/base_class.py b/src/algos/base_class.py
@@ -265,8 +265,8 @@ def set_shared_exp_parameters(self, config: Dict[str, ConfigType]) -> None:
                 )
             else:
                 raise ValueError(f"Unknown community type: {community_type}.")
-        if self.node_id == 0:
-            self.log_utils.log_console(f"Communities: {self.communities}")
+        # if self.node_id == 0:
+        #     self.log_utils.log_console(f"Communities: {self.communities}")
 
     def local_round_done(self) -> None:
         self.round += 1
@@ -686,6 +686,14 @@ def is_same_dest(dset):
             if self.dset.startswith("domainnet"):
                 test_dset = CacheDataset(test_dset)
 
+            # reduce test_dset size (capped: choice(replace=False) needs size <= population)
+            reduced_test_size = min(config.get("test_samples_per_user", 0) or 0, len(test_dset))
+            if reduced_test_size > 0:
+                print(f"Reducing test size to {reduced_test_size}")
+                indices = np.random.choice(len(test_dset), reduced_test_size, replace=False)
+                test_dset = Subset(test_dset, indices)
+            print(f"test_dset size: {len(test_dset)}")
+
             self._test_loader = DataLoader(test_dset, batch_size=batch_size)
             # TODO: fix print_data_summary
             # self.print_data_summary(train_dset, test_dset, val_dset=val_dset)

diff --git a/src/algos/fl_static.py b/src/algos/fl_static.py
@@ -9,6 +9,7 @@
 
 from algos.base_class import BaseFedAvgClient
 from algos.topologies.collections import select_topology
+from utils.data_utils import get_dataset
 
 class FedStaticNode(BaseFedAvgClient):
     """
@@ -71,7 +72,32 @@ class FedStaticServer(BaseFedAvgClient):
     def __init__(
         self, config: Dict[str, Any], comm_utils: CommunicationManager
     ) -> None:
-        pass
+        """
+        Set up the server's communication handle and dataset object.
+
+        Args:
+            config: Experiment configuration. Reads "dset" and "dpath"; when
+                "dset" is a dict, `config["dpath"]` is overwritten in place
+                (and the "0" entry of `config["dset"]` is removed for
+                non-zero node ids).
+            comm_utils: Communication manager that supplies this node's rank
+                and with which this node registers itself.
+        """
+        self.comm_utils = comm_utils
+        self.node_id = self.comm_utils.get_rank()
+        self.comm_utils.register_node(self)
+        self.is_working = True
+        if isinstance(config["dset"], dict):
+            # Per-node datasets: "dset" is indexed by the node id as a string.
+            if self.node_id != 0:
+                # NOTE(review): drops the "0" entry from the caller's dict —
+                # presumably node 0's dataset; confirm this is intended.
+                config["dset"].pop("0") # type: ignore
+            self.dset = str(config["dset"][str(self.node_id)]) # type: ignore
+            # "dpath" is indexed by dataset name; narrow it to this node's path.
+            config["dpath"] = config["dpath"][self.dset]
+        else:
+            self.dset = config["dset"]
+        print(f"Node {self.node_id} getting dset at {self.dset}")
+        self.dset_obj = get_dataset(self.dset, dpath=config["dpath"])
 
     def run_protocol(self) -> None:
         pass
diff --git a/src/configs/algo_config_test.py b/src/configs/algo_config_test.py
@@ -1,16 +1,16 @@
 from utils.types import ConfigType
 
-# fedstatic: ConfigType = {
-#     # Collaboration setup
-#     "algo": "fedstatic",
-#     "topology": {"name": "watts_strogatz", "k": 3, "p": 0.2}, # type: ignore
-#     "rounds": 1,
+fedstatic: ConfigType = {
+    # Collaboration setup
+    "algo": "fedstatic",
+    "topology": {"name": "watts_strogatz", "k": 3, "p": 0.2}, # type: ignore
+    "rounds": 1,
 
-#     # Model parameters
-#     "model": "resnet10",
-#     "model_lr": 3e-4,
-#     "batch_size": 256,
-# }
+    # Model parameters
+    "model": "resnet10",
+    "model_lr": 3e-4,
+    "batch_size": 256,
+}
 
 traditional_fl: ConfigType = {
     # Collaboration setup

diff --git a/src/configs/sys_config.py b/src/configs/sys_config.py
@@ -158,7 +158,6 @@ def get_digit_five_support(num_users: int, domains: List[str] = DIGIT_FIVE):
 CIAR10_DPATH = "./datasets/imgs/cifar10/"
 
 NUM_COLLABORATORS = 1
-# DUMP_DIR = "../../../../../../../home/"
 DUMP_DIR = "/tmp/"
 
 num_users = 3
@@ -391,4 +390,3 @@ def get_digit_five_support(num_users: int, domains: List[str] = DIGIT_FIVE):
 
 current_config = grpc_system_config
 # current_config = mpi_system_config
-
diff --git a/src/configs/sys_config_test.py b/src/configs/sys_config_test.py
@@ -3,7 +3,8 @@
 from utils.types import ConfigType
 
 from .algo_config_test import (
-    traditional_fl
+    traditional_fl,
+    fedstatic
 )
 
 def get_device_ids(num_users: int, gpus_available: List[int | Literal["cpu"]]) -> Dict[str, List[int | Literal["cpu"]]]:
@@ -80,7 +81,6 @@ def get_algo_configs(
 CIFAR10_DSET = "cifar10"
 CIAR10_DPATH = "./datasets/imgs/cifar10/"
 
-# DUMP_DIR = "../../../../../../../home/"
 DUMP_DIR = "/tmp/"
 
 NUM_COLLABORATORS = 1
@@ -112,7 +112,7 @@ def get_algo_configs(
     "seed": 2,
     "device_ids": get_device_ids(num_users, gpu_ids),
     # "algos": get_algo_configs(num_users=num_users, algo_configs=default_config_list),  # type: ignore
-    "algos": get_algo_configs(num_users=num_users, algo_configs=[traditional_fl]),  # type: ignore
+    "algos": get_algo_configs(num_users=num_users, algo_configs=[fedstatic]),  # type: ignore
     # "samples_per_user": 50000 // num_users,  # distributed equally
     "samples_per_user": 100,
     "train_label_distribution": "non_iid",

diff --git a/src/main_grpc.py b/src/main_grpc.py
@@ -23,10 +23,33 @@
     help=f"host address of the nodes",
 )
 
+
+def _str_to_bool(value: str) -> bool:
+    """Parse a command-line boolean; ``type=bool`` would treat "False" as True."""
+    normalized = value.strip().lower()
+    if normalized in ("1", "true", "t", "yes", "y"):
+        return True
+    if normalized in ("0", "false", "f", "no", "n"):
+        return False
+    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+
+
+parser.add_argument(
+    "-dev",
+    nargs="?",
+    const=True,  # bare `-dev` (no value) enables development testing
+    default=False,
+    type=_str_to_bool,
+    help="whether or not development testing",
+)
+
 args : argparse.Namespace = parser.parse_args()
 
 # Command for opening each process
 command_list: List[str] = ["python", "main.py", "-host", args.host]
+if args.dev:
+    # Development testing: point every spawned node at the test configs.
+    command_list = ["python", "main.py", "-b", "./configs/algo_config_test.py", "-s", "./configs/sys_config_test.py", "-host", args.host]
 
 # Start process for each user
 for i in range(args.n):

diff --git a/src/utils/communication/mpi.py b/src/utils/communication/mpi.py
@@ -48,7 +48,7 @@ def initialize(self):
     def send_quorum(self) -> Any:
         # return super().send_quorum(node_ids)
         pass
-
+    
     def register_self(self, obj: "BaseNode"):
         self.base_node = obj