
Commit

fix(dataset) trailing-whitespace
Signed-off-by: Marchons <[email protected]>
Yoda-wu committed Oct 2, 2024
1 parent d09d8e8 commit 8eaab3c
Showing 23 changed files with 504 additions and 104 deletions.
@@ -289,7 +289,11 @@ def evaluation(self, testdataset_files, incremental_round):
current_forget_rate = (
max_acc_sum / len(old_class_acc_list) if incremental_round > 0 else 0.0
)
task_avg_acc = self.system_metric_info[SystemMetricType.TASK_AVG_ACC.value][
"accuracy"
]
LOGGER.info(
f"for current round: {incremental_round} forget rate: {current_forget_rate} task avg acc: {self.system_metric_info[SystemMetricType.TASK_AVG_ACC.value]['accuracy']}"
f"for current round: {incremental_round} forget rate: {current_forget_rate}"
f"task avg acc: {tavk_avg_acc}"
)
return current_forget_rate
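
Note the trailing space inside the first f-string: adjacent string literals concatenate with no implicit separator, so without it the log would read `...{current_forget_rate}task avg acc...`. A minimal illustration with made-up values:

incremental_round, forget_rate, task_avg_acc = 1, 0.25, 0.8
msg = (
    f"for current round: {incremental_round} forget rate: {forget_rate} "
    f"task avg acc: {task_avg_acc}"
)
# Without the trailing space the two fragments would run together.
print(msg)  # for current round: 1 forget rate: 0.25 task avg acc: 0.8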
@@ -103,6 +103,11 @@ def run(self):
return test_res, self.system_metric_info

def get_all_train_data(self):
"""Get all train data for the paradigm of federated learning.
Returns:
list: train data list
"""
split_time = 1 # only one split ——all the data
return self._split_dataset(split_time)
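
For intuition, a minimal standalone sketch of the single-split behavior; the `_split_dataset` below is a hypothetical stand-in for the class method, not its real implementation:

def _split_dataset(data, split_time):
    # Split data into split_time equal chunks; with split_time=1
    # the whole dataset comes back as one chunk.
    size = len(data) // split_time
    return [data[i * size:(i + 1) * size] for i in range(split_time)]

print(_split_dataset([1, 2, 3, 4], split_time=1))  # [[1, 2, 3, 4]]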

50 changes: 25 additions & 25 deletions core/testcasecontroller/metrics/metrics.py
@@ -39,8 +39,7 @@ def samples_transfer_ratio_func(system_metric_info: dict):
"""

info = system_metric_info.get(
SystemMetricType.SAMPLES_TRANSFER_RATIO.value)
info = system_metric_info.get(SystemMetricType.SAMPLES_TRANSFER_RATIO.value)
inference_num = 0
transfer_num = 0
for inference_data, transfer_data in info:
@@ -53,8 +52,7 @@ def compute(key, matrix):
"""
Compute BWT and FWT scores for a given matrix.
"""
print(
f"compute function: key={key}, matrix={matrix}, type(matrix)={type(matrix)}")
print(f"compute function: key={key}, matrix={matrix}, type(matrix)={type(matrix)}")

length = len(matrix)
accuracy = 0.0
@@ -63,7 +61,7 @@ def compute(key, matrix):
flag = True

for row in matrix:
if not isinstance(row, list) or len(row) != length-1:
if not isinstance(row, list) or len(row) != length - 1:
flag = False
break

@@ -72,30 +70,29 @@ def compute(key, matrix):
fwt_score = np.nan
return bwt_score, fwt_score

for i in range(length-1):
for j in range(length-1):
if 'accuracy' in matrix[i+1][j] and 'accuracy' in matrix[i][j]:
accuracy += matrix[i+1][j]['accuracy']
bwt_score += matrix[i+1][j]['accuracy'] - \
matrix[i][j]['accuracy']
for i in range(length - 1):
for j in range(length - 1):
if "accuracy" in matrix[i + 1][j] and "accuracy" in matrix[i][j]:
accuracy += matrix[i + 1][j]["accuracy"]
bwt_score += matrix[i + 1][j]["accuracy"] - matrix[i][j]["accuracy"]

for i in range(0, length-1):
if 'accuracy' in matrix[i][i] and 'accuracy' in matrix[0][i]:
fwt_score += matrix[i][i]['accuracy'] - matrix[0][i]['accuracy']
for i in range(0, length - 1):
if "accuracy" in matrix[i][i] and "accuracy" in matrix[0][i]:
fwt_score += matrix[i][i]["accuracy"] - matrix[0][i]["accuracy"]

accuracy = accuracy / ((length-1) * (length-1))
bwt_score = bwt_score / ((length-1) * (length-1))
fwt_score = fwt_score / (length-1)
accuracy = accuracy / ((length - 1) * (length - 1))
bwt_score = bwt_score / ((length - 1) * (length - 1))
fwt_score = fwt_score / (length - 1)

print(f"{key} BWT_score: {bwt_score}")
print(f"{key} FWT_score: {fwt_score}")

my_matrix = []
for i in range(length-1):
for i in range(length - 1):
my_matrix.append([])
for j in range(length-1):
if 'accuracy' in matrix[i+1][j]:
my_matrix[i].append(matrix[i+1][j]['accuracy'])
for j in range(length - 1):
if "accuracy" in matrix[i + 1][j]:
my_matrix[i].append(matrix[i + 1][j]["accuracy"])

return my_matrix, bwt_score, fwt_score
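
For context on the arithmetic above, a self-contained sketch of how BWT and FWT fall out of a toy accuracy matrix. The values are made up; the matrix mirrors the list-of-dicts shape compute() expects, with length - 1 columns per row:

# Toy 3x2 accuracy matrix: rows are evaluation rounds, columns are
# tasks; cells mirror the {"accuracy": ...} dicts used by compute().
matrix = [
    [{"accuracy": 0.60}, {"accuracy": 0.10}],
    [{"accuracy": 0.70}, {"accuracy": 0.50}],
    [{"accuracy": 0.65}, {"accuracy": 0.55}],
]
length = len(matrix)
bwt_score, fwt_score = 0.0, 0.0

# BWT: mean round-to-round change on each task (negative = forgetting).
for i in range(length - 1):
    for j in range(length - 1):
        bwt_score += matrix[i + 1][j]["accuracy"] - matrix[i][j]["accuracy"]
bwt_score /= (length - 1) * (length - 1)

# FWT: mean gap between diagonal (just-learned) accuracy and round-0 accuracy.
for i in range(length - 1):
    fwt_score += matrix[i][i]["accuracy"] - matrix[0][i]["accuracy"]
fwt_score /= length - 1

print(bwt_score, fwt_score)  # ~0.125 and ~0.2 for these toy values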

@@ -141,7 +138,8 @@ def task_avg_acc_func(system_metric_info: dict):
compute task average accuracy
"""
info = system_metric_info.get(SystemMetricType.TASK_AVG_ACC.value)
return info["accuracy"]
return round(info["accuracy"], 3)


def forget_rate_func(system_metric_info: dict):
"""
@@ -150,7 +148,7 @@ def forget_rate_func(system_metric_info: dict):
info = system_metric_info.get(SystemMetricType.FORGET_RATE.value)
forget_rate = np.mean(info)
print(f"forget_rate: {forget_rate}")
return forget_rate
return round(forget_rate, 3)


def get_metric_func(metric_dict: dict):
@@ -176,10 +174,12 @@ def get_metric_func(metric_dict: dict):
try:
load_module(url)
metric_func = ClassFactory.get_cls(
type_name=ClassType.GENERAL, t_cls_name=name)
type_name=ClassType.GENERAL, t_cls_name=name
)
return name, metric_func
except Exception as err:
raise RuntimeError(
f"get metric func(url={url}) failed, error: {err}.") from err
f"get metric func(url={url}) failed, error: {err}."
) from err

return name, getattr(sys.modules[__name__], str.lower(name) + "_func")
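
The fallback on the last line maps a metric name to a module-level function purely by naming convention. A hypothetical illustration of that lookup; accuracy_func here is a stand-in, not a function from the diff:

import sys

def accuracy_func(info):
    # Hypothetical stand-in for a built-in metric function.
    return info.get("accuracy", 0.0)

name = "Accuracy"
# Resolves "Accuracy" -> accuracy_func on the current module, as above.
func = getattr(sys.modules[__name__], str.lower(name) + "_func")
print(func({"accuracy": 0.9}))  # 0.9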
6 changes: 6 additions & 0 deletions core/testenvmanager/dataset/dataset.py
@@ -116,6 +116,7 @@ def process_dataset(self):
self.test_url = self._process_index_file(self.test_url)

# pylint: disable=too-many-arguments
# pylint: disable=too-many-positional-arguments
def split_dataset(self, dataset_url, dataset_format, ratio, method="default",
dataset_types=None, output_dir=None, times=1):
"""
@@ -203,13 +204,15 @@ def _read_data_file(cls, data_file, data_format):

return data

# pylint: disable=too-many-positional-arguments
def _get_dataset_file(self, data, output_dir, dataset_type, index, dataset_format):
data_file = self._get_file_url(output_dir, dataset_type, index, dataset_format)

self._write_data_file(data, data_file, dataset_format)

return data_file

# pylint: disable=too-many-positional-arguments
def _splitting_more_times(self, data_file, data_format, ratio,
data_types=None, output_dir=None, times=1):
if not data_types:
@@ -243,6 +246,7 @@ def _splitting_more_times(self, data_file, data_format, ratio,

return data_files

# pylint: disable=too-many-positional-arguments
def _fwt_splitting(self, data_file, data_format, ratio,
data_types=None, output_dir=None, times=1):
if not data_types:
@@ -281,6 +285,7 @@ def _fwt_splitting(self, data_file, data_format, ratio,

return data_files

# pylint: disable=too-many-positional-arguments
# add new splitting method for semantic segmentation
def _city_splitting(self, data_file, data_format, ratio,
data_types=None, output_dir=None, times=1):
@@ -326,6 +331,7 @@ def _city_splitting(self, data_file, data_format, ratio,

return data_files

# pylint: disable=too-many-positional-arguments
def _hard_example_splitting(self, data_file, data_format, ratio,
data_types=None, output_dir=None, times=1):
if not data_types:
2 changes: 0 additions & 2 deletions core/testenvmanager/dataset/utils.py
@@ -37,7 +37,6 @@ def read_data_from_file_to_npy(files: BaseDataSource):
"""
x_train = []
y_train = []
LOGGER.info(f"{files.x}, {files.y}")
for i, file in enumerate(files.x):
x_data = np.load(file)
# print(x_data.shape)
@@ -47,7 +46,6 @@ def read_data_from_file_to_npy(files: BaseDataSource):
y_train.append(y_data)
x_train = np.concatenate(x_train, axis=0)
y_train = np.concatenate(y_train, axis=0)
print(x_train.shape, y_train.shape)
return x_train, y_train
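
For reference, a small sketch of the shape flow this helper produces; the per-file arrays below are hypothetical stand-ins for what np.load returns on the CIFAR-style data used in these examples:

import numpy as np

x_parts = [np.zeros((4, 32, 32, 3)), np.zeros((6, 32, 32, 3))]
y_parts = [np.zeros(4), np.zeros(6)]

# Mirrors the concatenation at the end of read_data_from_file_to_npy.
x_train = np.concatenate(x_parts, axis=0)
y_train = np.concatenate(y_parts, axis=0)
print(x_train.shape, y_train.shape)  # (10, 32, 32, 3) (10,)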


58 changes: 40 additions & 18 deletions examples/cifar100/fci_ssl/fed_ci_match/algorithm/FedCiMatch.py
@@ -42,8 +42,8 @@ def __init__(
self.learning_rate = learning_rate
self.memory_size = memory_size
self.task_size = None
self.warm_up_round = 1
self.accept_threshold = 0.85
self.warm_up_round = 4
self.accept_threshold = 0.95
self.old_task_id = -1

self.classifier = None
@@ -69,8 +69,12 @@ def __init__(

def build_feature_extractor(self):
feature_extractor = resnet10()

feature_extractor.build(input_shape=(None, 32, 32, 3))
feature_extractor.call(keras.Input(shape=(32, 32, 3)))
feature_extractor.load_weights(
"examples/cifar100/fci_ssl/fed_ci_match/algorithm/feature_extractor.weights.h5"
)
return feature_extractor
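
The added build/call pair matters for subclassed Keras models: variables are created lazily, so the network must be traced once before load_weights can match the saved shapes. A minimal sketch with a hypothetical stand-in for resnet10():

import tensorflow as tf
from tensorflow import keras

class TinyExtractor(keras.Model):  # hypothetical stand-in for resnet10()
    def __init__(self):
        super().__init__()
        self.conv = keras.layers.Conv2D(8, 3)

    def call(self, inputs):
        return self.conv(inputs)

model = TinyExtractor()
model.build(input_shape=(None, 32, 32, 3))
model.call(keras.Input(shape=(32, 32, 3)))  # trace once so weights exist
# model.load_weights("path/to/weights.h5")  # shapes can now be matched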

def build_classifier(self):
@@ -298,11 +302,15 @@ def train(self, round):
all_params.extend(self.classifier.trainable_variables)

for epoch in range(self.epochs):
# for (labeled_data, unlabeled_data) in zip(self.labeled_train_loader, self.unlabeled_train_loader):
for step, (labeled_x, labeled_y) in enumerate(self.labeled_train_loader):
for labeled_data, unlabeled_data in zip(
self.labeled_train_loader, self.unlabeled_train_loader
):
# for step, (labeled_x, labeled_y) in enumerate(self.labeled_train_loader):
# print(labeled_data.shape)
# labeled_x, labeled_y = labeled_data
# unlabeled_x, weak_unlabeled_x, strong_unlabeled_x, unlabeled_y = unlabeled_data
labeled_x, labeled_y = labeled_data
unlabeled_x, weak_unlabeled_x, strong_unlabeled_x, unlabeled_y = (
unlabeled_data
)
with tf.GradientTape() as tape:
input = self.feature_extractor(inputs=labeled_x, training=True)
y_pred = self.classifier(inputs=input, training=True)
@@ -314,15 +322,21 @@ def train(self, round):
tf.cast(tf.equal(label_pred, labeled_y), dtype=tf.int32)
)
CE_loss = self.supervised_loss(labeled_x, labeled_y)
KD_loss = self.distil_loss(labeled_x, labeled_y, q, step)
KD_loss = self.distil_loss(labeled_x, labeled_y)
# loss = tf.reduce_mean(keras.losses.sparse_categorical_crossentropy(labeled_y, y_pred, from_logits=True))
# if round > self.warm_up_round:
# unsupervised_loss = self.unsupervised_loss(weak_unlabeled_x, strong_unlabeled_x, unlabeled_x)
supervised_loss = CE_loss
# logging.info(f"supervised loss: {supervised_loss}")
# if epoch > self.warm_up_round:
# unsupervised_loss = self.unsupervised_loss(
# weak_unlabeled_x, strong_unlabeled_x, unlabeled_x
# )
# logging.info(f"unsupervised loss: {unsupervised_loss}")
# loss = 0.5 * supervised_loss + 0.5 * unsupervised_loss
loss = CE_loss if KD_loss == 0 else CE_loss + 0.4 * KD_loss
loss = CE_loss
# else:
# loss = supervised_loss
loss = CE_loss + KD_loss
logging.info(
f"epoch {epoch} step {step} loss: {loss} correct {correct} and total {labeled_x.shape[0]} class is {np.unique(labeled_y)}"
f"epoch {epoch} loss: {loss} correct {correct} and total {labeled_x.shape[0]} class is {np.unique(labeled_y)}"
)
grads = tape.gradient(loss, all_params)
optimizer.apply_gradients(zip(grads, all_params))
@@ -349,7 +363,7 @@ def supervised_loss(self, x, y):

return loss

def distil_loss(self, x, y, q, step):
def distil_loss(self, x, y):
KD_loss = 0

if len(self.learned_classes) > 0 and self.best_old_model is not None:
@@ -386,15 +400,23 @@ def distil_loss(self, x, y, q, step):
return KD_loss

def unsupervised_loss(self, weak_x, strong_x, x):
prob_on_wux = tf.nn.softmax(self.model_call(weak_x, training=True))
prob_on_wux = tf.nn.softmax(
self.classifier(
self.feature_extractor(weak_x, training=True), training=True
)
)
pseudo_mask = tf.cast(
tf.reduce_max(prob_on_wux, axis=1) > self.accept_threshold, tf.float32
)
pse_uy = tf.one_hot(tf.argmax(prob_on_wux, axis=1), depth=self.num_classes)
prob_on_sux = tf.nn.softmax(self.model_call(strong_x, training=True))
loss = keras.losses.categorical_crossentropy(
pse_uy, prob_on_sux, from_logits=True
pse_uy = tf.one_hot(
tf.argmax(prob_on_wux, axis=1), depth=self.num_classes
).numpy()
prob_on_sux = tf.nn.softmax(
self.classifier(
self.feature_extractor(strong_x, training=True), training=True
)
)
loss = keras.losses.categorical_crossentropy(pse_uy, prob_on_sux)
loss = tf.reduce_mean(loss * pseudo_mask)
return loss
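
For intuition about the confidence gate above, a short sketch with made-up softmax outputs; the threshold matches the value set in __init__:

import tensorflow as tf

accept_threshold = 0.95  # value set in __init__ above
# Hypothetical softmax outputs for two weakly-augmented samples.
prob_on_wux = tf.constant([[0.97, 0.02, 0.01],
                           [0.40, 0.35, 0.25]])
pseudo_mask = tf.cast(
    tf.reduce_max(prob_on_wux, axis=1) > accept_threshold, tf.float32
)
pse_uy = tf.one_hot(tf.argmax(prob_on_wux, axis=1), depth=3)
print(pseudo_mask.numpy())  # [1. 0.] -> only the confident sample counts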

@@ -3,13 +3,13 @@ algorithm:
fl_data_setting:
train_ratio: 1.0
splitting_method: "default"
label_data_ratio: 1.0
label_data_ratio: 0.3
data_partition: "iid"
initial_model_url: "/home/wyd/ianvs/project/init_model/cnn.pb"

modules:
- type: "basemodel"
name: "FedCILMatch"
name: "FediCarl-Client"
url: "./examples/cifar100/fci_ssl/fed_ci_match/algorithm/basemodel.py"
hyperparameters:
- batch_size:
@@ -29,7 +29,7 @@
logging.getLogger().setLevel(logging.INFO)


@ClassFactory.register(ClassType.GENERAL, alias="FedCILMatch")
@ClassFactory.register(ClassType.GENERAL, alias="FediCarl-Client")
class BaseModel:
def __init__(self, **kwargs) -> None:
self.kwargs = kwargs