Add intel phe

nvidianz · Apr 22, 2024 · d9a2e7c · d9a2e7c
1 parent fa4d00f
commit d9a2e7c
Show file tree

Hide file tree

Showing 10 changed files with 407 additions and 51 deletions.
diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/defs.py b/nvflare/app_opt/xgboost/histogram_based_v2/defs.py
@@ -34,7 +34,7 @@ class Constant:
 
     TASK_CHECK_INTERVAL = 0.5
     JOB_STATUS_CHECK_INTERVAL = 2.0
-    MAX_CLIENT_OP_INTERVAL = 90.0
+    MAX_CLIENT_OP_INTERVAL = 600.0
     WORKFLOW_PROGRESS_TIMEOUT = 3600.0
 
     # message topics

diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/executor.py b/nvflare/app_opt/xgboost/histogram_based_v2/executor.py
@@ -32,7 +32,7 @@ def __init__(
         adaptor_component_id: str,
         configure_task_name=Constant.CONFIG_TASK_NAME,
         start_task_name=Constant.START_TASK_NAME,
-        req_timeout=60.0,
+        req_timeout=600.0,
     ):
         """Constructor
 

diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/fed_executor.py b/nvflare/app_opt/xgboost/histogram_based_v2/fed_executor.py
@@ -27,7 +27,7 @@ def __init__(
         verbose_eval=False,
         use_gpus=False,
         int_server_grpc_options=None,
-        req_timeout=60.0,
+        req_timeout=600.0,
         model_file_name="model.json",
         metrics_writer_id: str = None,
         in_process=True,

diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/__init__.py b/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/__init__.py
diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/adder.py b/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/adder.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import concurrent.futures
+
+from nvflare.app_opt.xgboost.histogram_based_v2.aggr import Aggregator
+from nvflare.app_opt.xgboost.histogram_based_v2.partial_he.util import encode_encrypted_numbers_to_str
+
+
+class Adder:
+    def __init__(self, max_workers=10):
+        self.exe = concurrent.futures.ProcessPoolExecutor(max_workers=max_workers)
+
+    def add(self, encrypted_numbers, features, sample_groups=None, encode_sum=True):
+        """
+
+        Args:
+            encrypted_numbers: list of encrypted numbers (combined gh), one for each sample
+            features: list of tuples of (feature_id, mask, num_bins), one for each feature.
+                    size of mask = size of encrypted_numbers: there is a bin number for each sample
+                    num_bins specifies the number of bins for the feature
+            sample_groups: list of sample groups, each group is a tuple of (group_id, id_list)
+                    group_id is the group id, id_list is a list of sample IDs for which the add will be applied to
+            encode_sum: if true, encode the sum into a JSON string
+
+        Returns: list of tuples of (feature_id, group_id, sum), sum is the result of adding encrypted values of
+            samples in the group for the feature.
+
+        """
+        items = []
+
+        for f in features:
+            fid, mask, num_bins = f
+            if not sample_groups:
+                items.append((encode_sum, fid, encrypted_numbers, mask, num_bins, 0, None))
+            else:
+                for g in sample_groups:
+                    gid, sample_id_list = g
+                    items.append((encode_sum, fid, encrypted_numbers, mask, num_bins, gid, sample_id_list))
+
+        results = self.exe.map(_do_add, items)
+        rl = []
+        for r in results:
+            rl.append(r)
+        return rl
+
+
+def _do_add(item):
+    encode_sum, fid, encrypted_numbers, mask, num_bins, gid, sample_id_list = item
+    # bins = [0 for _ in range(num_bins)]
+    aggr = Aggregator()
+
+    bins = aggr.aggregate(
+        gh_values=encrypted_numbers,
+        sample_bin_assignment=mask,
+        num_bins=num_bins,
+        sample_ids=sample_id_list,
+    )
+    #
+    # if not sample_id_list:
+    #     # all samples
+    #     for sample_id in range(len(encrypted_numbers)):
+    #         bid = mask[sample_id]
+    #         if bins[bid] == 0:
+    #             # avoid plain_text + cypher_text, which could be slow!
+    #             bins[bid] = encrypted_numbers[sample_id]
+    #         else:
+    #             bins[bid] += encrypted_numbers[sample_id]
+    # else:
+    #     for sample_id in sample_id_list:
+    #         bid = mask[sample_id]
+    #         if bins[bid] == 0:
+    #             # avoid plain_text + cypher_text, which could be slow!
+    #             bins[bid] = encrypted_numbers[sample_id]
+    #         else:
+    #             bins[bid] += encrypted_numbers[sample_id]
+
+    if encode_sum:
+        sums = encode_encrypted_numbers_to_str(bins)
+    else:
+        sums = bins
+    return fid, gid, sums
diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/decrypter.py b/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/decrypter.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import concurrent.futures
+
+
+class Decrypter:
+    def __init__(self, private_key, max_workers=10):
+        self.max_workers = max_workers
+        self.private_key = private_key
+        self.exe = concurrent.futures.ProcessPoolExecutor(max_workers=max_workers)
+
+    def decrypt(self, encrypted_number_groups):
+        """
+        Encrypt a list of clear text numbers
+
+        Args:
+            encrypted_number_groups: list of lists of encrypted numbers to be decrypted
+
+        Returns: list of lists of decrypted numbers
+
+        """
+        print(f"decrypting {len(encrypted_number_groups)} number groups")
+        items = [None] * len(encrypted_number_groups)
+
+        for i, g in enumerate(encrypted_number_groups):
+            items[i] = (self.private_key, g)
+
+        results = self.exe.map(_do_decrypt, items)
+        rl = []
+        for r in results:
+            rl.append(r)
+        return rl
+
+
+def _do_decrypt(item):
+    private_key, numbers = item
+    ev = [None] * len(numbers)
+    for i, v in enumerate(numbers):
+        if isinstance(v, int):
+            d = v
+        else:
+            d = private_key.decrypt(v)
+        ev[i] = d
+    return ev
diff --git a/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/encryptor.py b/nvflare/app_opt/xgboost/histogram_based_v2/partial_he/encryptor.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import concurrent.futures
+
+
+class Encryptor:
+    def __init__(self, pubkey, max_workers=10):
+        self.max_workers = max_workers
+        self.pubkey = pubkey
+        self.exe = concurrent.futures.ProcessPoolExecutor(max_workers=max_workers)
+
+    def encrypt(self, numbers):
+        """
+        Encrypt a list of clear text numbers
+
+        Args:
+            numbers: clear text numbers to be encrypted
+
+        Returns: list of encrypted numbers
+
+        """
+
+        num_values = len(numbers)
+        if num_values <= self.max_workers:
+            w_values = [numbers]
+            workers_needed = 1
+        else:
+            workers_needed = self.max_workers
+            w_values = [None for _ in range(self.max_workers)]
+            n = int(num_values / self.max_workers)
+            w_load = [n for _ in range(self.max_workers)]
+            r = num_values % self.max_workers
+            if r > 0:
+                for i in range(r):
+                    w_load[i] += 1
+
+            start = 0
+            for i in range(self.max_workers):
+                end = start + w_load[i]
+                w_values[i] = numbers[start:end]
+                start = end
+
+        total_count = 0
+        for v in w_values:
+            total_count += len(v)
+        assert total_count == num_values
+
+        items = []
+        for i in range(workers_needed):
+            items.append((self.pubkey, w_values[i]))
+        return self._encrypt(items)
+
+    def _encrypt(self, items):
+        results = self.exe.map(_do_enc, items)
+        rl = []
+        for r in results:
+            rl.extend(r)
+        return rl
+
+
+def _do_enc(item):
+    pubkey, numbers = item
+    ev = [None] * len(numbers)
+    for i, v in enumerate(numbers):
+        #print(f"encrypting {v=}")
+        d = pubkey.encrypt(v)
+        #print(f"result is {d=}")
+        #print(f"result is {d.exponent()=}")
+        ev[i] = d
+    return ev