
Commit c7e13a2

Op level broadcasting does not require copy.
1 parent 829d8cf commit c7e13a2
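
The substance of this commit: instead of materializing broadcast copies of the operands with MyBFlat, each kernel now reads its inputs in place via flat<Variant>() and maps every flat output index back to the matching input index with an IndexConverterFunctor built from the broadcast output shape and that input's shape. The functor's definition is not part of this diff; below is a minimal, hypothetical sketch of such an index mapping (the name FlatBroadcastIndexMapper and the plain std::vector shape types are assumptions for illustration, not the repository's API), included only to show why no copy of the broadcasted input is needed.

// Hypothetical sketch, NOT the repository's IndexConverterFunctor: maps a
// flat index into the broadcasted output shape back to a flat index into a
// (possibly smaller) input shape, so the input is never expanded in memory.
#include <cstdint>
#include <vector>

class FlatBroadcastIndexMapper {
 public:
  // `output_shape` is the broadcast result shape; `input_shape` is one
  // operand's shape. Size-1 (or missing leading) input dimensions are
  // broadcast, i.e. they contribute nothing to the input index.
  FlatBroadcastIndexMapper(std::vector<int64_t> const& output_shape,
                           std::vector<int64_t> const& input_shape) {
    int64_t rank = output_shape.size();
    int64_t in_rank = input_shape.size();
    dims_.resize(rank);
    strides_.resize(rank);
    int64_t stride = 1;
    for (int64_t d = rank - 1; d >= 0; --d) {
      int64_t in_d = d - (rank - in_rank);  // align trailing dimensions
      int64_t in_dim = in_d >= 0 ? input_shape[in_d] : 1;
      dims_[d] = output_shape[d];
      strides_[d] = (in_dim == 1) ? 0 : stride;  // broadcast dims -> stride 0
      stride *= in_dim;
    }
  }

  // Convert a flat output index to the corresponding flat input index.
  int64_t operator()(int64_t out_index) const {
    int64_t in_index = 0;
    for (int64_t d = dims_.size() - 1; d >= 0; --d) {
      in_index += (out_index % dims_[d]) * strides_[d];
      out_index /= dims_[d];
    }
    return in_index;
  }

 private:
  std::vector<int64_t> dims_;     // output dimensions
  std::vector<int64_t> strides_;  // input strides, 0 on broadcast dimensions
};

Because broadcast dimensions get a stride of zero, every output position along them resolves to the same input element, so a read like flat_a(a_bcaster(i)) in the loops below touches the small input directly instead of a pre-expanded copy.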

4 files changed, +145 -129 lines changed


tf_shell/cc/kernels/add_kernels.cc

Lines changed: 18 additions & 27 deletions
@@ -106,13 +106,10 @@ class AddCtCtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same size as one of the inputs.
     Tensor* output;
@@ -122,14 +119,14 @@ class AddCtCtOp : public OpKernel {
 
     for (int i = 0; i < flat_output.dimension(0); ++i) {
       SymmetricCtVariant<T> const* ct_a_var =
-          std::move(flat_a(i).get<SymmetricCtVariant<T>>());
+          std::move(flat_a(a_bcaster(i)).get<SymmetricCtVariant<T>>());
       OP_REQUIRES(op_ctx, ct_a_var != nullptr,
                   InvalidArgument("SymmetricCtVariant at flat index: ", i,
                                   " for input a did not unwrap successfully."));
       SymmetricCt const& ct_a = ct_a_var->ct;
 
       SymmetricCtVariant<T> const* ct_b_var =
-          std::move(flat_b(i).get<SymmetricCtVariant<T>>());
+          std::move(flat_b(b_bcaster(i)).get<SymmetricCtVariant<T>>());
       OP_REQUIRES(op_ctx, ct_b_var != nullptr,
                   InvalidArgument("SymmetricCtVariant at flat index: ", i,
                                   " for input b did not unwrap successfully."));
@@ -166,13 +163,10 @@ class AddCtPtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same size as one of the inputs.
     Tensor* output;
@@ -182,14 +176,14 @@ class AddCtPtOp : public OpKernel {
 
     for (int i = 0; i < flat_output.dimension(0); ++i) {
      SymmetricCtVariant<T> const* ct_a_var =
-          std::move(flat_a(i).get<SymmetricCtVariant<T>>());
+          std::move(flat_a(a_bcaster(i)).get<SymmetricCtVariant<T>>());
       OP_REQUIRES(op_ctx, ct_a_var != nullptr,
                   InvalidArgument("SymmetricCtVariant at flat index: ", i,
                                   " for input a did not unwrap successfully."));
       SymmetricCt const& ct_a = ct_a_var->ct;
 
       PolynomialVariant<T> const* pv_b_var =
-          std::move(flat_b(i).get<PolynomialVariant<T>>());
+          std::move(flat_b(b_bcaster(i)).get<PolynomialVariant<T>>());
       OP_REQUIRES(op_ctx, pv_b_var != nullptr,
                   InvalidArgument("PolynomialVariant at flat index: ", i,
                                   " for input b did not unwrap successfully."));
@@ -229,13 +223,10 @@ class AddPtPtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same size as one of the inputs.
     Tensor* output;
@@ -245,14 +236,14 @@ class AddPtPtOp : public OpKernel {
 
     for (int i = 0; i < flat_output.dimension(0); ++i) {
       PolynomialVariant<T> const* pv_a_var =
-          std::move(flat_a(i).get<PolynomialVariant<T>>());
+          std::move(flat_a(a_bcaster(i)).get<PolynomialVariant<T>>());
       OP_REQUIRES(op_ctx, pv_a_var != nullptr,
                   InvalidArgument("PolynomialVariant at flat index: ", i,
                                   " for input a did not unwrap successfully."));
       RnsPolynomial const& pt_a = pv_a_var->poly;
 
       PolynomialVariant<T> const* pv_b_var =
-          std::move(flat_b(i).get<PolynomialVariant<T>>());
+          std::move(flat_b(b_bcaster(i)).get<PolynomialVariant<T>>());
       OP_REQUIRES(op_ctx, pv_b_var != nullptr,
                   InvalidArgument("PolynomialVariant at flat index: ", i,
                                   " for input b did not unwrap successfully."));

tf_shell/cc/kernels/mul_kernels.cc

Lines changed: 62 additions & 54 deletions
@@ -64,13 +64,10 @@ class MulCtCtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same shape as each of the inputs.
     Tensor* output;
@@ -81,14 +78,14 @@ class MulCtCtOp : public OpKernel {
     // Multiply each pair of ciphertexts and store the result in the output.
     for (int i = 0; i < flat_output.dimension(0); ++i) {
       SymmetricCtVariant<T> const* ct_a_var =
-          std::move(flat_a(i).get<SymmetricCtVariant<T>>());
+          std::move(flat_a(a_bcaster(i)).get<SymmetricCtVariant<T>>());
       OP_REQUIRES(op_ctx, ct_a_var != nullptr,
                   InvalidArgument("SymmetricCtVariant at flat index:", i,
                                   " for input a did not unwrap successfully."));
       SymmetricCt const& ct_a = ct_a_var->ct;
 
       SymmetricCtVariant<T> const* ct_b_var =
-          std::move(flat_b(i).get<SymmetricCtVariant<T>>());
+          std::move(flat_b(b_bcaster(i)).get<SymmetricCtVariant<T>>());
       OP_REQUIRES(op_ctx, ct_b_var != nullptr,
                   InvalidArgument("SymmetricCtVariant at flat index:", i,
                                   " for input b did not unwrap successfully."));
@@ -124,46 +121,63 @@ class MulCtPtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same shape as each of the inputs.
     Tensor* output;
     TensorShape output_shape = BCast::ToShape(bcast.output_shape());
     OP_REQUIRES_OK(op_ctx, op_ctx->allocate_output(0, output_shape, &output));
     auto flat_output = output->flat<Variant>();
 
-    for (int i = 0; i < flat_output.dimension(0); ++i) {
-      SymmetricCtVariant<T> const* ct_a_var =
-          std::move(flat_a(i).get<SymmetricCtVariant<T>>());
-      OP_REQUIRES(op_ctx, ct_a_var != nullptr,
-                  InvalidArgument("SymmetricCtVariant at flat index:", i,
-                                  " for input a did not unwrap successfully."));
-      SymmetricCt const& ct_a = ct_a_var->ct;
+    // Recover num_slots from first ciphertext.
+    SymmetricCtVariant<T> const* ct_var =
+        std::move(flat_a(0).get<SymmetricCtVariant<T>>());
+    OP_REQUIRES(
+        op_ctx, ct_var != nullptr,
+        InvalidArgument("SymmetricCtVariant a did not unwrap successfully."));
+    SymmetricCt const& ct = ct_var->ct;
+    int num_slots = 1 << ct.LogN();
+    int num_components = ct.NumModuli();
 
-      PolynomialVariant<T> const* pv_b_var =
-          std::move(flat_b(i).get<PolynomialVariant<T>>());
-      OP_REQUIRES(op_ctx, pv_b_var != nullptr,
-                  InvalidArgument("PolynomialVariant at flat index:", i,
-                                  " for input b did not unwrap successfully."));
-      RnsPolynomial const& pt_b = pv_b_var->poly;
+    auto mul_in_range = [&](int start, int end) {
+      for (int i = start; i < end; ++i) {
+        SymmetricCtVariant<T> const* ct_a_var =
+            std::move(flat_a(a_bcaster(i)).get<SymmetricCtVariant<T>>());
+        OP_REQUIRES(
+            op_ctx, ct_a_var != nullptr,
+            InvalidArgument("SymmetricCtVariant at flat index:", i,
+                            " for input a did not unwrap successfully."));
+        SymmetricCt const& ct_a = ct_a_var->ct;
 
-      OP_REQUIRES_VALUE(SymmetricCt ct_c, op_ctx,
-                        ct_a * pt_b);  // shell absorb operation
+        PolynomialVariant<T> const* pv_b_var =
+            std::move(flat_b(b_bcaster(i)).get<PolynomialVariant<T>>());
+        OP_REQUIRES(
+            op_ctx, pv_b_var != nullptr,
+            InvalidArgument("PolynomialVariant at flat index:", i,
+                            " for input b did not unwrap successfully."));
+        RnsPolynomial const& pt_b = pv_b_var->poly;
 
-      SymmetricCtVariant ct_c_var(std::move(ct_c));
-      flat_output(i) = std::move(ct_c_var);
-    }
+        OP_REQUIRES_VALUE(SymmetricCt ct_c, op_ctx,
+                          ct_a * pt_b);  // shell absorb operation
+
+        SymmetricCtVariant ct_c_var(std::move(ct_c));
+        flat_output(i) = std::move(ct_c_var);
+      }
+    };
+
+    auto thread_pool =
+        op_ctx->device()->tensorflow_cpu_worker_threads()->workers;
+    int const cost_per_mul = 30 * num_slots * num_components;
+    thread_pool->ParallelFor(flat_output.dimension(0), cost_per_mul,
+                             mul_in_range);
   }
 };
 
-// This Op can multiply either a shell ciphertext or a plaintext polynomial by a
-// plaintext scalar, depending on the class template.
+// This Op can multiply either a shell ciphertext or a plaintext polynomial by
+// a plaintext scalar, depending on the class template.
 template <typename T, typename PtT, typename CtOrPolyVariant>
 class MulShellTfScalarOp : public OpKernel {
  private:
@@ -194,12 +208,8 @@ class MulShellTfScalarOp : public OpKernel {
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
     auto flat_a = a.flat<Variant>();  // a is not broadcasted, just b.
-    auto flat_b = MyBFlat<PtT>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_b = b.flat<PtT>();
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
     // Allocate the output tensor which is the same shape as the first input.
     Tensor* output;
@@ -211,7 +221,7 @@ class MulShellTfScalarOp : public OpKernel {
       // First encode the scalar b
       // TDOO(jchoncholas): encode all scalars at once beforehand.
       T wrapped_b;
-      EncodeScalar(op_ctx, flat_b(i), encoder, &wrapped_b);
+      EncodeScalar(op_ctx, flat_b(b_bcaster(i)), encoder, &wrapped_b);
 
       CtOrPolyVariant const* ct_or_pt_var =
           std::move(flat_a(i).get<CtOrPolyVariant>());
@@ -241,7 +251,7 @@ class MulShellTfScalarOp : public OpKernel {
     }
   }
 
-  private:
+ private:
   void EncodeScalar(OpKernelContext* op_ctx, PtT const& val, Encoder const* encoder, T* wrapped_val) {
     if constexpr (std::is_signed<PtT>::value) {
       // SHELL is built on the assumption that the plaintext type (in this
@@ -293,30 +303,28 @@ class MulPtPtOp : public OpKernel {
         op_ctx, bcast.IsValid(),
         InvalidArgument("Invalid broadcast between ", a.shape().DebugString(),
                         " and ", b.shape().DebugString()));
-    auto flat_a = MyBFlat<Variant>(op_ctx, a, bcast.x_reshape(), bcast.x_bcast());
-    auto flat_b = MyBFlat<Variant>(op_ctx, b, bcast.y_reshape(), bcast.y_bcast());
-
-    // Check the inputs have the same shape.
-    OP_REQUIRES(
-        op_ctx, flat_a.size() == flat_b.size(),
-        InvalidArgument("Broadcasted inputs must have the same shape."));
+    auto flat_a = a.flat<Variant>();
+    auto flat_b = b.flat<Variant>();
+    IndexConverterFunctor a_bcaster(bcast.output_shape(), a.shape());
+    IndexConverterFunctor b_bcaster(bcast.output_shape(), b.shape());
 
-    // Allocate the output tensor which is the same shape as each of the inputs.
+    // Allocate the output tensor which is the same shape as each of the
+    // inputs.
     Tensor* output;
     TensorShape output_shape = BCast::ToShape(bcast.output_shape());
     OP_REQUIRES_OK(op_ctx, op_ctx->allocate_output(0, output_shape, &output));
     auto flat_output = output->flat<Variant>();
 
     for (int i = 0; i < flat_output.dimension(0); ++i) {
       PolynomialVariant<T> const* pv_a_var =
-          std::move(flat_a(i).get<PolynomialVariant<T>>());
+          std::move(flat_a(a_bcaster(i)).get<PolynomialVariant<T>>());
       OP_REQUIRES(op_ctx, pv_a_var != nullptr,
                   InvalidArgument("PolynomialVariant at flat index:", i,
                                   " for input a did not unwrap successfully."));
       RnsPolynomial const& pt_a = pv_a_var->poly;
 
       PolynomialVariant<T> const* pv_b_var =
-          std::move(flat_b(i).get<PolynomialVariant<T>>());
+          std::move(flat_b(b_bcaster(i)).get<PolynomialVariant<T>>());
       OP_REQUIRES(op_ctx, pv_b_var != nullptr,
                   InvalidArgument("PolynomialVariant at flat index:", i,
                                   " for input b did not unwrap successfully."));
