diff --git a/qhbmlib/ebm.py b/qhbmlib/ebm.py
index 3de0f806..e0302ecd 100644
--- a/qhbmlib/ebm.py
+++ b/qhbmlib/ebm.py
@@ -547,6 +547,14 @@ def has_operator(self):
   def is_analytic(self):
     return self._is_analytic
 
+  @property
+  def trainable_variables(self):
+    return self._energy_function.trainable_variables
+
+  @trainable_variables.setter
+  def trainable_variables(self, value):
+    self._energy_function.trainable_variables = value
+
   def copy(self):
     if self._energy_sampler is not None:
       energy_sampler = self._energy_sampler.copy()
@@ -558,7 +566,7 @@ def copy(self):
         energy_function,
         energy_sampler,
         is_analytic=self.is_analytic,
-        name=self.name)
+        name=f'{self.name}_copy')
 
   def energy(self, bitstrings):
     return self._energy_function.energy(bitstrings)
@@ -588,7 +596,7 @@ def energies(self):
 
   def probabilities(self):
     if self.is_analytic:
-      return tf.exp(-self.ebm.energies()) / tf.exp(
+      return tf.exp(-self.energies()) / tf.exp(
           self.log_partition_function())
     raise NotImplementedError()
 
@@ -645,7 +653,7 @@ def is_analytic(self):
 
   def copy(self):
     bernoulli = Bernoulli(
-        self.num_bits, is_analytic=self.is_analytic, name=self.name)
+        self.num_bits, is_analytic=self.is_analytic, name=f'{self.name}_copy')
     bernoulli.kernel.assign(self.kernel)
     return bernoulli
 
diff --git a/qhbmlib/qhbm.py b/qhbmlib/qhbm.py
index aed4412d..2976d962 100644
--- a/qhbmlib/qhbm.py
+++ b/qhbmlib/qhbm.py
@@ -72,22 +72,69 @@ def is_analytic(self):
     return self.ebm.is_analytic and self.qnn.is_analytic
 
   def copy(self):
-    return QHBM(self.ebm.copy(), self.qnn.copy(), name=self.name)
+    return QHBM(self.ebm.copy(), self.qnn.copy(), name=f'{self.name}_copy')
 
-  def circuits(self, num_samples):
-    bitstrings, counts = self.ebm.sample(num_samples)
-    circuits = self.qnn.circuits(bitstrings)
-    return circuits, counts
+  def circuits(self, num_samples, unique=True, resolve=True):
+    if unique:
+      bitstrings, counts = self.ebm.sample(num_samples, unique=unique)
+      circuits = self.qnn.circuits(bitstrings, resolve=resolve)
+      return circuits, counts
+    bitstrings = self.ebm.sample(num_samples, unique=unique)
+    circuits = self.qnn.circuits(bitstrings, resolve=resolve)
+    return circuits
 
   def sample(self, num_samples, mask=True, reduce=True, unique=True):
     bitstrings, counts = self.ebm.sample(num_samples)
     return self.qnn.sample(
         bitstrings, counts, mask=mask, reduce=reduce, unique=unique)
 
-  def expectation(self, operators, num_samples, reduce=True):
+  def expectation(self, operators, num_samples, mask=True, reduce=True):
+    """TODO: add gradient function"""
+    if isinstance(operators, QHBM):
+      circuits, counts = self.circuits(num_samples, resolve=False)
+      return operators.operator_expectation((circuits, counts),
+                                            symbol_names=self.qnn.symbols,
+                                            symbol_values=self.qnn.values,
+                                            mask=mask,
+                                            reduce=reduce)
     bitstrings, counts = self.ebm.sample(num_samples)
     return self.qnn.expectation(bitstrings, counts, operators, reduce=reduce)
 
+  def operator_expectation(self,
+                           density_operator,
+                           num_samples=None,
+                           symbol_names=None,
+                           symbol_values=None,
+                           reduce=True,
+                           mask=True):
+    """TODO: add gradient function"""
+    if isinstance(density_operator, tuple):
+      circuits, counts = density_operator
+    elif isinstance(density_operator, QHBM):
+      circuits, counts = density_operator.circuits(num_samples, resolve=False)
+      symbol_names = density_operator.qnn.symbols
+      symbol_values = density_operator.qnn.values
+    else:
+      raise TypeError()
+
+    if self.ebm.has_operator:
+      expectation_shards = self.qnn.pulled_back_expectation(
+          circuits,
+          counts,
+          self.operator_shards,
+          symbol_names=symbol_names,
+          symbol_values=symbol_values,
+          reduce=reduce)
+      return self.ebm.operator_expectation(expectation_shards)
+    bitstrings, counts = self.qnn.pulled_back_sample(
+        circuits, counts, mask=mask)
+    energies = self.ebm.energy(bitstrings)
+    if reduce:
+      probs = tf.cast(counts, tf.float32) / tf.cast(
+          tf.reduce_sum(counts), tf.float32)
+      return tf.reduce_sum(probs * energies)
+    return energies
+
   def probabilities(self):
     return self.ebm.probabilities()
 
@@ -111,10 +158,12 @@ def density_matrix(self):
 
   def fidelity(self, sigma: tf.Tensor):
     """TODO: convert to tf.keras.metric.Metric
+
     Calculate the fidelity between a QHBM and a density matrix.
         Args:
           sigma: 2-D `tf.Tensor` of dtype `complex64` representing the right
             density matrix in the fidelity calculation.
+
         Returns:
           A scalar `tf.Tensor` which is the fidelity between the density matrix
             represented by this QHBM and `sigma`.
diff --git a/qhbmlib/qmhl.py b/qhbmlib/qmhl.py
index 4fd2206b..bd8500a7 100644
--- a/qhbmlib/qmhl.py
+++ b/qhbmlib/qmhl.py
@@ -15,9 +15,10 @@
 """Impementations of the QMHL loss and its derivatives."""
 
 import tensorflow as tf
+from qhbmlib import qhbm
 
 
-def qmhl(qhbm_model, target_circuits, target_counts):
+def qmhl(qhbm_model, density_operator, num_samples=1000):
   """Calculate the QMHL loss of the qhbm model against the target.
 
   This loss is differentiable with respect to the trainable variables of the
@@ -25,8 +26,8 @@ def qmhl(qhbm_model, target_circuits, target_counts):
 
   Args:
     qhbm_model: Parameterized model density operator.
-    target_circuits: 1-D tensor of strings which are serialized circuits.
-      These circuits represent samples from the data density matrix.
+    target_circuits: 1-D tensor of strings which are serialized circuits. These
+      circuits represent samples from the data density matrix.
     target_counts: 1-D tensor of integers which are the number of samples to
       draw from the data density matrix: `target_counts[i]` is the number of
         samples to draw from `target_circuits[i]`.
@@ -36,73 +37,77 @@ def qmhl(qhbm_model, target_circuits, target_counts):
   """
 
   @tf.custom_gradient
-  def loss(trainable_variables):
-    # log_partition estimate
+  def function(trainable_variables):
+    # pulled back expectation of energy operator
+    if isinstance(density_operator, tuple):
+      circuits, counts = density_operator
+    elif isinstance(density_operator, qhbm.QHBM):
+      circuits, counts = density_operator.circuits(num_samples)
+    else:
+      raise TypeError()
 
+    if qhbm_model.ebm.has_operator:
+      expectation_shards = qhbm_model.qnn.pulled_back_expectation(
+          circuits, counts, qhbm_model.operator_shards)
+      expectation = qhbm_model.ebm.operator_expectation(expectation_shards)
+    else:
+      qnn_bitstrings, qnn_counts = qhbm_model.qnn.pulled_back_sample(
+          circuits, counts)
+      energies = qhbm_model.ebm.energy(qnn_bitstrings)
+      qnn_probs = tf.cast(qnn_counts, tf.float32) / tf.cast(
+          tf.reduce_sum(qnn_counts), tf.float32)
+      expectation = tf.reduce_sum(qnn_probs * energies)
+
+    # log_partition estimate
     if qhbm_model.ebm.is_analytic:
       log_partition_function = qhbm_model.log_partition_function()
     else:
-      bitstrings, _ = qhbm_model.ebm.sample(tf.reduce_sum(target_counts))
+      bitstrings, _ = qhbm_model.ebm.sample(tf.reduce_sum(counts))
       energies = qhbm_model.ebm.energy(bitstrings)
-      log_partition_function = tf.math.reduce_logsumexp(-1 * energies)
+      log_partition_function = tf.math.reduce_logsumexp(-energies)
 
-    # pulled back expectation of energy operator
-    qnn_bitstrings, qnn_counts = qhbm_model.qnn.pulled_back_sample(
-        target_circuits, target_counts)
-    qnn_probs = tf.cast(qnn_counts, tf.float32) / tf.cast(
-        tf.reduce_sum(qnn_counts), tf.float32)
-    energies = qhbm_model.ebm.energy(qnn_bitstrings)
-    avg_energy = tf.reduce_sum(qnn_probs * energies)
-
-    def grad(grad_y, variables=None):
+    def gradient(grad_y, variables=None):
       """Gradients are computed using estimators from the QHBM paper."""
-      # Thetas derivative.
-      ebm_bitstrings, ebm_counts = qhbm_model.ebm.sample(
-          tf.reduce_sum(target_counts))
-      ebm_probs = tf.cast(ebm_counts, tf.float32) / tf.cast(
-          tf.reduce_sum(ebm_counts), tf.float32)
-      with tf.GradientTape() as tape:
+      with tf.GradientTape(persistent=True) as tape:
         tape.watch(qhbm_model.ebm.trainable_variables)
-        qnn_energies = qhbm_model.ebm.energy(qnn_bitstrings)
-      # jacobian is a list over thetas, with ith entry a tensor of shape
-      # [tf.shape(qnn_energies)[0], tf.shape(thetas[i])[0]]
-      qnn_energy_jac = tape.jacobian(qnn_energies,
-                                     qhbm_model.ebm.trainable_variables)
+        if qhbm_model.ebm.has_operator:
+          tape.watch(qhbm_model.qnn.trainable_variables)
+          expectation_shards = qhbm_model.qnn.pulled_back_expectation(
+              circuits, counts, qhbm_model.operator_shards)
+          expectation = qhbm_model.ebm.operator_expectation(expectation_shards)
+        else:
+          energies = qhbm_model.ebm.energy(qnn_bitstrings)
+          expectation = tf.reduce_sum(qnn_probs * energies)
+      qnn_energy_grad = tape.gradient(expectation,
+                                      qhbm_model.ebm.trainable_variables)
+      if qhbm_model.ebm.has_operator:
+        grad_qnn = tape.gradient(expectation,
+                                 qhbm_model.qnn.trainable_variables)
+        grad_qnn = [grad_y * grad for grad in grad_qnn]
+      else:
+        raise NotImplementedError(
+            "Derivative when EBM has no operator is not yet supported.")
 
+      ebm_bitstrings, ebm_counts = qhbm_model.ebm.sample(tf.reduce_sum(counts))
+      ebm_probs = tf.cast(ebm_counts, tf.float32) / tf.cast(
+          tf.reduce_sum(ebm_counts), tf.float32)
       with tf.GradientTape() as tape:
         tape.watch(qhbm_model.ebm.trainable_variables)
-        ebm_energies = qhbm_model.ebm.energy(ebm_bitstrings)
-      ebm_energy_jac = tape.jacobian(ebm_energies,
-                                     qhbm_model.ebm.trainable_variables)
+        energies = qhbm_model.ebm.energy(ebm_bitstrings)
+        expectation = tf.reduce_sum(ebm_probs * energies)
+      ebm_energy_grad = tape.gradient(expectation,
+                                      qhbm_model.ebm.trainable_variables)
 
-      # contract over bitstring weights
       grad_ebm = [
-          grad_y *
-          (tf.reduce_sum(
-              tf.transpose(qnn_probs * tf.transpose(qnn_energy_grad)), 0) -
-           tf.reduce_sum(
-               tf.transpose(ebm_probs * tf.transpose(ebm_energy_grad)), 0))
-          for qnn_energy_grad, ebm_energy_grad in zip(qnn_energy_jac,
-                                                      ebm_energy_jac)
+          grad_y * (qnn_grad - ebm_grad)
+          for qnn_grad, ebm_grad in zip(qnn_energy_grad, ebm_energy_grad)
       ]
 
-      # Phis derivative.
-      if qhbm_model.ebm.has_operator:
-        with tf.GradientTape() as tape:
-          tape.watch(qhbm_model.qnn.trainable_variables)
-          energy_shards = qhbm_model.qnn.pulled_back_expectation(
-              target_circuits, target_counts, qhbm_model.operator_shards)
-          energy = qhbm_model.ebm.operator_expectation(energy_shards)
-        grad_qnn = tape.gradient(energy, qhbm_model.qnn.trainable_variables)
-        grad_qnn = [grad_y * g for g in grad_qnn]
-      else:
-        raise NotImplementedError(
-            "Derivative when EBM has no operator is not yet supported.")
       grad_qhbm = grad_ebm + grad_qnn
-      if variables is None:
-        return grad_qhbm
-      return grad_qhbm, [tf.zeros_like(g) for g in grad_qhbm]
+      if variables:
+        return grad_qhbm, [tf.zeros_like(var) for var in variables]
+      return grad_qhbm
 
-    return avg_energy + log_partition_function, grad
+    return expectation + log_partition_function, gradient
 
-  return loss(qhbm_model.trainable_variables)
+  return function(qhbm_model.trainable_variables)
diff --git a/qhbmlib/qnn.py b/qhbmlib/qnn.py
index 49a60d1b..f4c935be 100644
--- a/qhbmlib/qnn.py
+++ b/qhbmlib/qnn.py
@@ -62,12 +62,12 @@ def __init__(
         ignored if `values` is not None.
       backend: Optional Python `object` that specifies what backend TFQ will use
         for operations involving this QNN. Options are {"noisy", "noiseless"},
-        or however users may also specify a preconfigured cirq execution
-        object to use instead, which must inherit `cirq.Sampler`.
+        but users may instead specify a preconfigured cirq execution object,
+        which must inherit `cirq.Sampler`.
       differentiator: Either None or a `tfq.differentiators.Differentiator`,
         which specifies how to take the derivative of a quantum circuit.
-      is_analytic: bool flag that enables is_analytic methods. If True, then backend
-        must also be "noiseless".
+      is_analytic: bool flag that enables is_analytic methods. If True, then
+        backend must also be "noiseless".
       name: Identifier for this QNN.
     """
     super().__init__(name=name)
@@ -151,19 +151,24 @@ def copy(self):
         backend=self.backend,
         differentiator=self.differentiator,
         is_analytic=self.is_analytic,
-        name=self.name)
+        name=f'{self.name}_copy')
     qnn.values.assign(self.values)
     return qnn
 
-  def _sample_function(self,
-                       circuits,
-                       counts,
-                       mask=True,
-                       reduce=True,
-                       unique=True):
+  def _sample(self,
+              circuits,
+              counts,
+              symbol_names=None,
+              symbol_values=None,
+              mask=True,
+              reduce=True,
+              unique=True):
     """General function for sampling from circuits."""
     samples = self._sample_layer(
-        circuits, repetitions=tf.expand_dims(tf.math.reduce_max(counts), 0))
+        circuits,
+        symbol_names=symbol_names,
+        symbol_values=symbol_values,
+        repetitions=tf.expand_dims(tf.math.reduce_max(counts), 0))
     if mask:
       num_samples_mask = tf.cast((tf.ragged.range(counts) + 1).to_tensor(),
                                  tf.bool)
@@ -176,7 +181,13 @@ def _sample_function(self,
       samples = samples.values.to_tensor()
     return samples
 
-  def _expectation_function(self, circuits, counts, operators, reduce=True):
+  def _expectation(self,
+                   circuits,
+                   counts,
+                   operators,
+                   symbol_names=None,
+                   symbol_values=None,
+                   reduce=True):
     """General function for taking sampled expectations from circuits.
 
     `counts[i]` sets the weight of `circuits[i]` in the expectation.
@@ -185,21 +196,23 @@ def _expectation_function(self, circuits, counts, operators, reduce=True):
     """
     num_circuits = tf.shape(circuits)[0]
     num_operators = tf.shape(operators)[0]
-    tiled_values = tf.tile(tf.expand_dims(self.values, 0), [num_circuits, 1])
-    tiled_operators = tf.tile(tf.expand_dims(operators, 0), [num_circuits, 1])
+    if symbol_values is not None:
+      symbol_values = tf.tile(
+          tf.expand_dims(symbol_values, 0), [num_circuits, 1])
+    operators = tf.tile(tf.expand_dims(operators, 0), [num_circuits, 1])
     if self.backend == "noiseless":
       expectations = self._expectation_layer(
           circuits,
-          symbol_names=self.symbols,
-          symbol_values=tiled_values,
-          operators=tiled_operators,
+          symbol_names=symbol_names,
+          symbol_values=symbol_values,
+          operators=operators,
       )
     else:
       expectations = self._expectation_layer(
           circuits,
-          symbol_names=self.symbols,
-          symbol_values=tiled_values,
-          operators=tiled_operators,
+          symbol_names=symbol_names,
+          symbol_values=symbol_values,
+          operators=operators,
           repetitions=tf.tile(tf.expand_dims(counts, 1), [1, num_operators]),
       )
     if reduce:
@@ -226,8 +239,8 @@ def circuits(self, bitstrings, resolve=True):
       Args:
         bitstrings: 2D tensor of dtype `tf.int8` whose entries are bits. These
           specify the state inputs to use in the returned set of circuits.
-        resolve: bool which says whether or not to resolve the QNN
-          unitary before appending to the bit injection circuits.
+        resolve: bool which says whether or not to resolve the QNN unitary
+          before appending to the bit injection circuits.
 
       Returns:
         1D tensor of strings which represent the current QNN circuits.
@@ -254,7 +267,7 @@ def sample(self, bitstrings, counts, mask=True, reduce=True, unique=True):
           `self.u|bitstrings[i]>`.
     """
     circuits = self.circuits(bitstrings)
-    return self._sample_function(
+    return self._sample(
         circuits, counts, mask=mask, reduce=reduce, unique=unique)
 
   def expectation(self, bitstrings, counts, operators, reduce=True):
@@ -266,8 +279,8 @@ def expectation(self, bitstrings, counts, operators, reduce=True):
           relative weight of `bitstrings[i]` when computing expectations.
         operators: 1D tensor of strings, the result of calling
           `tfq.convert_to_tensor` on a list of cirq.PauliSum, `[op1, op2, ...]`.
-          Will be tiled to measure `<opj>_self.u_dagger|circuits[i]>`
-          for each i and j.
+          Will be tiled to measure `<opj>_self.u_dagger|circuits[i]>` for each i
+          and j.
         reduce: bool flag for whether or not to average over i.
 
       Returns:
@@ -276,16 +289,21 @@ def expectation(self, bitstrings, counts, operators, reduce=True):
         expectation values.
       """
     circuits = self.circuits(bitstrings, resolve=False)
-    return self._expectation_function(
-        circuits, counts, operators, reduce=reduce)
+    return self._expectation(
+        circuits,
+        counts,
+        operators,
+        symbol_names=self.symbols,
+        symbol_values=self.values,
+        reduce=reduce)
 
   def pulled_back_circuits(self, circuits, resolve=True):
     """Returns the pulled back circuits for this QNN given input quantum data.
 
       Args:
-        circuits: 1-D `tf.Tensor` of type `tf.string` which contains
-          circuits serialized by `tfq.convert_to_tensor`. These represent pure
-          state samples from the data density matrix.
+        circuits: 1-D `tf.Tensor` of type `tf.string` which contains circuits
+          serialized by `tfq.convert_to_tensor`. These represent pure state
+          samples from the data density matrix.
         resolve: bool tensor which says whether or not to resolve the QNN
           inverse unitary before appending to the data circuits.
 
@@ -310,34 +328,40 @@ def pulled_back_sample(self,
       from each circuit.
 
       Args:
-        circuits: 1-D `tf.Tensor` of type `tf.string` which contains
-          circuits serialized by `tfq.convert_to_tensor`. These represent pure
-          state samples from the data density matrix.
+        circuits: 1-D `tf.Tensor` of type `tf.string` which contains circuits
+          serialized by `tfq.convert_to_tensor`. These represent pure state
+          samples from the data density matrix.
         counts: 1-D `tf.Tensor` of type `tf.int32`, must be the same size as
-          `circuits`. Contains the number of samples to draw from each
-          input circuit.
+          `circuits`. Contains the number of samples to draw from each input
+          circuit.
 
       Returns:
         ragged_samples: `tf.RaggedTensor` of DType `tf.int8` structured such
             that `ragged_samples[i]` contains `counts[i]` bitstrings.
       """
     pulled_back_circuits = self.pulled_back_circuits(circuits)
-    return self._sample_function(
+    return self._sample(
         pulled_back_circuits, counts, mask=mask, reduce=reduce, unique=unique)
 
-  def pulled_back_expectation(self, circuits, counts, operators, reduce=True):
+  def pulled_back_expectation(self,
+                              circuits,
+                              counts,
+                              operators,
+                              symbol_names=None,
+                              symbol_values=None,
+                              reduce=True):
     """Returns the expectation values for a given pulled-back dataset.
 
       Args:
-        circuits: 1-D `tf.Tensor` of type `tf.string` which contains
-          circuits serialized by `tfq.convert_to_tensor`. These represent pure
-          state samples from the data density matrix.
+        circuits: 1-D `tf.Tensor` of type `tf.string` which contains circuits
+          serialized by `tfq.convert_to_tensor`. These represent pure state
+          samples from the data density matrix.
         counts: 1D tensor of dtype `tf.int32` such that `counts[i]` is the
           relative weight of `circuits[i]` when computing expectations.
         operators: 1D tensor of strings, the result of calling
           `tfq.convert_to_tensor` on a list of cirq.PauliSum, `[op1, op2, ...]`.
-          Will be tiled to measure `<opj>_self.u_dagger|circuits[i]>`
-          for each i and j, then averaged over i.
+          Will be tiled to measure `<opj>_self.u_dagger|circuits[i]>` for each i
+          and j, then averaged over i.
 
       Returns:
         1-D tensor of floats which are the bitstring-averaged expectation values
@@ -345,8 +369,23 @@ def pulled_back_expectation(self, circuits, counts, operators, reduce=True):
         expectation values.
     """
     pulled_back_circuits = self.pulled_back_circuits(circuits, resolve=False)
-    return self._expectation_function(
-        pulled_back_circuits, counts, operators, reduce=reduce)
+    # Concatenate, not stack: the supplied symbols and this QNN's own symbols
+    # are independent and need not have the same length (tf.stack requires it).
+    if symbol_names is not None:
+      symbol_names = tf.concat([symbol_names, self.symbols], 0)
+    else:
+      symbol_names = self.symbols
+    if symbol_values is not None:
+      symbol_values = tf.concat([symbol_values, self.values], 0)
+    else:
+      symbol_values = self.values
+    return self._expectation(
+        pulled_back_circuits,
+        counts,
+        operators,
+        symbol_names=symbol_names,
+        symbol_values=symbol_values,
+        reduce=reduce)
 
   def pqc_unitary(self):
     if self.is_analytic:
diff --git a/qhbmlib/vqt.py b/qhbmlib/vqt.py
index e358ccd8..77fdcc73 100644
--- a/qhbmlib/vqt.py
+++ b/qhbmlib/vqt.py
@@ -15,9 +15,10 @@
 """Impementations of the VQT loss and its derivatives."""
 
 import tensorflow as tf
+from qhbmlib import qhbm
 
 
-def vqt(model, num_samples, hamiltonian, beta):
+def vqt(qhbm_model, hamiltonian, beta=1.0, num_samples=1000):
   """Computes the VQT loss of a given QHBM against given thermal state params.
 
   This function is differentiable within a `tf.GradientTape` scope.
@@ -37,43 +38,82 @@ def vqt(model, num_samples, hamiltonian, beta):
   """
 
   @tf.custom_gradient
-  def loss(trainable_variables):
-    bitstrings, counts = model.ebm.sample(num_samples)
+  def function(trainable_variables):
+    bitstrings, counts = qhbm_model.ebm.sample(num_samples)
     probs = tf.cast(counts, tf.float32) / tf.cast(num_samples, tf.float32)
-    expectation = tf.squeeze(
-        model.qnn.expectation(bitstrings, counts, hamiltonian), -1)
-    if model.is_analytic:
-      entropy = model.entropy()
+
+    if isinstance(hamiltonian, qhbm.QHBM):
+      circuits = qhbm_model.qnn.circuits(bitstrings, resolve=False)
+      if hamiltonian.ebm.has_operator:
+        expectation_shards = hamiltonian.qnn.pulled_back_expectation(
+            circuits,
+            counts,
+            hamiltonian.operator_shards,
+            symbol_names=qhbm_model.qnn.symbols,
+            symbol_values=qhbm_model.qnn.values)
+        expectation = hamiltonian.ebm.operator_expectation(expectation_shards)
+      else:
+        # `pulled_back_sample` accepts no symbol arguments; resolve first.
+        resolved_circuits = qhbm_model.qnn.circuits(bitstrings)
+        qnn_bitstrings, qnn_counts = hamiltonian.qnn.pulled_back_sample(
+            resolved_circuits,
+            counts)
+        energies = hamiltonian.ebm.energy(qnn_bitstrings)
+        qnn_probs = tf.cast(qnn_counts, tf.float32) / tf.cast(
+            tf.reduce_sum(qnn_counts), tf.float32)
+        expectation = tf.reduce_sum(qnn_probs * energies)
+    else:
+      expectation = tf.squeeze(
+          qhbm_model.qnn.expectation(bitstrings, counts, hamiltonian), -1)
+
+    if qhbm_model.is_analytic:
+      entropy = qhbm_model.entropy()
     else:
       entropy = -tf.reduce_sum(probs * tf.math.log(probs))
 
-    def grad(grad_y, variables=None):
+    def gradient(grad_y, variables=None):
       with tf.GradientTape() as tape:
-        tape.watch(model.qnn.trainable_variables)
-        beta_expectations = beta * tf.squeeze(
-            model.qnn.expectation(
-                bitstrings, counts, hamiltonian, reduce=False), -1)
+        tape.watch(qhbm_model.qnn.trainable_variables)
+        if isinstance(hamiltonian, qhbm.QHBM):
+          if hamiltonian.ebm.has_operator:
+            expectation_shards = hamiltonian.qnn.pulled_back_expectation(
+                circuits,
+                counts,
+                hamiltonian.operator_shards,
+                symbol_names=qhbm_model.qnn.symbols,
+                symbol_values=qhbm_model.qnn.values,
+                reduce=False)
+            beta_expectations = beta * hamiltonian.ebm.operator_expectation(
+                expectation_shards)
+          else:
+            raise NotImplementedError()
+        else:
+          beta_expectations = beta * tf.squeeze(
+              qhbm_model.qnn.expectation(
+                  bitstrings, counts, hamiltonian, reduce=False), -1)
         beta_expectation = tf.reduce_sum(probs * beta_expectations)
-      grad_qnn = tape.gradient(beta_expectation, model.qnn.trainable_variables)
-      grad_qnn = [grad_y * g for g in grad_qnn]
+      grad_qnn = tape.gradient(beta_expectation,
+                               qhbm_model.qnn.trainable_variables)
+      grad_qnn = [grad_y * grad for grad in grad_qnn]
 
       with tf.GradientTape() as tape:
-        tape.watch(model.ebm.trainable_variables)
-        energies = model.ebm.energy(bitstrings)
-      energy_jac = tape.jacobian(energies, model.ebm.trainable_variables)
+        tape.watch(qhbm_model.ebm.trainable_variables)
+        energies = qhbm_model.ebm.energy(bitstrings)
+      energy_jac = tape.jacobian(energies, qhbm_model.ebm.trainable_variables)
       probs_diffs = probs * (beta_expectations - energies)
-      avg_diff = tf.reduce_sum(probs_diffs)
       grad_ebm = [
           grad_y *
-          (avg_diff * tf.reduce_sum(tf.transpose(probs * tf.transpose(g)), 0) -
-           tf.reduce_sum(tf.transpose(probs_diffs * tf.transpose(g)), 0))
-          for g in energy_jac
+          (tf.reduce_sum(probs_diffs) *
+           tf.reduce_sum(tf.transpose(probs * tf.transpose(jac)), 0) -
+           tf.reduce_sum(tf.transpose(probs_diffs * tf.transpose(jac)), 0))
+          for jac in energy_jac
       ]
+
       grad_qhbm = grad_ebm + grad_qnn
-      if variables is None:
-        return grad_qhbm
-      return grad_qhbm, [tf.zeros_like(g) for g in grad_qhbm]
+      if variables:
+        return grad_qhbm, [tf.zeros_like(var) for var in variables]
+      return grad_qhbm
 
-    return beta * expectation - entropy, grad
+    return beta * expectation - entropy, gradient
 
-  return loss(model.trainable_variables)
+  return function(qhbm_model.trainable_variables)
diff --git a/tests/architectures_test.py b/tests/architectures_test.py
index 4b985526..37b41c41 100644
--- a/tests/architectures_test.py
+++ b/tests/architectures_test.py
@@ -12,311 +12,511 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests of the architectures module."""
-import random
+"""Selection of quantum circuit architectures used in defining QHBMs."""
+
+import bisect
 
-from absl.testing import parameterized
 import cirq
-from qhbmlib import architectures
 import sympy
-import tensorflow as tf
 import tensorflow_quantum as tfq
 
+from qhbmlib import ebm
+
+# ============================================================================ #
+# HEA components.
+# ============================================================================ #
+
+
+def get_xz_rotation(q, a, b):
+  """Two-axis single qubit rotation."""
+  return cirq.Circuit(cirq.X(q)**a, cirq.Z(q)**b)
+
+
+def get_xyz_rotation(q, a, b, c):
+  """General single qubit rotation."""
+  return cirq.Circuit(cirq.X(q)**a, cirq.Y(q)**b, cirq.Z(q)**c)
+
+
+def get_cz_exp(q0, q1, a):
+  """Exponent of entangling CZ gate."""
+  return cirq.Circuit(cirq.CZPowGate(exponent=a)(q0, q1))
+
+
+def get_zz_exp(q0, q1, a):
+  """Exponent of entangling ZZ gate."""
+  return cirq.Circuit(cirq.ZZPowGate(exponent=a)(q0, q1))
+
+
+def get_cnot_exp(q0, q1, a):
+  """Exponent of entangling CNot gate."""
+  return cirq.Circuit(cirq.CNotPowGate(exponent=a)(q0, q1))
+
+
+def get_xz_rotation_layer(qubits, layer_num, name):
+  """Apply two-axis single qubit rotations to all the given qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for n, q in enumerate(qubits):
+    sx, sz = sympy.symbols("sx_{0}_{1}_{2} sz_{0}_{1}_{2}".format(
+        name, layer_num, n))
+    layer_symbols += [sx, sz]
+    circuit += get_xz_rotation(q, sx, sz)
+  return circuit, layer_symbols
+
+
+def get_cz_exp_layer(qubits, layer_num, name):
+  """Apply parameterized CZ gates to all pairs of nearest-neighbor qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for n, (q0, q1) in enumerate(zip(qubits[::2], qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n))
+    layer_symbols += [a]
+    circuit += get_cz_exp(q0, q1, a)
+  shifted_qubits = qubits[1::]
+  for n, (q0, q1) in enumerate(zip(shifted_qubits[::2], shifted_qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n + 1))
+    layer_symbols += [a]
+    circuit += get_cz_exp(q0, q1, a)
+  return circuit, layer_symbols
+
+
+def get_hardware_efficient_model_unitary(qubits, num_layers, name):
+  """Build a 1D HEA alternating XZ rotation and CZ exponent layers."""
+  circuit = cirq.Circuit()
+  all_symbols = []
+  for layer_num in range(num_layers):
+    new_circ, new_symb = get_xz_rotation_layer(qubits, layer_num, name)
+    circuit += new_circ
+    all_symbols += new_symb
+    if len(qubits) > 1:
+      new_circ, new_symb = get_cz_exp_layer(qubits, layer_num, name)
+      circuit += new_circ
+      all_symbols += new_symb
+  return circuit, all_symbols
+
+
+def get_cnot_exp_layer(qubits, layer_num, name):
+  """Apply CNot gates to all pairs of nearest-neighbor qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for n, (q0, q1) in enumerate(zip(qubits[::2], qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n))
+    layer_symbols += [a]
+    circuit += get_cnot_exp(q0, q1, a)
+  shifted_qubits = qubits[1::]
+  for n, (q0, q1) in enumerate(zip(shifted_qubits[::2], shifted_qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n + 1))
+    layer_symbols += [a]
+    circuit += get_cnot_exp(q0, q1, a)
+  return circuit, layer_symbols
+
+
+def hea_1d_cnot(qubits, num_layers, name):
+  """Build a 1D HEA alternating XZ rotation and CNot exponent layers."""
+  circuit = cirq.Circuit()
+  all_symbols = []
+  for layer_num in range(num_layers):
+    new_circ, new_symb = get_xz_rotation_layer(qubits, layer_num, name)
+    circuit += new_circ
+    all_symbols += new_symb
+    if len(qubits) > 1:
+      new_circ, new_symb = get_cnot_exp_layer(qubits, layer_num, name)
+      circuit += new_circ
+      all_symbols += new_symb
+  return circuit, all_symbols
+
+
+def get_zz_exp_layer(qubits, layer_num, name):
+  """Apply ZZ gates to all pairs of nearest-neighbor qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for n, (q0, q1) in enumerate(zip(qubits[::2], qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n))
+    layer_symbols += [a]
+    circuit += get_zz_exp(q0, q1, a)
+  shifted_qubits = qubits[1::]
+  for n, (q0, q1) in enumerate(zip(shifted_qubits[::2], shifted_qubits[1::2])):
+    a = sympy.symbols("sc_{0}_{1}_{2}".format(name, layer_num, 2 * n + 1))
+    layer_symbols += [a]
+    circuit += get_zz_exp(q0, q1, a)
+  return circuit, layer_symbols
+
+
+def hea_1d_zz(qubits, num_layers, name):
+  """Build a 1D HEA alternating XZ rotation and ZZ exponent layers."""
+  circuit = cirq.Circuit()
+  all_symbols = []
+  for layer_num in range(num_layers):
+    new_circ, new_symb = get_xz_rotation_layer(qubits, layer_num, name)
+    circuit += new_circ
+    all_symbols += new_symb
+    if len(qubits) > 1:
+      new_circ, new_symb = get_zz_exp_layer(qubits, layer_num, name)
+      circuit += new_circ
+      all_symbols += new_symb
+  return circuit, all_symbols
+
+
+# ============================================================================ #
+# 2D HEA.
+# ============================================================================ #
+
+
+def get_2d_xz_rotation_layer(rows, cols, layer_num, name):
+  """Apply single qubit rotations on a grid of qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for r in range(rows):
+    for c in range(cols):
+      sx = sympy.Symbol(f"sx_{name}_{layer_num}_{r}_{c}")
+      sz = sympy.Symbol(f"sz_{name}_{layer_num}_{r}_{c}")
+      layer_symbols += [sx, sz]
+      circuit += get_xz_rotation(cirq.GridQubit(r, c), sx, sz)
+  return circuit, layer_symbols
+
+
+def get_2d_cz_exp_layer(rows, cols, layer_num, name):
+  """Apply CZ gates to all pairs of nearest-neighbor qubits on a grid."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  # Apply horizontal bonds
+  for r in range(rows):
+    for par in [0, 1]:
+      for q_c_0, q_c_1 in zip(range(par, cols, 2), range(par + 1, cols, 2)):
+        scz = sympy.Symbol(f"scz_{name}_{layer_num}_row{r}_{q_c_0}_{q_c_1}")
+        layer_symbols += [scz]
+        circuit += get_cz_exp(
+            cirq.GridQubit(r, q_c_0), cirq.GridQubit(r, q_c_1), scz)
+  # Apply vertical bonds
+  for c in range(cols):
+    for par in [0, 1]:
+      for q_r_0, q_r_1 in zip(range(par, rows, 2), range(par + 1, rows, 2)):
+        scz = sympy.Symbol(f"scz_{name}_{layer_num}_col{c}_{q_r_0}_{q_r_1}")
+        layer_symbols += [scz]
+        circuit += get_cz_exp(
+            cirq.GridQubit(q_r_0, c), cirq.GridQubit(q_r_1, c), scz)
+  return circuit, layer_symbols
+
+
+def get_2d_hea(rows, cols, num_layers, name):
+  """Build a 2D HEA ansatz.
+
+    Args:
+      rows: int specifying the number of rows in the ansatz.
+      cols: int specifying the number of columns in the ansatz.
+      num_layers: int specifying how many layers of 2D HEA to apply.
+      name: string which will be included in the parameters of the ansatz.
+
+    Returns:
+      circuit: `cirq.Circuit` which is the ansatz.
+      symbols: list of `sympy.Symbol`s which are the parameters of the model.
+    """
+  symbols = []
+  circuit = cirq.Circuit()
+  for layer in range(num_layers):
+    xz_circuit, xz_symbols = get_2d_xz_rotation_layer(rows, cols, layer, name)
+    circuit += xz_circuit
+    symbols += xz_symbols
+    cz_circuit, cz_symbols = get_2d_cz_exp_layer(rows, cols, layer, name)
+    circuit += cz_circuit
+    symbols += cz_symbols
+  return circuit, symbols
+
+
+def get_2d_xyz_rotation_layer(rows, cols, layer_num, name):
+  """Apply XYZ single qubit rotations on a grid of qubits."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  for r in range(rows):
+    for c in range(cols):
+      sx = sympy.Symbol(f"sx_{name}_{layer_num}_{r}_{c}")
+      sy = sympy.Symbol(f"sy_{name}_{layer_num}_{r}_{c}")
+      sz = sympy.Symbol(f"sz_{name}_{layer_num}_{r}_{c}")
+      layer_symbols += [sx, sy, sz]
+      circuit += get_xyz_rotation(cirq.GridQubit(r, c), sx, sy, sz)
+  return circuit, layer_symbols
+
+
+def get_2d_hea_y(rows, cols, num_layers, name):
+  """Build a 2D HEA ansatz with XYZ single qubit rotations.
+
+    Args:
+      rows: int specifying the number of rows in the ansatz.
+      cols: int specifying the number of columns in the ansatz.
+      num_layers: int specifying how many layers of 2D HEA to apply.
+      name: string which will be included in the parameters of the ansatz.
+
+    Returns:
+      circuit: `cirq.Circuit` which is the ansatz.
+      symbols: list of `sympy.Symbol`s which are the parameters of the model.
+    """
+  symbols = []
+  circuit = cirq.Circuit()
+  for layer in range(num_layers):
+    xyz_circuit, xyz_symbols = get_2d_xyz_rotation_layer(
+        rows, cols, layer, name)
+    circuit += xyz_circuit
+    symbols += xyz_symbols
+    cz_circuit, cz_symbols = get_2d_cz_exp_layer(rows, cols, layer, name)
+    circuit += cz_circuit
+    symbols += cz_symbols
+  return circuit, symbols
+
+
+def get_2d_cnot_exp_layer(rows, cols, layer_num, name):
+  """Apply CNot gates to all pairs of nearest-neighbor qubits on a grid."""
+  layer_symbols = []
+  circuit = cirq.Circuit()
+  # Apply horizontal bonds
+  for r in range(rows):
+    for par in [0, 1]:
+      for q_c_0, q_c_1 in zip(range(par, cols, 2), range(par + 1, cols, 2)):
+        scnot = sympy.Symbol(f"scnot_{name}_{layer_num}_row{r}_{q_c_0}_{q_c_1}")
+        layer_symbols += [scnot]
+        circuit += get_cnot_exp(
+            cirq.GridQubit(r, q_c_0), cirq.GridQubit(r, q_c_1), scnot)
+  # Apply vertical bonds
+  for c in range(cols):
+    for par in [0, 1]:
+      for q_r_0, q_r_1 in zip(range(par, rows, 2), range(par + 1, rows, 2)):
+        scnot = sympy.Symbol(f"scnot_{name}_{layer_num}_col{c}_{q_r_0}_{q_r_1}")
+        layer_symbols += [scnot]
+        circuit += get_cnot_exp(
+            cirq.GridQubit(q_r_0, c), cirq.GridQubit(q_r_1, c), scnot)
+  return circuit, layer_symbols
+
+
+def get_2d_hea_cnot(rows, cols, num_layers, name):
+  """Build a 2D HEA ansatz with CNot exponent entangling layers.
+
+    Args:
+      rows: int specifying the number of rows in the ansatz.
+      cols: int specifying the number of columns in the ansatz.
+      num_layers: int specifying how many layers of 2D HEA to apply.
+      name: string which will be included in the parameters of the ansatz.
+
+    Returns:
+      circuit: `cirq.Circuit` which is the ansatz.
+      symbols: list of `sympy.Symbol`s which are the parameters of the model.
+    """
+  symbols = []
+  circuit = cirq.Circuit()
+  for layer in range(num_layers):
+    xz_circuit, xz_symbols = get_2d_xz_rotation_layer(rows, cols, layer, name)
+    circuit += xz_circuit
+    symbols += xz_symbols
+    cnot_circuit, cnot_symbols = get_2d_cnot_exp_layer(rows, cols, layer, name)
+    circuit += cnot_circuit
+    symbols += cnot_symbols
+  return circuit, symbols
+
+
+# ============================================================================ #
+# Trotter components.
+# ============================================================================ #
+
+
+def get_trotter_model_unitary(p, h_list, name):
+  """Get a trotterized ansatz.
+
+    Args:
+      p: integer representing the number of QAOA steps.
+      h_list: List of `cirq.PauliSum`s representing the Hamiltonians to
+        exponentiate to build the circuit.
+      name: string used to make symbols unique to this call.
+
+    Returns:
+      circuit: `cirq.Circuit` representing the parameterized QAOA ansatz.
+      all_symbols: Python `list` of `sympy.Symbol`s containing all the
+          parameters of the circuit.
+    """
+  circuit = cirq.Circuit()
+  all_symbols = []
+  for j in range(p):
+    for n, h in enumerate(h_list):
+      new_symb = sympy.Symbol("phi_{0}_L{1}_H{2}".format(name, j, n))
+      circuit += tfq.util.exponential([h], coefficients=[new_symb])
+      all_symbols.append(new_symb)
+  return circuit, all_symbols
+
+
+# ============================================================================ #
+# QNHF components.
+# ============================================================================ #
+
+
+def get_general_trotter_unitary(symbol_array, h_list):
+  """Get a trotterized ansatz built from the given symbols.
+
+    Args:
+      symbol_array: 2-D array (list of lists) of `sympy.Symbol`s, indexed first
+        by trotter layer, then by the corresponding Hamiltonian in `h_list`.
+      h_list: List of `cirq.PauliSum`s representing all the Hamiltonians to
+        exponentiate in a single trotter step.
+
+    Returns:
+      circuit: `cirq.Circuit` representing the parameterized trotter ansatz.
+    """
+  if any(len(s_layer) != len(h_list) for s_layer in symbol_array):
+    raise ValueError(
+        "Must have the same number of symbols as Hamiltonians in each layer.")
+  circuit = cirq.Circuit()
+  for s_layer in symbol_array:
+    for n, h in enumerate(h_list):
+      circuit += tfq.util.exponential([h], coefficients=[s_layer[n]])
+  return circuit
+
+
+def get_qnhf_symbols(p, n_bits, n_h, max_k, name):
+  """Get all the symbols used by QNHF QHBMs.
+
+    Args:
+      p: the number of trotter steps.
+      n_bits: the number of bits in the discrete sample space.
+      n_h: the number of non-classical hamiltonian terms.
+      max_k: the maximum locality of interactions in the classical model.
+      name: string appended to symbols to make them unique across QHBMs.
+
+    Returns:
+      eta_theta_symbols: 2-D array of `sympy.Symbol`s, where the first index
+        is the trotter step and the second index is the particular symbol at
+        that layer.
+      phis_symbols: 2-D array of `sympy.Symbol`s, where the first index is the
+        trotter step and the second index is the hamiltonian term at that
+        layer.
+    """
+  eta_theta_symbols = []
+  phis_symbols = []
+  num_thetas_per_layer = ebm.get_klocal_energy_function_num_values(
+      n_bits, max_k)
+  for t_step in range(p):
+    eta_theta_symbols.append([])
+    for j in range(num_thetas_per_layer):
+      eta_theta_symbols[-1].append(
+          sympy.Symbol("eta_L{1}_theta_T{2}_{0}".format(name, t_step, j)))
+    phis_symbols.append([])
+    for m in range(n_h):
+      phis_symbols[-1].append(
+          sympy.Symbol("phi_L{1}_H{2}_{0}".format(name, t_step, m)))
+  return eta_theta_symbols, phis_symbols
+
+
+def get_qnhf_diagonal_operators(qubits, max_k):
+  diag_op_list = []
+  for k in range(1, max_k + 1):
+    index_list = ebm.get_parity_index_list(len(qubits), k)
+    for this_index_list in index_list:
+      this_z_list = [cirq.Z(qubits[i]) for i in this_index_list]
+      diag_op_list.append(
+          cirq.PauliSum.from_pauli_strings(cirq.PauliString(*this_z_list)))
+  return diag_op_list
+
+
+def get_qnhf_model_unitary(p, qubits, max_k, h_list, name):
+  """Get the QNHF unitary corresponding to the given Hamiltonians.
+
+    Args:
+      p: the number of trotter steps.
+      qubits: list of `cirq.GridQubit`s on which to build KOBE.
+      max_k: the maximum locality of interactions in the classical model.
+      h_list: list of `cirq.PauliSum`s representing the hamiltonian terms.
+      name: string appended to symbols to uniqueify across QHBMs.
+
+    Returns:
+      circuit: `cirq.Circuit` representing the QNHF ansatz.
+    """
+  eta_theta_symbols, phis_symbols = get_qnhf_symbols(p, len(qubits),
+                                                     len(h_list), max_k, name)
+  total_symbols = []
+  for t_layer, p_layer in zip(eta_theta_symbols, phis_symbols):
+    total_symbols.append(t_layer + p_layer)
+  classical_h_list = get_qnhf_diagonal_operators(qubits, max_k)
+  total_h_list = classical_h_list + h_list
+  return get_general_trotter_unitary(total_symbols, total_h_list)
+
+
+# ============================================================================ #
+# Convolutional components.
+# ============================================================================ #
+
+
+def qubits_to_grid(qubits):
+  qubit_grid = []
+  for q in qubits:
+    while q.row > len(qubit_grid) - 1:  # Pad skipped rows so the index exists.
+      qubit_grid.append([])
+    bisect.insort(qubit_grid[q.row], q)
+  return qubit_grid
+
+
+def one_qubit_unitary(q, symbols):
+  """Make a Cirq circuit for an arbitrary one qubit unitary."""
+  return cirq.Circuit(
+      cirq.X(q)**symbols[0],
+      cirq.Y(q)**symbols[1],
+      cirq.Z(q)**symbols[2])
+
+
+def two_qubit_unitary(q0, q1, symbols):
+  """Make a Cirq circuit for an arbitrary two qubit unitary."""
+  circuit = cirq.Circuit()
+  circuit += one_qubit_unitary(q0, symbols[0:3])
+  circuit += one_qubit_unitary(q1, symbols[3:6])
+  circuit += [cirq.ZZ(q0, q1)**symbols[6]]
+  circuit += [cirq.YY(q0, q1)**symbols[7]]
+  circuit += [cirq.XX(q0, q1)**symbols[8]]
+  circuit += one_qubit_unitary(q0, symbols[9:12])
+  circuit += one_qubit_unitary(q1, symbols[12:])
+  return circuit
+
+
+def two_qubit_pool(source_qubit, sink_qubit, symbols):
+  """Make a Cirq circuit to do a parameterized 'pooling' operation.
+
+    Attempts to reduce entanglement down from two qubits to just one.
+  """
+  pool_circuit = cirq.Circuit()
+  sink_basis_selector = one_qubit_unitary(sink_qubit, symbols[0:3])
+  source_basis_selector = one_qubit_unitary(source_qubit, symbols[3:6])
+  pool_circuit.append(sink_basis_selector)
+  pool_circuit.append(source_basis_selector)
+  pool_circuit.append(cirq.CNOT(control=source_qubit, target=sink_qubit))
+  pool_circuit.append(sink_basis_selector**-1)
+  return pool_circuit
+
+
+# TODO(#19)
+# def quantum_convolutional_layer(qubits, layer_num, name):
+#     """Assumes the qubits are arranged on a grid."""
+#     qubit_grid = qubits_to_grid(qubits)
+#     layer_symbols = []
+#     circuit = cirq.Circuit()
+#     tied_2q_symbols = [
+#         sympy.Symbol("s_conv_I{0}_L{1}_N{2}".format(name, layer_num, s_num))
+#         for s_num in range(15)
+#     ]
+#     # Apply horizontal bonds
+#     for r in qubit_grid:
+#         r_clipped = r[1:]
+#         for alt_r in [r, r_clipped]:
+#             for q0, q1 in zip(alt_r[::2], alt_r[1::2]):
+#                 circuit += two_qubit_unitary(q0, q1, tied_2q_symbols)
+#     # Apply vertical bonds
+#     grid_clipped = qubit_grid[1:]
+#     for r0, r1 in zip(qubit_grid[::2], qubit_grid[1::2]):
+#         for q0, q1 in zip(r0, r1):
+#             circuit += two_qubit_unitary(q0, q1, tied_2q_symbols)
+#     for r0, r1 in zip(grid_clipped[::2], grid_clipped[1::2]):
+#         for q0, q1 in zip(r0, r1):
+#             circuit += two_qubit_unitary(q0, q1, tied_2q_symbols)
+#     return circuit, tied_2q_symbols
 
-class RPQCTest(tf.test.TestCase, parameterized.TestCase):
-  """Test RPQC functions in the architectures module."""
-
-  def test_get_xz_rotation(self):
-    """Confirm an XZ rotation is returned."""
-    q = cirq.GridQubit(7, 9)
-    a, b = sympy.symbols("a b")
-    expected_circuit = cirq.Circuit(cirq.X(q)**a, cirq.Z(q)**b)
-    test_circuit = architectures.get_xz_rotation(q, a, b)
-    self.assertEqual(expected_circuit, test_circuit)
-
-  def test_get_cz_exp(self):
-    """Confirm an exponentiated CNOT is returned."""
-    q0 = cirq.GridQubit(4, 1)
-    q1 = cirq.GridQubit(2, 5)
-    a = sympy.Symbol("a")
-    expected_circuit = cirq.Circuit(cirq.CZ(q0, q1)**a)
-    test_circuit = architectures.get_cz_exp(q0, q1, a)
-    self.assertEqual(expected_circuit, test_circuit)
-
-  def test_get_xz_rotation_layer(self):
-    """Confirm an XZ rotation on every qubit is returned."""
-    qubits = cirq.GridQubit.rect(1, 2)
-    layer_num = 3
-    name = "test_rot"
-    expected_symbols = []
-    expected_circuit = cirq.Circuit()
-    for n, q in enumerate(qubits):
-      expected_symbols.append(
-          sympy.Symbol("sx_{0}_{1}_{2}".format(name, layer_num, n)))
-      expected_circuit += cirq.Circuit(cirq.X(q)**expected_symbols[-1])
-      expected_symbols.append(
-          sympy.Symbol("sz_{0}_{1}_{2}".format(name, layer_num, n)))
-      expected_circuit += cirq.Circuit(cirq.Z(q)**expected_symbols[-1])
-    test_circuit, test_symbols = architectures.get_xz_rotation_layer(
-        qubits, layer_num, name)
-    self.assertEqual(expected_circuit, test_circuit)
-    self.assertEqual(expected_symbols, test_symbols)
-    # Confirm all symbols are unique
-    self.assertEqual(len(expected_symbols), len(set(test_symbols)))
-
-  @parameterized.parameters([{"n_qubits": 11}, {"n_qubits": 12}])
-  def test_get_cz_exp_layer(self, n_qubits):
-    """Confirm an exponentiated CZ on every qubit is returned."""
-    qubits = cirq.GridQubit.rect(1, n_qubits)
-    layer_num = 0
-    name = "test_cz"
-    expected_symbols = []
-    expected_circuit = cirq.Circuit()
-    for n, (q0, q1) in enumerate(zip(qubits, qubits[1:])):
-      if n % 2 == 0:
-        expected_symbols.append(
-            sympy.Symbol("sc_{0}_{1}_{2}".format(name, layer_num, n)))
-        expected_circuit += cirq.Circuit(cirq.CZ(q0, q1)**expected_symbols[-1])
-    for n, (q0, q1) in enumerate(zip(qubits, qubits[1:])):
-      if n % 2 == 1:
-        expected_symbols.append(
-            sympy.Symbol("sc_{0}_{1}_{2}".format(name, layer_num, n)))
-        expected_circuit += cirq.Circuit(cirq.CZ(q0, q1)**expected_symbols[-1])
-    test_circuit, test_symbols = architectures.get_cz_exp_layer(
-        qubits, layer_num, name)
-    self.assertEqual(expected_circuit, test_circuit)
-    self.assertEqual(expected_symbols, test_symbols)
-    # Confirm all symbols are unique
-    self.assertEqual(len(expected_symbols), len(set(test_symbols)))
-
-  @parameterized.parameters([{"n_qubits": 11}, {"n_qubits": 12}])
-  def test_get_hardware_efficient_model_unitary(self, n_qubits):
-    """Confirm a multi-layered circuit is returned."""
-    qubits = cirq.GridQubit.rect(1, n_qubits)
-    name = "test_hardware_efficient_model"
-    expected_symbols = []
-    expected_circuit = cirq.Circuit()
-    this_circuit, this_symbols = architectures.get_xz_rotation_layer(
-        qubits, 0, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    this_circuit, this_symbols = architectures.get_cz_exp_layer(qubits, 0, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    this_circuit, this_symbols = architectures.get_xz_rotation_layer(
-        qubits, 1, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    this_circuit, this_symbols = architectures.get_cz_exp_layer(qubits, 1, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    test_circuit, test_symbols = architectures.get_hardware_efficient_model_unitary(
-        qubits, 2, name)
-    self.assertEqual(expected_circuit, test_circuit)
-    self.assertEqual(expected_symbols, test_symbols)
-    # Confirm all symbols are unique
-    self.assertEqual(len(expected_symbols), len(set(test_symbols)))
-
-  def test_get_hardware_efficient_model_unitary_1q(self):
-    """Confirm the correct model is returned when there is only one qubit."""
-    qubits = [cirq.GridQubit(2, 3)]
-    name = "test_harware_efficient_model_1q"
-    expected_symbols = []
-    expected_circuit = cirq.Circuit()
-    this_circuit, this_symbols = architectures.get_xz_rotation_layer(
-        qubits, 0, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    this_circuit, this_symbols = architectures.get_xz_rotation_layer(
-        qubits, 1, name)
-    expected_symbols += this_symbols
-    expected_circuit += this_circuit
-    test_circuit, test_symbols = architectures.get_hardware_efficient_model_unitary(
-        qubits, 2, name)
-    self.assertEqual(expected_circuit, test_circuit)
-    self.assertEqual(expected_symbols, test_symbols)
-    # Confirm all symbols are unique
-    self.assertEqual(len(expected_symbols), len(set(test_symbols)))
-
-
-class HEA2dTest(tf.test.TestCase):
-  """Test 2D HEA functions in the architectures module."""
-
-  def test_get_2d_xz_rotation_layer(self):
-    """Confirms the xz rotations are correct on a 2x3 grid."""
-    rows = 2
-    cols = 3
-    name = "test_xz"
-    layer_num = 7
-    circuit_expect = cirq.Circuit()
-    symbols_expect = []
-    for r in range(rows):
-      for c in range(cols):
-        q = cirq.GridQubit(r, c)
-        s = sympy.Symbol(f"sx_{name}_{layer_num}_{r}_{c}")
-        x_gate = cirq.X(q)**s
-        symbols_expect.append(s)
-        s = sympy.Symbol(f"sz_{name}_{layer_num}_{r}_{c}")
-        z_gate = cirq.Z(q)**s
-        symbols_expect.append(s)
-        circuit_expect += cirq.Circuit(x_gate, z_gate)
-    test_circuit, test_symbols = architectures.get_2d_xz_rotation_layer(
-        rows, cols, layer_num, name)
-    self.assertEqual(circuit_expect, test_circuit)
-    self.assertEqual(symbols_expect, test_symbols)
-
-  def test_get_2d_xz_rotation_layer_small(self):
-    """Confirms the xz rotation layer on one qubit is just a single xz."""
-    name = "test_small_xz"
-    layer_num = 29
-    circuit_expect = cirq.Circuit()
-    symbols_expect = []
-    q = cirq.GridQubit(0, 0)
-    s = sympy.Symbol(f"sx_{name}_{layer_num}_{0}_{0}")
-    x_gate = cirq.X(q)**s
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"sz_{name}_{layer_num}_{0}_{0}")
-    z_gate = cirq.Z(q)**s
-    symbols_expect.append(s)
-    circuit_expect += cirq.Circuit(x_gate, z_gate)
-    test_circuit, test_symbols = architectures.get_2d_xz_rotation_layer(
-        1, 1, layer_num, name)
-    self.assertEqual(circuit_expect, test_circuit)
-    self.assertEqual(symbols_expect, test_symbols)
-
-  def test_get_2d_cz_exp_layer(self):
-    """Confirms the cz exponentials are correct on a 2x3 grid."""
-    name = "test_cz"
-    layer_num = 19
-    circuit_expect = cirq.Circuit()
-    symbols_expect = []
-
-    # Apply horizontal bonds
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_row{0}_{0}_{1}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)))
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_row{0}_{1}_{2}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 1), cirq.GridQubit(0, 2)))
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_row{1}_{0}_{1}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(1, 0), cirq.GridQubit(1, 1)))
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_row{1}_{1}_{2}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(1, 1), cirq.GridQubit(1, 2)))
-    symbols_expect.append(s)
-
-    # Apply vertical bonds
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_col{0}_{0}_{1}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 0), cirq.GridQubit(1, 0)))
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_col{1}_{0}_{1}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 1), cirq.GridQubit(1, 1)))
-    symbols_expect.append(s)
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_col{2}_{0}_{1}")
-    circuit_expect += cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 2), cirq.GridQubit(1, 2)))
-    symbols_expect.append(s)
-
-    test_circuit, test_symbols = architectures.get_2d_cz_exp_layer(
-        2, 3, layer_num, name)
-    self.assertEqual(circuit_expect, test_circuit)
-    self.assertEqual(symbols_expect, test_symbols)
-
-  def test_get_2d_cz_exp_layer_empty(self):
-    """On single qubit, no gates should be returned."""
-    test_circuit, test_symbols = architectures.get_2d_cz_exp_layer(1, 1, 1, "")
-    self.assertEqual(cirq.Circuit(), test_circuit)
-    self.assertEqual([], test_symbols)
-
-  def test_get_2d_cz_exp_layer_small(self):
-    """Tests on 2 qubits."""
-    name = "small"
-    layer_num = 51
-    s = sympy.Symbol(f"scz_{name}_{layer_num}_row{0}_{0}_{1}")
-    circuit_expect = cirq.Circuit(
-        cirq.CZPowGate(exponent=s)(cirq.GridQubit(0, 0), cirq.GridQubit(0, 1)))
-    symbols_expect = [s]
-    test_circuit, test_symbols = architectures.get_2d_cz_exp_layer(
-        1, 2, layer_num, name)
-    self.assertEqual(circuit_expect, test_circuit)
-    self.assertEqual(symbols_expect, test_symbols)
-
-  def test_get_2d_hea(self):
-    """Confirms the hea is correct on a 2x3 grid."""
-    num_layers = 2
-    name = "test_hea"
-    circuit_expect = cirq.Circuit()
-    symbols_expect = []
-    for layer in range(num_layers):
-      xz_circuit, xz_symbols = architectures.get_2d_xz_rotation_layer(
-          2, 3, layer, name)
-      cz_circuit, cz_symbols = architectures.get_2d_cz_exp_layer(
-          2, 3, layer, name)
-      circuit_expect += xz_circuit
-      symbols_expect += xz_symbols
-      circuit_expect += cz_circuit
-      symbols_expect += cz_symbols
-    test_circuit, test_symbols = architectures.get_2d_hea(2, 3, 2, name)
-    self.assertEqual(circuit_expect, test_circuit)
-    self.assertEqual(symbols_expect, test_symbols)
-
-
-class TrotterTest(tf.test.TestCase, parameterized.TestCase):
-  """Test trotter functions in the architectures module."""
-
-  def test_get_trotter_model_unitary(self):
-    """Confirm correct trotter unitary and parameters are returned."""
-    n_qubits = 4
-    qubits = cirq.GridQubit.rect(1, n_qubits)
-    p = 7
-    hz = cirq.PauliSum()
-    hx = cirq.PauliSum()
-    test_name = "test_trotter"
-    for q in qubits:
-      hz += cirq.PauliString(random.uniform(-4.5, 4.5), cirq.Z(q))
-      hx += cirq.PauliString(cirq.X(q))
-    for q0, q1 in zip(qubits[:-1], qubits[1:]):
-      hz += cirq.PauliString(random.uniform(-4.5, 4.5), cirq.Z(q0), cirq.Z(q1))
-    gammas = [
-        sympy.Symbol("phi_test_trotter_L{}_H0".format(j)) for j in range(p)
-    ]
-    betas = [
-        sympy.Symbol("phi_test_trotter_L{}_H1".format(j)) for j in range(p)
-    ]
-    expected_symbols = []
-    for g, b in zip(gammas, betas):
-      expected_symbols += [g, b]
-    expected_circuit = cirq.Circuit()
-    x_circuit = cirq.PauliSum()
-    for q in qubits:
-      x_circuit += cirq.X(q)
-    for j in range(p):
-      expected_circuit += tfq.util.exponential([hz], coefficients=[gammas[j]])
-      expected_circuit += tfq.util.exponential([x_circuit],
-                                               coefficients=[betas[j]])
-    test_circuit, test_symbols = architectures.get_trotter_model_unitary(
-        p, [hz, hx], test_name)
-    self.assertEqual(expected_circuit, test_circuit)
-    self.assertAllEqual(expected_symbols, test_symbols)
-
-
-class ConvolutionalTest(tf.test.TestCase, parameterized.TestCase):
-  """Test convolutional functions in the architectures module."""
-
-  def test_one_qubit_unitary(self):
-    pass
-
-  def test_two_qubit_unitary(self):
-    pass
-
-  def test_two_qubit_pool(self):
-    pass
-
-  def test_get_convolutional_model_unitary(self):
-    pass
+# def quantum_pool_layer(qubits, layer_num, name, direction_flag):
+#     """Assumes the qubits are arranged on a grid."""
+#     qubit_grid = qubits_to_grid(qubits)
+#     layer_symbols = []
+#     circuit = cirq.Circuit()
+#     tied_pool_symbols = [
+#         sympy.Symbol("s_pool_I{0}_L{1}_N{2}".format(name, layer_num, s_num))
+#         for s_num in range(6)
+#     ]
\ No newline at end of file
diff --git a/tests/qmhl_test.py b/tests/qmhl_test.py
index c9a4a1b0..16b7817b 100644
--- a/tests/qmhl_test.py
+++ b/tests/qmhl_test.py
@@ -49,7 +49,7 @@ def test_zero_grad(self):
       target_samples = tf.constant(1e6)
       target_circuits, target_counts = target.circuits(target_samples)
       with tf.GradientTape() as tape:
-        loss = qmhl.qmhl(qhbm_model, target_circuits, target_counts)
+        loss = qmhl.qmhl(qhbm_model, (target_circuits, target_counts))
       thetas_grads, phis_grads = tape.gradient(
           loss, (qhbm_model.ebm.trainable_variables,
                  qhbm_model.qnn.trainable_variables))
@@ -139,7 +139,7 @@ def test_loss_value_x_rot(self):
         target_states = tfq.convert_to_tensor(target_states_list)
 
         with tf.GradientTape() as tape:
-          actual_loss = qmhl_func(test_qhbm, target_states, target_counts)
+          actual_loss = qmhl_func(test_qhbm, (target_states, target_counts))
         # TODO(zaqqwerty): add way to use a log QHBM as observable on states
         expected_expectation = tf.reduce_sum(
             test_thetas * (2 * data_probs - 1) * tf.math.cos(alphas) *
@@ -158,6 +158,27 @@ def test_loss_value_x_rot(self):
         self.assertAllClose(
             actual_phis_grads, expected_phis_grads, atol=ATOL, rtol=RTOL)
 
+  def test_modular_hamiltonian(self):
+    """Loss of a QHBM against its own copy is its entropy, with zero grads."""
+    sample_count = tf.constant(1e6)
+    for n in range(1, 6):
+      model = test_util.get_random_qhbm(
+          cirq.GridQubit.rect(1, n), 1, "QMHLLossTest{}".format(n))
+      target = model.copy()
+
+      # Differentiate the QMHL loss w.r.t. the EBM and QNN variables.
+      with tf.GradientTape() as tape:
+        loss = qmhl.qmhl(model, target, num_samples=sample_count)
+      variables = (model.ebm.trainable_variables,
+                   model.qnn.trainable_variables)
+      thetas_grads, phis_grads = tape.gradient(loss, variables)
+
+      # Loss must match the EBM entropy; both gradient sets must vanish.
+      tol = 5e-3
+      self.assertAllClose(loss, model.ebm.entropy(), atol=tol)
+      for grads in (thetas_grads, phis_grads):
+        self.assertAllClose(grads, tf.zeros(tf.shape(grads)), atol=tol)
+
   def test_hypernetwork(self):
     for num_qubits in [1, 2, 3, 4, 5]:
       qubits = cirq.GridQubit.rect(1, num_qubits)
@@ -197,7 +218,7 @@ def test_hypernetwork(self):
               tf.reshape(output[index:index + size], shape))
           index += size
         qhbm_model.trainable_variables = output_trainable_variables
-        loss = qmhl.qmhl(qhbm_model, target_circuits, target_counts)
+        loss = qmhl.qmhl(qhbm_model, (target_circuits, target_counts))
 
       grads = tape.gradient(loss, [
           hypernetwork.trainable_variables, output,
@@ -228,7 +249,7 @@ def test_hypernetwork(self):
               tf.reshape(output[index:index + size], shape))
           index += size
         qhbm_model.trainable_variables = output_trainable_variables
-        loss = qmhl.qmhl(qhbm_model, target_circuits, target_counts)
+        loss = qmhl.qmhl(qhbm_model, (target_circuits, target_counts))
       grads = tape.gradient(loss, [c, qhbm_model.trainable_variables])
       c_grad = grads[0]
       qhbm_grads = grads[1]
diff --git a/tests/vqt_test.py b/tests/vqt_test.py
index 115ec2b1..556ba43d 100644
--- a/tests/vqt_test.py
+++ b/tests/vqt_test.py
@@ -55,8 +55,9 @@ def test_loss_consistency(self):
       for _ in range(num_random_hamiltonians):
         cirq_ham = test_util.get_random_pauli_sum(self.raw_qubits)
         tf_ham = tfq.convert_to_tensor([cirq_ham])
-        loss = vqt.vqt(test_qhbm, num_samples, tf_ham, beta)
-        loss_copy = vqt.vqt(test_qhbm_copy, num_samples, tf_ham, beta)
+        loss = vqt.vqt(test_qhbm, tf_ham, beta=beta, num_samples=num_samples)
+        loss_copy = vqt.vqt(
+            test_qhbm_copy, tf_ham, beta=beta, num_samples=num_samples)
         self.assertAllClose(loss_copy, loss, rtol=RTOL)
 
   def test_zero_grad(self):
@@ -72,7 +73,11 @@ def test_zero_grad(self):
     test_qnn.values.assign(tf.constant([1.0]))
     test_qhbm = qhbm.QHBM(test_ebm, test_qnn)
     with tf.GradientTape() as tape:
-      loss = vqt.vqt(test_qhbm, tf.constant(int(5e6)), tf_ham, tf.constant(1.0))
+      loss = vqt.vqt(
+          test_qhbm,
+          tf_ham,
+          beta=tf.constant(1.0),
+          num_samples=tf.constant(int(5e6)))
     gradient = tape.gradient(loss, test_qhbm.trainable_variables)
     for grad in gradient:
       self.assertAllClose(grad, tf.zeros_like(grad), rtol=RTOL)
@@ -132,7 +137,8 @@ def test_loss_value_x_rot(self):
         self.assertAllClose(actual_entropy, expected_entropy, rtol=RTOL)
 
         with tf.GradientTape() as tape:
-          actual_loss = vqt_func(test_qhbm, test_num_samples, test_h, test_beta)
+          actual_loss = vqt_func(
+              test_qhbm, test_h, beta=test_beta, num_samples=test_num_samples)
         expected_loss = test_beta * expected_expectation - expected_entropy
         self.assertAllClose(actual_loss, expected_loss, rtol=RTOL)
 
@@ -146,6 +152,23 @@ def test_loss_value_x_rot(self):
             actual_thetas_grads, expected_thetas_grads, rtol=RTOL)
         self.assertAllClose(actual_phis_grads, expected_phis_grads, rtol=RTOL)
 
+  def test_modular_hamiltonian(self):
+    """Confirm zero grads and loss of -log partition against a QHBM copy."""
+    for n in range(1, 6):
+      model = test_util.get_random_qhbm(
+          cirq.GridQubit.rect(1, n), 1, "VQTModHamTest{}".format(n))
+      model_copy = model.copy()
+      with tf.GradientTape() as tape:
+        beta = tf.constant(1.0)
+        num_samples = tf.constant(int(5e6))
+        loss = vqt.vqt(model, model_copy, beta=beta, num_samples=num_samples)
+      grads = tape.gradient(loss, model.trainable_variables)
+
+      # Gradients should vanish; loss should be -log_partition_function.
+      for g in grads:
+        self.assertAllClose(g, tf.zeros_like(g), rtol=RTOL)
+      self.assertAllClose(loss, -model.log_partition_function(), rtol=RTOL)
+
   def test_hypernetwork(self):
     for num_qubits in [1, 2, 3, 4, 5]:
       qubits = cirq.GridQubit.rect(1, num_qubits)
@@ -181,8 +204,11 @@ def test_hypernetwork(self):
               tf.reshape(output[index:index + size], shape))
           index += size
         test_qhbm.trainable_variables = output_trainable_variables
-        loss = vqt.vqt(test_qhbm, tf.constant(int(5e6)), tf_ham,
-                       tf.constant(1.0))
+        loss = vqt.vqt(
+            test_qhbm,
+            tf_ham,
+            beta=tf.constant(1.0),
+            num_samples=tf.constant(int(5e6)))
       grads = tape.gradient(loss, [
           hypernetwork.trainable_variables, output,
           test_qhbm.trainable_variables
@@ -212,8 +238,11 @@ def test_hypernetwork(self):
               tf.reshape(output[index:index + size], shape))
           index += size
         test_qhbm.trainable_variables = output_trainable_variables
-        loss = vqt.vqt(test_qhbm, tf.constant(int(5e6)), tf_ham,
-                       tf.constant(1.0))
+        loss = vqt.vqt(
+            test_qhbm,
+            tf_ham,
+            beta=tf.constant(1.0),
+            num_samples=tf.constant(int(5e6)))
       grads = tape.gradient(loss, [c, test_qhbm.trainable_variables])
       c_grad = grads[0]
       qhbm_grads = grads[1]