Detach the tensors on batch-norm eval. (huggingface#1702)
* Detach the tensors on batch-norm eval.

* Fix pyo3 bindings.

* Black tweak.

* Formatting.

* Also update the pyo3-onnx formatting.

* Apply black.
LaurentMazare authored Feb 13, 2024
1 parent 13c6722 commit ad73e93
Showing 14 changed files with 117 additions and 27 deletions.
2 changes: 1 addition & 1 deletion candle-core/src/backprop.rs
@@ -175,7 +175,7 @@ impl Tensor {
// the backprop graph of the backprop itself. This would be an issue for second order
// derivatives but these are out of scope at the moment.
let do_not_detach = CANDLE_GRAD_DO_NOT_DETACH.with(|b| *b);
- let grad = if do_not_detach { grad } else { grad.detach()? };
+ let grad = if do_not_detach { grad } else { grad.detach() };
if let Some(op) = node.op() {
match op {
Op::Binary(lhs, rhs, BinaryOp::Add) => {
6 changes: 3 additions & 3 deletions candle-core/src/tensor.rs
@@ -1882,9 +1882,9 @@ impl Tensor {
/// this new node. The storage of this tensor is shared with the initial tensor.
///
/// If the tensor is already detached from the computation graph, the same tensor is returned.
- pub fn detach(&self) -> Result<Tensor> {
+ pub fn detach(&self) -> Tensor {
if self.op.is_none() && !self.is_variable {
- Ok(self.clone())
+ self.clone()
} else {
let tensor_ = Tensor_ {
id: TensorId::new(),
@@ -1895,7 +1895,7 @@
dtype: self.dtype,
device: self.device.clone(),
};
- Ok(Tensor(Arc::new(tensor_)))
+ Tensor(Arc::new(tensor_))
}
}

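A minimal sketch of what the signature change means at a call site, assuming the candle-core API shown above; the values and variable names are illustrative only.

    use candle_core::{Device, Result, Var};

    fn main() -> Result<()> {
        let v = Var::new(&[1f32, 2., 3.], &Device::Cpu)?;
        let y = v.as_tensor().affine(2.0, 1.0)?;
        // detach() now returns a Tensor directly, so the trailing `?`
        // previously needed at call sites is dropped.
        let y_detached = y.detach();
        assert_eq!(y_detached.to_vec1::<f32>()?, vec![3f32, 5., 7.]);
        Ok(())
    }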
4 changes: 4 additions & 0 deletions candle-core/src/variable.rs
@@ -107,6 +107,10 @@ impl Var {
Ok(Self(inner))
}

+ pub fn as_detached_tensor(&self) -> Tensor {
+     self.0.detach()
+ }
+
pub fn as_tensor(&self) -> &Tensor {
&self.0
}
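A short sketch of how the new `Var::as_detached_tensor` differs from `as_tensor`: both expose the same storage, but the detached view is cut from the autograd graph. The variable names and values below are illustrative only.

    use candle_core::{Device, Result, Var};

    fn demo() -> Result<()> {
        let stats = Var::new(&[0f32, 1., 2.], &Device::Cpu)?;
        // Borrowed view that stays attached to the graph: a backward pass
        // over results built from it can reach the Var.
        let attached = stats.as_tensor();
        // Owned tensor sharing the same storage but detached from the graph:
        // backprop through results built on it stops here.
        let detached = stats.as_detached_tensor();
        assert_eq!(attached.dims(), detached.dims());
        Ok(())
    }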
2 changes: 1 addition & 1 deletion candle-examples/examples/reinforcement-learning/ddpg.rs
@@ -411,7 +411,7 @@ impl DDPG<'_> {
pub fn actions(&mut self, state: &Tensor) -> Result<f32> {
let actions = self
.actor
- .forward(&state.detach()?.unsqueeze(0)?)?
+ .forward(&state.detach().unsqueeze(0)?)?
.squeeze(0)?;
let actions = if self.train {
(actions + self.ou_noise.sample()?)?
@@ -74,7 +74,7 @@ pub fn run() -> Result<()> {
loop {
let action = {
let action_probs: Vec<f32> =
- softmax(&model.forward(&state.detach()?.unsqueeze(0)?)?, 1)?
+ softmax(&model.forward(&state.detach().unsqueeze(0)?)?, 1)?
.squeeze(0)?
.to_vec1()?;
weighted_sample(action_probs, &mut rng)? as i64
@@ -109,7 +109,7 @@ pub fn run() -> Result<()>

let rewards = Tensor::from_vec(accumulate_rewards(&steps), batch_size, &Device::Cpu)?
.to_dtype(DType::F32)?
- .detach()?;
+ .detach();

let actions_mask = {
let actions: Vec<i64> = steps.iter().map(|s| s.action).collect();
@@ -126,12 +126,12 @@
.unwrap()
})
.collect();
- Tensor::stack(&actions_mask, 0)?.detach()?
+ Tensor::stack(&actions_mask, 0)?.detach()
};

let states = {
let states: Vec<Tensor> = steps.into_iter().map(|s| s.state).collect();
- Tensor::stack(&states, 0)?.detach()?
+ Tensor::stack(&states, 0)?.detach()
};

let log_probs = actions_mask
14 changes: 12 additions & 2 deletions candle-nn/src/batch_norm.rs
@@ -262,9 +262,19 @@ impl BatchNorm {
let target_shape = target_shape.as_slice();

let x = x
- .broadcast_sub(&self.running_mean.as_tensor().reshape(target_shape)?)?
+ .broadcast_sub(
+     &self
+         .running_mean
+         .as_detached_tensor()
+         .reshape(target_shape)?,
+ )?
.broadcast_div(
- &(self.running_var.as_tensor().reshape(target_shape)? + self.eps)?.sqrt()?,
+ &(self
+     .running_var
+     .as_detached_tensor()
+     .reshape(target_shape)?
+     + self.eps)?
+ .sqrt()?,
)?;

match &self.weight_and_bias {
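A minimal sketch of the pattern this hunk applies, not the candle-nn BatchNorm itself: the running statistics held in `Var`s are read through `as_detached_tensor()` so that a backward pass over the normalized output does not propagate gradients into them. The helper name is hypothetical, and the stats are assumed to already have a shape broadcastable against `x` (the real code reshapes them to `target_shape` first).

    use candle_core::{Result, Tensor, Var};

    fn normalize_eval(x: &Tensor, running_mean: &Var, running_var: &Var, eps: f64) -> Result<Tensor> {
        // Detached reads: gradients will not flow into the running statistics.
        let mean = running_mean.as_detached_tensor();
        let var = running_var.as_detached_tensor();
        x.broadcast_sub(&mean)?
            .broadcast_div(&(var + eps)?.sqrt()?)
    }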