From cc3ff47b5118ff38e3e90db8561eb4dde13062dc Mon Sep 17 00:00:00 2001 From: Alexander Morozov Date: Sun, 24 Apr 2016 00:35:44 +0300 Subject: [PATCH] refactor/sync: convert to the new memory management API Refactor code CUDA and Native backend to match #autumnai/collenchyma/62 that provides enchanced memory management and syncronization. Since memory management is now automatic, `*_plain` variants of functions are removed. BREAKING CHANGE: *_plain versions of API functions are removed, arguments of their counterpart functions may have changed in mutablity. REFERENCE: autumnai/collenchyma#37, autumnai/collenchyma#62 --- src/frameworks/cuda/helper.rs | 1188 +++++++++---------------------- src/frameworks/cuda/mod.rs | 10 +- src/frameworks/native/helper.rs | 512 +++++-------- src/frameworks/native/mod.rs | 32 + src/plugin.rs | 416 +++-------- 5 files changed, 642 insertions(+), 1516 deletions(-) diff --git a/src/frameworks/cuda/helper.rs b/src/frameworks/cuda/helper.rs index a4416a8..f4190c1 100644 --- a/src/frameworks/cuda/helper.rs +++ b/src/frameworks/cuda/helper.rs @@ -1,27 +1,49 @@ //! Provides useful macros for easier NN implementation for CUDA/cuDNN. -/// Returns cuDNN ready memory pointer from a SharedTensor. -pub unsafe fn receive_memory_ptr(x: &::co::tensor::SharedTensor, device: &::co::device::DeviceType) -> Result<*const ::libc::c_void, ::co::plugin::Error> { - Ok(::std::mem::transmute::( - *try!( - try!( - x.get(device).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory.")) - ).as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive CUDA memory.")) - ).id_c() - )) +macro_rules! read { + ($x:ident, $slf:ident) => ( + try!($x.read($slf.device())).as_cuda() + .expect("Broken invariant: not a CUDA memory") + ) +} + +macro_rules! read_write { + ($x:ident, $slf:ident) => ( + try!($x.read_write($slf.device())).as_cuda() + .expect("Broken invariant: not a CUDA memory") + ) +} + +macro_rules! write_only { + ($x:ident, $slf:ident) => ( + try!($x.write_only($slf.device())).as_cuda() + .expect("Broken invariant: not a CUDA memory") + ) +} + +// trans! cannot be inlined into macros above, because `$mem` would become +// intermidiate variable and `*mut $t` will outlive it. +macro_rules! trans { + ($mem:ident) => ( + unsafe { ::std::mem::transmute::(*$mem.id_c()) } + ) } -/// Returns mutable cuDNN ready memory pointer from a SharedTensor. -pub unsafe fn receive_memory_ptr_mut(x: &mut ::co::tensor::SharedTensor, device: &::co::device::DeviceType) -> Result<*mut ::libc::c_void, ::co::plugin::Error> { - Ok(::std::mem::transmute::( - *try!( - try!( - x.get_mut(device).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory.")) - ).as_mut_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive CUDA memory.")) - ).id_c() - )) +macro_rules! trans_mut { + ($mem:ident) => ( + unsafe { ::std::mem::transmute::(*$mem.id_c()) } + ) +} + +macro_rules! exec { + ($name:ident, $f:expr) => ({ + let res = $f; + res.map_err(|_| PluginError::Operation( + stringify!(Unable to execute CUDA cuDNN $name)).into()) + }) } + #[macro_export] macro_rules! impl_oconf_for_cc(($($t: ident), +) => ( $( @@ -43,673 +65,241 @@ macro_rules! impl_oconf_for_pooling(($($t: ident), +) => ( )+ )); -#[macro_export] -macro_rules! 
impl_ops_sigmoid_for { - ($t:ident, $b:ty) => ( - impl ::plugin::Sigmoid<$t> for $b { - fn sigmoid( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.sigmoid_plain(x, result) - } - - fn sigmoid_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.sigmoid_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) - } - })) - } - - fn sigmoid_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.sigmoid_grad_plain(x, x_diff, result, result_diff) - } - - fn sigmoid_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.sigmoid_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(result, self.device()) }), // dest_data - &try!(result_diff.cudnn_tensor_desc_flat()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Backward.")) - } - })) - } - } - ) -} +// Implementation of Sigmoid, Relu, Tanh is mostly the same, excluding +// trait and function names. And it's quite big, so I think not repeating +// it here 3 times is worth another level of indirection. +// Since concat_idents!() is not stable, this macro has a lot of arguments. #[macro_export] -macro_rules! 
impl_ops_sigmoid_pointwise_for { - ($t:ident, $b:ty) => ( - impl ::plugin::SigmoidPointwise<$t> for $b { - fn sigmoid_pointwise( - &self, - x: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.sigmoid_pointwise_plain(x) - } - - fn sigmoid_pointwise_plain( - &self, - x: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.sigmoid_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Sigmoid Pointwise forward.")) - } - })) - } - - fn sigmoid_pointwise_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.sigmoid_pointwise_grad_plain(x, x_diff) +macro_rules! impl_activation_ops { + ($t:ty, $b:ty, + $plugin_name:ident, $plugin_pointwise_name:ident, + $fwd_cuda:ident, $bkw_cuda:ident, + $fwd_name:ident, $bkw_name:ident, + $fwd_pointwise_name:ident, $bkw_pointwise_name:ident) => ( + + impl ::plugin::$plugin_name<$t> for $b { + fn $fwd_name(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let r_desc = try!(result.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + + exec!($fwd_name, CUDNN.$fwd_cuda( + &try!(x.cudnn_tensor_desc_flat()), + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::<$t>::default())) + + } + + fn $bkw_name(&self, + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read!(x_diff, self); + let r_mem = read!(result, self); + let dr_mem = write_only!(result_diff, self); + + exec!($bkw_name, CUDNN.$bkw_cuda( + &try!(x.cudnn_tensor_desc_flat()), + trans!(x_mem), + &try!(x_diff.cudnn_tensor_desc_flat()), + trans!(dx_mem), + &try!(result.cudnn_tensor_desc_flat()), + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } + } - fn sigmoid_pointwise_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.sigmoid_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), // dest_data - &try!(x_diff.cudnn_tensor_desc_flat()), // 
dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Sigmoid Pointwise backward.")) - } - })) + impl ::plugin::$plugin_pointwise_name<$t> for $b { + fn $fwd_pointwise_name(&self, x: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let x_desc = try!(x.cudnn_tensor_desc_flat()); + let x_mem = read_write!(x, self); + exec!($fwd_pointwise_name, CUDNN.$fwd_cuda( + &x_desc, + trans!(x_mem), + &x_desc, + trans_mut!(x_mem), + ScalParams::<$t>::default())) + } + + fn $bkw_pointwise_name(&self, x: &SharedTensor<$t>, + x_diff: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let x_desc = try!(x.cudnn_tensor_desc_flat()); + let dx_desc = try!(x_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read_write!(x_diff, self); + exec!($bkw_pointwise_name, CUDNN.$bkw_cuda( + &x_desc, trans!(x_mem), + &dx_desc, trans!(dx_mem), + &x_desc, trans!(x_mem), + &dx_desc, trans_mut!(dx_mem), + ScalParams::<$t>::default())) } } ) } -#[macro_export] -macro_rules! impl_ops_relu_for { - ($t:ident, $b:ty) => ( - impl ::plugin::Relu<$t> for $b { - fn relu( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.relu_plain(x, result) - } - - fn relu_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.relu_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Forward.")) - } - })) - } - - fn relu_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.relu_grad_plain(x, x_diff, result, result_diff) - } - - fn relu_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.relu_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { 
::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(result, self.device()) }), // dest_data - &try!(result_diff.cudnn_tensor_desc_flat()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Backward.")) - } - })) - } - } +macro_rules! impl_ops_sigmoid_for { + ($t:ty, $b:ty) => ( + impl_activation_ops!( + $t, $b, + Sigmoid, SigmoidPointwise, + sigmoid_forward, sigmoid_backward, + sigmoid, sigmoid_grad, + sigmoid_pointwise, sigmoid_pointwise_grad); ) } -#[macro_export] -macro_rules! impl_ops_relu_pointwise_for { - ($t:ident, $b:ty) => ( - impl ::plugin::ReluPointwise<$t> for $b { - fn relu_pointwise( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.relu_pointwise_plain(x) - } - - fn relu_pointwise_plain( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.relu_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN ReLU Pointwise forward.")) - } - })) - } - - fn relu_pointwise_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.relu_pointwise_grad_plain(x, x_diff) - } - - fn relu_pointwise_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.relu_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), // dest_data - &try!(x_diff.cudnn_tensor_desc_flat()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN ReLU Pointwise backward.")) - } - })) - } - } +macro_rules! 
impl_ops_relu_for { + ($t:ty, $b:ty) => ( + impl_activation_ops!( + $t, $b, + Relu, ReluPointwise, + relu_forward, relu_backward, + relu, relu_grad, + relu_pointwise, relu_pointwise_grad); ) } -#[macro_export] macro_rules! impl_ops_tanh_for { - ($t:ident, $b:ty) => ( - impl ::plugin::Tanh<$t> for $b { - fn tanh( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.tanh_plain(x, result) - } - - fn tanh_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.tanh_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Forward.")) - } - })) - } - - fn tanh_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.tanh_grad_plain(x, x_diff, result, result_diff) - } - - fn tanh_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.tanh_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(result, self.device()) }), // dest_data - &try!(result_diff.cudnn_tensor_desc_flat()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Backward.")) - } - })) - } - } + ($t:ty, $b:ty) => ( + impl_activation_ops!( + $t, $b, + Tanh, TanhPointwise, + tanh_forward, tanh_backward, + tanh, tanh_grad, + tanh_pointwise, tanh_pointwise_grad); ) } -#[macro_export] -macro_rules! 
impl_ops_tanh_pointwise_for { - ($t:ident, $b:ty) => ( - impl ::plugin::TanhPointwise<$t> for $b { - fn tanh_pointwise( - &self, - x: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.tanh_pointwise_plain(x) - } - - fn tanh_pointwise_plain( - &self, - x: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.tanh_forward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Tanh Pointwise forward.")) - } - })) - } - - fn tanh_pointwise_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - - self.tanh_pointwise_grad_plain(x, x_diff) - } - fn tanh_pointwise_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.tanh_backward( - &try!(x.cudnn_tensor_desc_flat()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_flat()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(x.cudnn_tensor_desc_flat()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), // dest_data - &try!(x_diff.cudnn_tensor_desc_flat()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(x_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Tanh Pointwise backward.")) - } - })) - } - } - ) -} #[macro_export] macro_rules! 
impl_ops_convolution_for { ($t:ty, $b:ty) => ( fn convolution( &self, - filter: &mut ::co::tensor::SharedTensor<$t>, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC //::frameworks::cuda::CC - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - match workspace.add_device(self.device()) { _ => () } - - self.convolution_plain(filter, x, result, workspace, config) - } - - fn convolution_plain( - &self, - filter: &::co::tensor::SharedTensor<$t>, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.convolution_forward( + filter: &SharedTensor<$t>, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + workspace: &mut SharedTensor, + config: &Self::CC) -> Result<(), CoError> { + + let r_desc = try!(result.cudnn_tensor_desc()); + let f_mem = read!(filter, self); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + let w_mem = write_only!(workspace, self); + + exec!(convolution, CUDNN.convolution_forward::<$t>( config, - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(filter, self.device()) }), + trans_mut!(w_mem), + trans!(f_mem), &try!(x.cudnn_tensor_desc()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation convolution Forward.")) - } - })) + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::default())) } #[allow(unused_variables)] fn convolution_grad_filter( &self, - src_data: &mut ::co::tensor::SharedTensor<$t>, - dest_diff: &mut ::co::tensor::SharedTensor<$t>, - filter_diff: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - match src_data.add_device(self.device()) { _ => try!(src_data.sync(self.device())) } - match dest_diff.add_device(self.device()) { _ => try!(dest_diff.sync(self.device())) } - match filter_diff.add_device(self.device()) { _ => try!(filter_diff.sync(self.device())) } - match workspace.add_device(self.device()) { _ => () } - - self.convolution_grad_filter_plain(src_data, dest_diff, filter_diff, workspace, config) - } - - #[allow(unused_variables)] - fn convolution_grad_filter_plain( - &self, - src_data: &::co::tensor::SharedTensor<$t>, - dest_diff: &::co::tensor::SharedTensor<$t>, - filter_diff: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.convolution_backward_filter( + src_data: &SharedTensor<$t>, + dest_diff: &SharedTensor<$t>, + filter_diff: &mut SharedTensor<$t>, + workspace: &mut SharedTensor, + config: &Self::CC) -> 
Result<(), CoError> { + + let s_mem = read!(src_data, self); + let dd_mem = read!(dest_diff, self); + let df_mem = write_only!(filter_diff, self); + let w_mem = write_only!(workspace, self); + exec!(convolution_grad_filter, CUDNN.convolution_backward_filter( config, - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }), + trans_mut!(w_mem), &try!(src_data.cudnn_tensor_desc()), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(src_data, self.device()) }), + trans!(s_mem), &try!(dest_diff.cudnn_tensor_desc()), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(dest_diff, self.device()) }), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(filter_diff, self.device()) }), - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation convolution Backward.")) - } - })) + trans!(dd_mem), + trans_mut!(df_mem), + ScalParams::<$t>::default())) } #[allow(unused_variables)] fn convolution_grad_data( &self, - filter: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - match filter.add_device(self.device()) { _ => try!(filter.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => try!(result_diff.sync(self.device())) } - match workspace.add_device(self.device()) { _ => () } - - self.convolution_grad_data_plain(filter, x_diff, result_diff, workspace, config) - } - - #[allow(unused_variables)] - fn convolution_grad_data_plain( - &self, - filter: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - workspace: &mut ::co::tensor::SharedTensor, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.convolution_backward_data( + filter: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, + workspace: &mut SharedTensor, + config: &Self::CC) -> Result<(), CoError> { + + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let f_mem = read!(filter, self); + let dx_mem = read!(x_diff, self); + let dr_mem = write_only!(result_diff, self); + let w_mem = write_only!(workspace, self); + exec!(convolution_grad_data, CUDNN.convolution_backward_data( config, - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(workspace, self.device()) }), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(filter, self.device()) }), + trans_mut!(w_mem), + trans!(f_mem), &try!(x_diff.cudnn_tensor_desc()), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), - &try!(result_diff.cudnn_tensor_desc()), - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation convolution Backward.")) - } - })) + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } ) } #[macro_export] macro_rules! 
impl_ops_softmax_for { - ($t:ident, $b:ty) => ( + ($t:ty, $b:ty) => ( impl ::plugin::Softmax<$t> for $b { - fn softmax( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.softmax_plain(x, result) - } - - fn softmax_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.softmax_forward( - &try!(x.cudnn_tensor_desc_softmax()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc_softmax()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN softmax Forward.")) - } - })) + fn softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let r_desc = try!(result.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + exec!(softmax, CUDNN.softmax_forward( + &try!(x.cudnn_tensor_desc_softmax()), + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::<$t>::default())) } fn softmax_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.softmax_grad_plain(x, x_diff, result_diff) - } + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) -> Result<(), CoError> { - fn softmax_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.softmax_backward( - &try!(x.cudnn_tensor_desc_softmax()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_softmax()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result_diff.cudnn_tensor_desc_softmax()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN softmax Backward.")) - } - })) + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read!(x_diff, self); + let dr_mem = write_only!(result_diff, self); + + exec!(softmax_backward, CUDNN.softmax_backward( + &try!(x.cudnn_tensor_desc_softmax()), + trans!(x_mem), + &try!(x_diff.cudnn_tensor_desc_softmax()), + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } } ) @@ 
-717,75 +307,40 @@ macro_rules! impl_ops_softmax_for { #[macro_export] macro_rules! impl_ops_log_softmax_for { - ($t:ident, $b:ty) => ( + ($t:ty, $b:ty) => ( impl ::plugin::LogSoftmax<$t> for $b { - fn log_softmax( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.log_softmax_plain(x, result) - } - - fn log_softmax_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.log_softmax_forward( - &try!(x.cudnn_tensor_desc_softmax()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc_softmax()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN logarithmic softmax Forward.")) - } - })) + fn log_softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), CoError> { + let r_desc = try!(result.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + exec!(log_softmax, CUDNN.log_softmax_forward( + &try!(x.cudnn_tensor_desc_softmax()), + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::<$t>::default())) } fn log_softmax_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.log_softmax_grad_plain(x, x_diff, result_diff) - } + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) -> Result<(), CoError> { - fn log_softmax_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.log_softmax_backward( - &try!(x.cudnn_tensor_desc_softmax()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc_softmax()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result_diff.cudnn_tensor_desc_softmax()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN logarithmic softmax Backward.")) - } - })) + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read!(x_diff, self); + let dr_mem = write_only!(result_diff, self); + + exec!(log_softmax_backward, CUDNN.log_softmax_backward( + 
&try!(x.cudnn_tensor_desc_softmax()), + trans!(x_mem), + &try!(x_diff.cudnn_tensor_desc_softmax()), + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } } ) @@ -793,98 +348,53 @@ macro_rules! impl_ops_log_softmax_for { #[macro_export] macro_rules! impl_ops_lrn_for { - ($t:ident, $b:ty) => ( + ($t:ty, $b:ty) => ( impl ::plugin::LRN<$t> for $b { - fn new_lrn_config( - &self, - n: u32, - alpha: f64, - beta: f64, - k: f64 - ) -> Result { + fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) + -> Result { + // FIXME: unwrap() Ok(CUDNN.init_normalization(n, alpha, beta, k).unwrap()) } - fn lrn( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CLRN //::frameworks::cuda::CC - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.lrn_plain(x, result, config) - } - - fn lrn_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CLRN - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.lrn_forward( + fn lrn(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>, + config: &Self::CLRN) -> Result<(), CoError> { + let r_desc = try!(result.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + exec!(lrn_forward, CUDNN.lrn_forward( config, - &try!(x.cudnn_tensor_desc()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation lrn Forward.")) - } - })) + &try!(x.cudnn_tensor_desc()), + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::<$t>::default())) } #[allow(unused_variables)] fn lrn_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CLRN - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.lrn_grad_plain(x, x_diff, result, result_diff, config) - } - - #[allow(unused_variables)] - fn lrn_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CLRN - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.lrn_backward( + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, + config: &Self::CLRN) -> Result<(), CoError> { + + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read!(x_diff, self); + let r_mem = 
read!(result, self); + let dr_mem = write_only!(result_diff, self); + exec!(lrn_backward, CUDNN.lrn_backward( config, - &try!(x.cudnn_tensor_desc()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result.cudnn_tensor_desc()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(result, self.device()) }), // dest_data - &try!(result_diff.cudnn_tensor_desc()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation lrn Backward.")) - } - })) + &try!(x.cudnn_tensor_desc()), + trans!(x_mem), + &try!(x_diff.cudnn_tensor_desc()), + trans!(dx_mem), + &try!(result.cudnn_tensor_desc()), + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } } ) @@ -892,99 +402,59 @@ macro_rules! impl_ops_lrn_for { #[macro_export] macro_rules! impl_ops_pooling_for { - ($t:ident, $b:ty) => ( + ($t:ty, $b:ty) => ( impl ::plugin::Pooling<$t> for $b { - fn new_pooling_config( - &self, - window: &[i32], - padding: &[i32], - stride: &[i32], - ) -> Result { - let pooling_avg = ::cudnn::PoolingDescriptor::new(::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING, window, padding, stride).unwrap(); - let pooling_max = ::cudnn::PoolingDescriptor::new(::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_MAX, window, padding, stride).unwrap(); - Ok(::cudnn::utils::PoolingConfig::new(pooling_avg, pooling_max)) - } - - fn pooling_max( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CPOOL - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - - self.pooling_max_plain(x, result, config) - } - - fn pooling_max_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CPOOL - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.pooling_max_forward( + fn new_pooling_config(&self, window: &[i32], padding: &[i32], + stride: &[i32]) -> Result { + // FIXME: unwraps + let pooling_avg = PoolingDescriptor::new( + cudnnPoolingMode_t::CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING, + window, padding, stride).unwrap(); + let pooling_max = PoolingDescriptor::new( + cudnnPoolingMode_t::CUDNN_POOLING_MAX, + window, padding, stride).unwrap(); + Ok(utils::PoolingConfig::new(pooling_avg, pooling_max)) + } + + fn pooling_max(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>, + config: &Self::CPOOL) -> Result<(), CoError> { + let r_desc = try!(result.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let r_mem = write_only!(result, self); + exec!(pooling_max_forward, CUDNN.pooling_max_forward( config, - &try!(x.cudnn_tensor_desc()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(result.cudnn_tensor_desc()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data - scal_params - ) { - 
Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation pooling Forward.")) - } - })) + &try!(x.cudnn_tensor_desc()), + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + ScalParams::<$t>::default())) } #[allow(unused_variables)] fn pooling_max_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CPOOL - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - - self.pooling_max_grad_plain(x, x_diff, result, result_diff, config) - } - - #[allow(unused_variables)] - fn pooling_max_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CPOOL - ) -> Result<(), ::co::error::Error> { - let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - - Ok(try!(match CUDNN.pooling_max_backward( + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, + config: &Self::CPOOL) -> Result<(), CoError> { + + let dr_desc = try!(result_diff.cudnn_tensor_desc_flat()); + let x_mem = read!(x, self); + let dx_mem = read!(x_diff, self); + let r_mem = read!(result, self); + let dr_mem = write_only!(result_diff, self); + exec!(pooling_max_backward, CUDNN.pooling_max_backward( config, - &try!(x.cudnn_tensor_desc()), // src_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x, self.device()) }), //src_data - &try!(x_diff.cudnn_tensor_desc()), // src_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data - &try!(result.cudnn_tensor_desc()), // dest_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr(result, self.device()) }), // dest_data - &try!(result_diff.cudnn_tensor_desc()), // dest_diff_desc - try!(unsafe { ::frameworks::cuda::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data - scal_params - ) { - Ok(_) => Ok(()), - Err(_) => { - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation pooling Backward.")) - } - })) + &try!(x.cudnn_tensor_desc()), + trans!(x_mem), + &try!(x_diff.cudnn_tensor_desc()), + trans!(dx_mem), + &try!(result.cudnn_tensor_desc()), + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + ScalParams::<$t>::default())) } } ) diff --git a/src/frameworks/cuda/mod.rs b/src/frameworks/cuda/mod.rs index baa5459..6088c38 100644 --- a/src/frameworks/cuda/mod.rs +++ b/src/frameworks/cuda/mod.rs @@ -1,9 +1,11 @@ //! Provides NN for a CUDA backend. 
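// [Editor's note: illustrative sketch, not part of the patch.] Caller-side view
// of the breaking change described in the commit message. In the removed code,
// each operation registered and synced memory by hand and then delegated to a
// `*_plain` variant, e.g.:
//
//     match x.add_device(self.device()) { _ => try!(x.sync(self.device())) }
//     match result.add_device(self.device()) { _ => () }
//     self.sigmoid_plain(x, result)
//
// and callers had to pass `&mut` borrows even for pure inputs. With the new
// memory management API the implementation itself calls read()/read_write()/
// write_only(), so the `*_plain` variants disappear and callers pass plain
// borrows. The helper name `run_sigmoid` below is hypothetical; the trait and
// signatures it uses are the ones this diff introduces.
fn run_sigmoid<B>(backend: &B,
                  x: &::co::tensor::SharedTensor<f32>,
                  result: &mut ::co::tensor::SharedTensor<f32>)
                  -> Result<(), ::co::error::Error>
    where B: ::plugin::Sigmoid<f32>
{
    // No manual add_device()/sync(): placement and synchronization happen
    // inside sigmoid() through the new memory management API.
    backend.sigmoid(x, result)
}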
#![allow(missing_docs)] use ::plugin::*; +use co::Error as CoError; use co::prelude::*; use co::plugin::Error as PluginError; use cudnn::*; +use cudnn::utils::ScalParams; #[macro_use] pub mod helper; @@ -316,10 +318,6 @@ impl_ops_log_softmax_for!(f32, Backend); impl_ops_lrn_for!(f32, Backend); impl_ops_pooling_for!(f32, Backend); -impl_ops_sigmoid_pointwise_for!(f32, Backend); -impl_ops_relu_pointwise_for!(f32, Backend); -impl_ops_tanh_pointwise_for!(f32, Backend); - impl NN for Backend { type CC = utils::ConvolutionConfig; type CLRN = utils::NormalizationConfig; @@ -337,7 +335,3 @@ impl_ops_softmax_for!(f64, Backend); impl_ops_log_softmax_for!(f64, Backend); impl_ops_lrn_for!(f64, Backend); impl_ops_pooling_for!(f64, Backend); - -impl_ops_sigmoid_pointwise_for!(f64, Backend); -impl_ops_relu_pointwise_for!(f64, Backend); -impl_ops_tanh_pointwise_for!(f64, Backend); diff --git a/src/frameworks/native/helper.rs b/src/frameworks/native/helper.rs index d411978..3af4799 100644 --- a/src/frameworks/native/helper.rs +++ b/src/frameworks/native/helper.rs @@ -1,7 +1,9 @@ //! Provides useful macros for easier NN implementation for native. +use co; use co::plugin::numeric_helpers::Float; use co::memory::MemoryType; +use co::plugin::Error as PluginError; #[derive(Debug, Copy, Clone)] #[allow(missing_docs)] @@ -13,6 +15,30 @@ pub struct NormalizationConfig; #[allow(missing_docs)] pub struct PoolingConfig; +macro_rules! read { + ($x:ident, $t:ident, $slf:ident) => ( + try!($x.read($slf.device())).as_native() + .expect("Broken invariant: not a CUDA memory") + .as_slice::<$t>() + ) +} + +macro_rules! read_write { + ($x:ident, $t: ident, $slf:ident) => ( + try!($x.read_write($slf.device())).as_mut_native() + .expect("Broken invariant: not a CUDA memory") + .as_mut_slice::<$t>() + ) +} + +macro_rules! write_only { + ($x:ident, $t: ident, $slf:ident) => ( + try!($x.write_only($slf.device())).as_mut_native() + .expect("Broken invariant: not a CUDA memory") + .as_mut_slice::<$t>() + ) +} + /// Just a helper function until SharedTensor has a nice interface for writing data pub fn write_to_memory(mem: &mut MemoryType, data: T) where T::Item: Clone { @@ -30,43 +56,43 @@ where T::Item: Clone { #[inline] /// Computes the Sigmoid Function on the CPU -pub fn sigmoid(x: &T) -> T { - (T::one()) / (T::one() + (-*x).exp()) +pub fn sigmoid(x: T) -> T { + (T::one()) / (T::one() + (-x).exp()) } #[inline] /// Computes the Sigmoid Gradient on the CPU -pub fn sigmoid_grad(x: &T, dx: &T) -> T { - *x * (T::one() -*x) * *dx +pub fn sigmoid_grad(x: T, dx: T) -> T { + x * (T::one() - x) * dx } #[inline] /// Computes the ReLU Function on the CPU -pub fn relu(x: &T) -> T { +pub fn relu(x: T) -> T { let x : T = x.clone(); x.max(T::zero()) } #[inline] /// Computes the ReLU Gradient on the CPU -pub fn relu_grad(x: &T, dx: &T) -> T { - if *x > T::zero() { - return *dx +pub fn relu_grad(x: T, dx: T) -> T { + if x > T::zero() { + return dx } T::zero() } #[inline] /// Computes the Tanh Function on the CPU -pub fn tanh(x: &T) -> T { +pub fn tanh(x: T) -> T { x.tanh() } #[inline] // d/dx tanh x = sech2 x = 1 + tanh2 x /// Computes the Tanh Gradient on the CPU -pub fn tanh_grad(x: &T, dx: &T) -> T { - (T::one() - x.powi(2)) * *dx +pub fn tanh_grad(x: T, dx: T) -> T { + (T::one() - x.powi(2)) * dx } macro_rules! impl_oconf_for_cc(($($t: ident), +) => ( @@ -91,61 +117,25 @@ macro_rules! impl_oconf_for_pooling(($($t: ident), +) => ( #[macro_export] macro_rules! 
impl_ops_sigmoid_for { ($t:ident, $b:ty) => ( - impl ::plugin::Sigmoid<$t> for $b { - fn sigmoid( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - self.sigmoid_plain(x, result) - } - - fn sigmoid_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - let res = input.as_slice::<$t>().iter().map(::frameworks::native::helper::sigmoid); - ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native sigmoid Forward."))) + impl Sigmoid<$t> for $b { + fn sigmoid(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), Error> { + map1(read!(x, $t, self), + write_only!(result, $t, self), + ::frameworks::native::helper::sigmoid) } fn sigmoid_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(result.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - self.sigmoid_grad_plain(x, x_diff, result, result_diff) - } - - fn sigmoid_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(sig_data) = x.get(self.device()).unwrap().as_native() { - if let Some(sig_dx) = x_diff.get(self.device()).unwrap().as_native() { - let res = sig_data.as_slice::<$t>().iter() - .zip(sig_dx.as_slice::<$t>().iter()) - .map(|(t, dt)| ::frameworks::native::helper::sigmoid_grad(t, dt)); - ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native sigmoid grad Forward."))) + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) + -> Result<(), Error> { + map2(read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + ::frameworks::native::helper::sigmoid_grad) } } ); @@ -154,61 +144,25 @@ macro_rules! impl_ops_sigmoid_for { #[macro_export] macro_rules! 
impl_ops_relu_for { ($t:ident, $b:ty) => ( - impl ::plugin::Relu<$t> for $b { - fn relu( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - self.relu_plain(x, result) - } - - fn relu_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - let res = input.as_slice::<$t>().iter().map(::frameworks::native::helper::relu); - ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native ReLU Forward."))) + impl Relu<$t> for $b { + fn relu(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), ::co::error::Error> { + map1(read!(x, $t, self), + write_only!(result, $t, self), + ::frameworks::native::helper::relu) } fn relu_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(result.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - self.relu_grad_plain(x, x_diff, result, result_diff) - } - - fn relu_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - if let Some(dx) = x_diff.get(self.device()).unwrap().as_native() { - let res = input.as_slice::<$t>().iter() - .zip(dx.as_slice::<$t>().iter()) - .map(|(x, dx)| ::frameworks::native::helper::relu_grad(x, dx)); - ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native ReLU grad Forward."))) + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) + -> Result<(), Error> { + map2(read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + ::frameworks::native::helper::relu_grad) } } ); @@ -218,61 +172,24 @@ macro_rules! impl_ops_relu_for { macro_rules! 
impl_ops_tanh_for { ($t:ident, $b:ty) => ( impl ::plugin::Tanh<$t> for $b { - #[inline] - fn tanh( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - self.tanh_plain(x, result) - } - - fn tanh_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - let res = input.as_slice::<$t>().iter().map(::frameworks::native::helper::tanh); - ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native tanh Forward."))) + fn tanh(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), ::co::error::Error> { + map1(read!(x, $t, self), + write_only!(result, $t, self), + ::frameworks::native::helper::tanh) } fn tanh_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result.add_device(self.device()) { _ => try!(result.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - self.tanh_grad_plain(x, x_diff, result, result_diff) - } - - fn tanh_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - if let Some(dx) = x_diff.get(self.device()).unwrap().as_native() { - let res = input.as_slice::<$t>().iter() - .zip(dx.as_slice::<$t>().iter()) - .map(|(x, dx)| ::frameworks::native::helper::tanh_grad(x, dx)); - ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - } - Err(Error::Plugin(PluginError::Operation("Unable to execute Native tanh_grad Forward."))) + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) + -> Result<(), Error> { + map2(read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + ::frameworks::native::helper::tanh_grad) } } ); @@ -284,29 +201,20 @@ macro_rules! 
impl_ops_convolution_for { impl ::plugin::Convolution<$t> for $b { fn new_convolution_config( &self, - src: &::co::tensor::SharedTensor<$t>, - dest: &::co::tensor::SharedTensor<$t>, - filter: &mut ::co::tensor::SharedTensor<$t>, + src: &SharedTensor<$t>, + dest: &SharedTensor<$t>, + filter: &mut SharedTensor<$t>, stride: &[i32], zero_padding: &[i32] ) -> Result { unimplemented!(); Ok(helper::ConvolutionConfig) } - fn convolution( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - unimplemented!(); - Ok(()) - } - fn convolution_plain( + fn convolution( &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, config: &Self::CC ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -315,22 +223,10 @@ macro_rules! impl_ops_convolution_for { fn convolution_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, - config: &Self::CC - ) -> Result<(), ::co::error::Error> { - unimplemented!(); - Ok(()) - } - - fn convolution_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, config: &Self::CC ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -344,69 +240,40 @@ macro_rules! impl_ops_convolution_for { macro_rules! impl_ops_softmax_for { ($t:ident, $b:ty) => ( impl ::plugin::Softmax<$t> for $b { - fn softmax( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - self.softmax_plain(x, result) - } - fn softmax_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - let mut exps = Vec::with_capacity(x.capacity()); - let mut sum : $t = 0 as $t; - for exp in input.as_slice::<$t>().iter().map(|t|t.exp()) { - exps.push(exp); - sum += exp; - } - let res = exps.iter().map(|t| t / sum); - ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); - return Ok(()); + fn softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), Error> { + let xs = read!(x, $t, self); + let rs = write_only!(result, $t, self); + + try!(map1(xs, rs, |v| v.exp())); + + let mut sum: $t = 0.0; // iter_arith is not stable yet + for r in &*rs { + sum += *r; } - Err(Error::Plugin( - PluginError::Operation("Unable to execute Native softmax Forward."))) + for r in rs { + *r /= sum; + } + Ok(()) } + + // TODO: check fn softmax_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result_diff.add_device(self.device()) 
{ _ => () } - self.softmax_grad_plain(x, x_diff, result_diff) - } - fn softmax_grad_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(sig_data) = x.get(self.device()).unwrap().as_native() { - if let Some(sig_dx) = x_diff.get(self.device()).unwrap().as_native() { - let mut dot : $t = 0 as $t; - let sig_data_slice = sig_data.as_slice::<$t>(); - let sig_dx_slice = sig_dx.as_slice::<$t>(); - for (t, dt) in sig_data_slice.iter().zip(sig_dx_slice.iter()) { - dot += t * dt; - } - let res = sig_data_slice.iter() - .zip(sig_dx_slice.iter()) - .map(|(t, dt)| t * (dt - dot)); - ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); - return Ok(()); - } + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) -> Result<(), Error> { + + let xs = read!(x, $t, self); + let dxs = read!(x_diff, $t, self); + let drs = write_only!(result_diff, $t, self); + + let mut dot: $t = 0 as $t; + for (t, dt) in xs.iter().zip(dxs.iter()) { + dot += t * dt; } - Err(Error::Plugin( - PluginError::Operation("Unable to execute Native softmax Backward."))) + map2(xs, dxs, drs, |t, dt| t * (dt - dot)) } } ); @@ -416,76 +283,35 @@ macro_rules! impl_ops_softmax_for { macro_rules! impl_ops_log_softmax_for { ($t:ident, $b:ty) => ( impl ::plugin::LogSoftmax<$t> for $b { - fn log_softmax( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - self.log_softmax_plain(x, result) - } - fn log_softmax_plain( - &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(input) = x.get(self.device()).unwrap().as_native() { - let mut max_input = ::std::$t::NEG_INFINITY; - for &input_val in input.as_slice::<$t>() { - max_input = max_input.max(input_val); - } - - let mut logsum : $t = 0 as $t; - for exp in input.as_slice::<$t>().iter().map(|t| (-(max_input - t)).exp()) { - logsum += exp; - } - logsum = max_input + logsum.ln(); - - let res = input.as_slice::<$t>().iter().map(|t| t - logsum); - - ::frameworks::native::helper::write_to_memory(result.get_mut(self.device()).unwrap(), res); - return Ok(()); + fn log_softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) + -> Result<(), ::co::error::Error> { + let xs = read!(x, $t, self); + let rs = write_only!(result, $t, self); + + let max_x = xs.iter().fold(::std::$t::NEG_INFINITY, + |acc, &t| acc.max(t)); + + let mut logsum : $t = 0 as $t; + for t in xs { + logsum += (-(max_x - t)).exp(); } - Err(Error::Plugin( - PluginError::Operation("Unable to execute Native softmax Forward."))) - } - fn log_softmax_grad( - &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match x_diff.add_device(self.device()) { _ => try!(x_diff.sync(self.device())) } - match result_diff.add_device(self.device()) { _ => () } - self.log_softmax_grad_plain(x, x_diff, result_diff) + logsum = max_x + logsum.ln(); + + map1(xs, rs, |t| t - logsum) } - fn log_softmax_grad_plain( - &self, - x: 
&::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t> - ) -> Result<(), ::co::error::Error> { - if let Some(sig_data) = x.get(self.device()).unwrap().as_native() { - if let Some(sig_dx) = x_diff.get(self.device()).unwrap().as_native() { - let x_slice = sig_data.as_slice::<$t>(); - let x_diff_slice = sig_dx.as_slice::<$t>(); - let mut sum = 0 as $t; - for &grad_val in x_diff_slice.iter() { - sum += grad_val; - } - let res = x_slice.iter().zip(x_diff_slice.iter()).map(|(x_val, x_diff_val)| { - x_diff_val - x_val.exp() * sum - }); - - ::frameworks::native::helper::write_to_memory(result_diff.get_mut(self.device()).unwrap(), res); - return Ok(()); - } - } - Err(Error::Plugin( - PluginError::Operation("Unable to execute Native softmax Backward."))) + fn log_softmax_grad(&self, x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>) + -> Result<(), ::co::error::Error> { + let xs = read!(x, $t, self); + let dxs = read!(x_diff, $t, self); + let drs = write_only!(result_diff, $t, self); + + let mut sum = 0 as $t; + for &grad_val in dxs.iter() { + sum += grad_val; + } + map2(xs, dxs, drs, |t, dt| dt - t * sum) } } ); @@ -508,8 +334,8 @@ macro_rules! impl_ops_lrn_for { fn lrn( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, + x: &mut SharedTensor<$t>, + result: &mut SharedTensor<$t>, config: &Self::CLRN ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -518,8 +344,8 @@ macro_rules! impl_ops_lrn_for { fn lrn_plain( &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, config: &Self::CLRN ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -528,10 +354,10 @@ macro_rules! impl_ops_lrn_for { fn lrn_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, + x: &mut SharedTensor<$t>, + x_diff: &mut SharedTensor<$t>, + result: &mut SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, config: &Self::CLRN ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -540,10 +366,10 @@ macro_rules! impl_ops_lrn_for { fn lrn_grad_plain( &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, config: &Self::CLRN ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -569,8 +395,8 @@ macro_rules! impl_ops_pooling_for { fn pooling_max( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, + x: &mut SharedTensor<$t>, + result: &mut SharedTensor<$t>, config: &Self::CPOOL ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -579,8 +405,8 @@ macro_rules! impl_ops_pooling_for { fn pooling_max_plain( &self, - x: &::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, config: &Self::CPOOL ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -589,10 +415,10 @@ macro_rules! 
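
// The rewritten log-softmax above uses the standard log-sum-exp trick: subtract
// the per-tensor maximum before exponentiating so large inputs do not overflow.
// A standalone sketch of the same computation on a plain f32 slice:
fn log_softmax(xs: &[f32], out: &mut [f32]) {
    let max_x = xs.iter().fold(::std::f32::NEG_INFINITY, |acc, &t| acc.max(t));
    let mut logsum = 0.0f32;
    for &t in xs {
        logsum += (-(max_x - t)).exp();
    }
    let logsum = max_x + logsum.ln();
    for i in 0..out.len() {
        out[i] = xs[i] - logsum;
    }
}

fn main() {
    let xs = [1.0f32, 2.0, 3.0];
    let mut out = [0.0f32; 3];
    log_softmax(&xs, &mut out);
    // exp() of the outputs must sum to one (up to rounding error).
    let mut total = 0.0f32;
    for o in &out {
        total += o.exp();
    }
    assert!((total - 1.0).abs() < 1e-5);
}
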
impl_ops_pooling_for { #[allow(unused_variables)] fn pooling_max_grad( &self, - x: &mut ::co::tensor::SharedTensor<$t>, - x_diff: &mut ::co::tensor::SharedTensor<$t>, - result: &mut ::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, + x: &mut SharedTensor<$t>, + x_diff: &mut SharedTensor<$t>, + result: &mut SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, config: &Self::CPOOL ) -> Result<(), ::co::error::Error> { unimplemented!(); @@ -601,10 +427,10 @@ macro_rules! impl_ops_pooling_for { fn pooling_max_grad_plain( &self, - x: &::co::tensor::SharedTensor<$t>, - x_diff: &::co::tensor::SharedTensor<$t>, - result: &::co::tensor::SharedTensor<$t>, - result_diff: &mut ::co::tensor::SharedTensor<$t>, + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, config: &Self::CPOOL ) -> Result<(), ::co::error::Error> { unimplemented!(); diff --git a/src/frameworks/native/mod.rs b/src/frameworks/native/mod.rs index 1e70e9a..d0e6adf 100644 --- a/src/frameworks/native/mod.rs +++ b/src/frameworks/native/mod.rs @@ -8,10 +8,42 @@ use ::plugin::*; use co::prelude::*; use co::Error; use co::plugin::Error as PluginError; +use co::plugin::numeric_helpers::Float; #[macro_use] pub mod helper; +// Those functions should be in helper.rs, but there is no point to make them +// public. +fn lens_eq(xs: &[T], ys: &[T]) -> Result<(), Error> { + if xs.len() != ys.len() { + return Err(PluginError::Operation("Tensor dimension mismatch").into()); + } + Ok(()) +} + +fn map1(src: &[T], dst: &mut [T], f: F) -> Result<(), Error> + where T: Float, + F: Fn(T) -> T { + try!(lens_eq(dst, src)); + for i in 0..dst.len() { + dst[i] = f(src[i]); + } + Ok(()) +} + +fn map2(src1: &[T], src2: &[T], dst: &mut [T], f: F) -> Result<(), Error> + where T: Float, + F: Fn(T, T) -> T { + try!(lens_eq(dst, src1)); + try!(lens_eq(dst, src2)); + for i in 0..dst.len() { + dst[i] = f(src1[i], src2[i]); + } + Ok(()) +} + + impl_oconf_for_cc!(f32, f64); impl_oconf_for_clrn!(f32, f64); impl_oconf_for_pooling!(f32, f64); diff --git a/src/plugin.rs b/src/plugin.rs index 80aaa5f..dd1ab71 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -156,44 +156,25 @@ pub trait NN { /// Provides the functionality for a Backend to support Sigmoid operations. pub trait Sigmoid : NN { - /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `sigmoid_plain`. - fn sigmoid(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the Sigmoid function over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `sigmoid`. - fn sigmoid_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn sigmoid(&self, x: &SharedTensor, result: &mut SharedTensor) + -> Result<(), ::co::error::Error>; - /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `sigmoid_grad_plain`. - fn sigmoid_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a Sigmoid function over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `sigmoid_grad`. - fn sigmoid_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn sigmoid_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result: &SharedTensor, result_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for pointwise Sigmoid operations (overwrites the input with the result of the operation). pub trait SigmoidPointwise : NN { - /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result back to `x`. @@ -201,239 +182,132 @@ pub trait SigmoidPointwise : NN { /// For a no-memory managed version see `sigmoid_pointwise_plain`. fn sigmoid_pointwise(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the Sigmoid function over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `sigmoid_pointwise`. - fn sigmoid_pointwise_plain(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result back to `x_diff`. - /// - /// For a no-memory managed version see `sigmoid_pointwise_grad_plain`. - fn sigmoid_pointwise_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a Sigmoid function over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `sigmoid_pointwise_grad`. - fn sigmoid_pointwise_grad_plain(&self, x: &SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn sigmoid_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support ReLU operations. pub trait Relu : NN { - /// Computes the [Rectified linear units][relu] over the input Tensor `x` with complete memory management. + /// Computes the [Rectified linear units][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `relu_plain`. - fn relu(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn relu(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the ReLU over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `relu`. - fn relu_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of [ReLU][relu] over the input Tensor `x` with complete memory management. + /// Computes the gradient of [ReLU][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `relu_grad_plain`. - fn relu_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of ReLU over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `relu_grad`. - fn relu_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn relu_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result: &SharedTensor, result_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for pointwise ReLU operations (overwrites the input with the result of the operation). pub trait ReluPointwise : NN { - /// Computes the [Rectified linear units][relu] over the input Tensor `x` with complete memory management. + /// Computes the [Rectified linear units][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result back to `x`. - /// - /// For a no-memory managed version see `relu_pointwise_plain`. fn relu_pointwise(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the ReLU over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `relu_pointwise`. - fn relu_pointwise_plain(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of [ReLU][relu] over the input Tensor `x` with complete memory management. + /// Computes the gradient of [ReLU][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result back to `x_diff`. - /// - /// For a no-memory managed version see `relu_pointwise_grad_plain`. - fn relu_pointwise_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of ReLU over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `relu_pointwise_grad`. - fn relu_pointwise_grad_plain(&self, x: &SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn relu_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support TanH operations. pub trait Tanh : NN { - /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x` with complete memory management. + /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `tanh_plain`. - fn tanh(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn tanh(&self, x: &SharedTensor, result: &mut SharedTensor) + -> Result<(), ::co::error::Error>; - /// Computes the tanh over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `tanh`. - fn tanh_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of [tanh][tanh] over the input Tensor `x` with complete memory management. + /// Computes the gradient of [tanh][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `tanh_grad_plain`. - fn tanh_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of tanh over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `tanh_grad`. - fn tanh_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn tanh_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result: &SharedTensor, result_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } -/// Provides the functionality for pointwise ReLU operations (overwrites the input with the result of the operation). +/// Provides the functionality for pointwise ReLU operations (overwrites the input +/// with the result of the operation). pub trait TanhPointwise : NN { - /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x` with complete memory management. + /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result back to `x`. - /// - /// For a no-memory managed version see `tanh_pointwise_plain`. fn tanh_pointwise(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the tanh over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `tanh_pointwise`. - fn tanh_pointwise_plain(&self, x: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of [tanh][tanh] over the input Tensor `x` with complete memory management. + /// Computes the gradient of [tanh][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result back to `x_diff`. - /// - /// For a no-memory managed version see `tanh_pointwise_grad_plain`. - fn tanh_pointwise_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of tanh over the input Tensor `x` without any memory management. - /// - /// Saves the result back to `x_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `tanh_pointwise_grad`. - fn tanh_pointwise_grad_plain(&self, x: &SharedTensor, x_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn tanh_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support Convolution operations. pub trait Convolution : NN { - /// Creates a new ConvolutionConfig, which needs to be passed to further convolution Operations. - fn new_convolution_config(&self, src: &SharedTensor, dest: &SharedTensor, filter: &mut SharedTensor, - algo_fwd: ConvForwardAlgo, algo_bwd_filter: ConvBackwardFilterAlgo, algo_bwd_data: ConvBackwardDataAlgo, - stride: &[i32], zero_padding: &[i32]) -> Result; - - /// Computes a [CNN convolution][convolution] over the input Tensor `x` with complete memory management. + /// Creates a new ConvolutionConfig, which needs to be passed to further + /// convolution Operations. + fn new_convolution_config(&self, + src: &SharedTensor, + dest: &SharedTensor, + filter: &SharedTensor, + algo_fwd: ConvForwardAlgo, + algo_bwd_filter: ConvBackwardFilterAlgo, + algo_bwd_data: ConvBackwardDataAlgo, + stride: &[i32], + zero_padding: &[i32]) + -> Result; + + /// Computes a [CNN convolution][convolution] over the input Tensor `x`. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `convolution_plain`. - fn convolution(&self, filter: &mut SharedTensor, x: &mut SharedTensor, result: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; - - /// Computes the convolution over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `convolution`. - fn convolution_plain(&self, filter: &SharedTensor, x: &SharedTensor, result: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a [CNN convolution][convolution] with respect to the filter and complete memory management. + fn convolution(&self, + filter: &SharedTensor, + x: &SharedTensor, + result: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC) + -> Result<(), ::co::error::Error>; + + /// Computes the gradient of a [CNN convolution][convolution] with respect to the filter. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `filter_diff`. - /// - /// For a no-memory managed version see `convolution_grad_filter_plain`. - fn convolution_grad_filter(&self, src_data: &mut SharedTensor, dest_diff: &mut SharedTensor, filter_diff: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a convolution with respect to the filter and without any memory management. - /// - /// Saves the result to `filter_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `convolution_grad_filter`. - fn convolution_grad_filter_plain(&self, src_data: &SharedTensor, dest_diff: &SharedTensor, filter_diff: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a [CNN convolution][convolution] over the input Tensor `x` with respect to the data and complete memory management. + fn convolution_grad_filter(&self, + src_data: &SharedTensor, + dest_diff: &SharedTensor, + filter_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC) + -> Result<(), ::co::error::Error>; + + /// Computes the gradient of a [CNN convolution][convolution] over the input + /// Tensor `x` with respect to the data. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `convolution_grad_data_plain`. - fn convolution_grad_data(&self, filter: &mut SharedTensor, x_diff: &mut SharedTensor, result_diff: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a convolution over the input Tensor `x` with respect to the data and without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `convolution_grad_data`. - fn convolution_grad_data_plain(&self, filter: &SharedTensor, x_diff: &SharedTensor, result_diff: &mut SharedTensor, workspace: &mut SharedTensor, config: &Self::CC) -> Result<(), ::co::error::Error>; -} + fn convolution_grad_data(&self, + filter: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC) + -> Result<(), ::co::error::Error>; // /// Computes the backward Convolution function w.r.t the bias. // /// @@ -460,155 +334,85 @@ pub trait Convolution : NN { // filter_data: *mut ::libc::c_void, // scale: ScalParams, // } +} /// Provides the functionality for a Backend to support Softmax operations. pub trait Softmax : NN { - /// Computes a [Softmax][softmax] over the input Tensor `x` with complete memory management. + /// Computes a [Softmax][softmax] over the input Tensor `x`. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `softmax_plain`. - fn softmax(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn softmax(&self, x: &SharedTensor, result: &mut SharedTensor) + -> Result<(), ::co::error::Error>; - /// Computes the softmax over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `softmax`. - fn softmax_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a [Softmax][softmax] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [Softmax][softmax] over the input Tensor `x`. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `softmax_grad_plain`. - fn softmax_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a softmax over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `softmax_grad`. - fn softmax_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn softmax_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support LogSoftmax operations. pub trait LogSoftmax : NN { - /// Computes a logarithmic softmax over the input Tensor `x` with complete memory management. + /// Computes a logarithmic softmax over the input Tensor `x`. /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `log_softmax_plain`. - fn log_softmax(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the logarithmic softmax over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `log_softmax`. - fn log_softmax_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn log_softmax(&self, x: &SharedTensor, result: &mut SharedTensor) + -> Result<(), ::co::error::Error>; - /// Computes the gradient of a logarithmic softmax over the input Tensor `x` with complete memory management. + /// Computes the gradient of a logarithmic softmax over the input Tensor `x`. /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `log_softmax_grad_plain`. - fn log_softmax_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a logarithmic softmax over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `log_softmax_grad`. - fn log_softmax_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + fn log_softmax_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result_diff: &mut SharedTensor) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support Local Response Normalization operations. pub trait LRN : NN { - /// Creates a new (Local Response Normalization) LRNConfig, which needs to be passed to further LRN Operations. - fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) -> Result; + /// Creates a new (Local Response Normalization) LRNConfig, which needs to be + /// passed to further LRN Operations. + fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) + -> Result; - /// Computes a [LRN][lrn] over the input Tensor `x` with complete memory management. + /// Computes a [LRN][lrn] over the input Tensor `x`. /// [lrn]: https://en.wikipedia.org/wiki/lrnal_neural_network /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `lrn_plain`. - fn lrn(&self, x: &mut SharedTensor, result: &mut SharedTensor, config: &Self::CLRN) -> Result<(), ::co::error::Error>; - - /// Computes the LRN over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `lrn`. - fn lrn_plain(&self, x: &SharedTensor, result: &mut SharedTensor, config: &Self::CLRN) -> Result<(), ::co::error::Error>; + fn lrn(&self, x: &SharedTensor, result: &mut SharedTensor, + config: &Self::CLRN) -> Result<(), ::co::error::Error>; - /// Computes the gradient of a [LRN][lrn] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [LRN][lrn] over the input Tensor `x`. /// [lrn]: https://en.wikipedia.org/wiki/lrnal_neural_network /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `lrn_grad_plain`. - fn lrn_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor, config: &Self::CLRN) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of a LRN over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `lrn_grad`. - fn lrn_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor, config: &Self::CLRN) -> Result<(), ::co::error::Error>; + fn lrn_grad(&self, + x: &SharedTensor, x_diff: &SharedTensor, + result: &SharedTensor, result_diff: &mut SharedTensor, + config: &Self::CLRN) + -> Result<(), ::co::error::Error>; } /// Provides the functionality for a Backend to support Pooling operations. pub trait Pooling : NN { /// Creates a new PoolingConfig, which needs to be passed to further pooling Operations. - fn new_pooling_config(&self, window: &[i32], padding: &[i32], stride: &[i32]) -> Result; + fn new_pooling_config(&self, window: &[i32], padding: &[i32], stride: &[i32]) + -> Result; - /// Computes non-linear down-sampling ([max Pooling][pooling]) over the input Tensor `x` with complete memory management. + /// Computes non-linear down-sampling ([max Pooling][pooling]) over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result`. - /// - /// For a no-memory managed version see `pooling_max_plain`. - fn pooling_max(&self, x: &mut SharedTensor, result: &mut SharedTensor, config: &Self::CPOOL) -> Result<(), ::co::error::Error>; + fn pooling_max(&self, x: &SharedTensor, result: &mut SharedTensor, + config: &Self::CPOOL) -> Result<(), ::co::error::Error>; - /// Computes the max pooling over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `pooling_max`. - fn pooling_max_plain(&self, x: &SharedTensor, result: &mut SharedTensor, config: &Self::CPOOL) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of [max Pooling][pooling] over the input Tensor `x` with complete memory management. + /// Computes the gradient of [max Pooling][pooling] over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result_diff`. - /// - /// For a no-memory managed version see `pooling_max_grad_plain`. - fn pooling_max_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor, config: &Self::CPOOL) -> Result<(), ::co::error::Error>; - - /// Computes the gradient of max pooling over the input Tensor `x` without any memory management. - /// - /// Saves the result to `result_diff`. - /// - /// *Attention*:
- /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `pooling_max_grad`. - fn pooling_max_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor, config: &Self::CPOOL) -> Result<(), ::co::error::Error>; + fn pooling_max_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, + result: &SharedTensor, result_diff: &mut SharedTensor, + config: &Self::CPOOL) -> Result<(), ::co::error::Error>; }
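
// The native softmax_grad hunk earlier in this patch computes the Jacobian-vector
// product of softmax directly: dot = sum_i y_i * dy_i, then dr_i = y_i * (dy_i - dot),
// where the first tensor is assumed to already hold the forward softmax output
// (the hunk's "TODO: check" refers to exactly this convention). A standalone sketch:
fn softmax_grad(y: &[f32], dy: &[f32], dr: &mut [f32]) {
    let mut dot = 0.0f32;
    for i in 0..y.len() {
        dot += y[i] * dy[i];
    }
    for i in 0..dr.len() {
        dr[i] = y[i] * (dy[i] - dot);
    }
}

fn main() {
    // Because softmax outputs always sum to one, a constant upstream gradient
    // contributes nothing, so the resulting input gradient is zero.
    let y = [0.25f32; 4];
    let dy = [1.0f32; 4];
    let mut dr = [0.0f32; 4];
    softmax_grad(&y, &dy, &mut dr);
    for d in &dr {
        assert!(d.abs() < 1e-6);
    }
}
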
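
// With the *_plain variants gone, callers hold inputs immutably and no longer call
// add_device()/sync() by hand; read/write tracking happens inside the backend.
// A caller-side sketch written only against the `Relu` trait declared above; how
// the SharedTensors are created and filled is up to collenchyma and not shown here.
fn apply_relu<T, B>(backend: &B,
                    x: &::co::tensor::SharedTensor<T>,
                    result: &mut ::co::tensor::SharedTensor<T>)
                    -> Result<(), ::co::error::Error>
    where B: ::plugin::Relu<T>
{
    // No manual synchronization: the backend requests read access to `x` and
    // write-only access to `result` on its own device.
    backend.relu(x, result)
}
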