refactor/tests: restructure and convert to new memory API
Refactor tests to be generic over backend and element type, use generic
functions to fill input vectors and check outputs. Use macros to instantiate
concrete tests for Native/Cuda and f32/f64. Add randomly generated test
vectors where it makes sense.

Tests are now more readable, about 5-10 times shorter, and more reliable
thanks to better test vectors.
alexandermorozov committed Apr 28, 2016
1 parent 7f84298 commit 0f92627
Showing 11 changed files with 450 additions and 2,799 deletions.
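
In outline, every operation now gets one test function that is generic over the element type and the backend, and a pair of macros instantiates concrete #[test] functions for Native/Cuda and f32/f64. A condensed sketch of the code in this commit (editor's illustration, not part of the diff):

    // One generic test per operation; filled_tensor/tensor_assert_eq come from tests/mod.rs.
    pub fn test_relu<T, F: IFramework>(backend: Backend<F>)
        where T: Float + Epsilon + fmt::Debug,
              Backend<F>: Relu<T> + IBackend {
        let x = filled_tensor(&RELU_DIMS, &RELU_IN);    // fixed f64 reference input
        let mut r = SharedTensor::<T>::new(&RELU_DIMS);
        backend.relu(&x, &mut r).unwrap();
        tensor_assert_eq(&r, &RELU_OUT, 3.0);           // relative-tolerance comparison
    }

    // Macro instantiation: generates #[test] fns relu_f32/relu_f64 per backend.
    test_native!(test_relu, relu_f32, relu_f64);
    test_cuda!(test_relu, relu_f32, relu_f64);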
16 changes: 7 additions & 9 deletions src/lib.rs
@@ -45,22 +45,17 @@
//! // Usually you would not use CUDA but let Collenchyma pick what is available on the machine.
//! let backend = Backend::<Cuda>::default().unwrap();
//! // Initialize two SharedTensors.
- //! let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
- //! let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
+ //! let mut x = SharedTensor::<f32>::new(&(1, 1, 3));
+ //! let mut result = SharedTensor::<f32>::new(&(1, 1, 3));
//! // Fill `x` with some data.
//! let payload: &[f32] = &::std::iter::repeat(1f32).take(x.capacity()).collect::<Vec<f32>>();
//! let native = Native::new();
//! let cpu = native.new_device(native.hardwares()).unwrap();
- //! x.add_device(&cpu).unwrap(); // Add native host memory
- //! x.sync(&cpu).unwrap(); // Sync to native host memory
- //! write_to_memory(x.get_mut(&cpu).unwrap(), payload); // Write to native host memory.
- //! x.sync(backend.device()).unwrap(); // Sync the data to the CUDA device.
+ //! write_to_memory(x.write_only(&cpu).unwrap(), payload); // Write to native host memory.
//! // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU.
//! backend.sigmoid(&mut x, &mut result).unwrap();
//! // See the result.
- //! result.add_device(&cpu).unwrap(); // Add native host memory
- //! result.sync(&cpu).unwrap(); // Sync the result to host memory.
- //! println!("{:?}", result.get(&cpu).unwrap().as_native().unwrap().as_slice::<f64>());
+ //! println!("{:?}", result.read(&cpu).unwrap().as_native().unwrap().as_slice::<f32>());
//! }
//! # }
//! # #[cfg(not(feature = "cuda"))]
@@ -120,6 +115,9 @@ extern crate lazy_static;
#[macro_use]
extern crate log;

+ #[cfg(test)]
+ extern crate rand;
+
pub use plugin::*;

mod plugin;
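
For orientation, the memory-API change that drives this refactor boils down to replacing add_device/sync/get with write_only and read. A minimal host-only sketch, using only calls that appear elsewhere in this commit (editor's illustration, not part of the diff):

    // Allocate without binding to a device up front; write_only() hands back
    // uninitialized native memory to fill, read() returns a synchronized read-only view.
    let backend = Backend::<Native>::default().unwrap();
    let mut x = SharedTensor::<f32>::new(&(1, 1, 3));
    {
        let mem = x.write_only(backend.device()).unwrap().as_mut_native().unwrap();
        for v in mem.as_mut_slice::<f32>() { *v = 1.0; }
    }
    let out = x.read(backend.device()).unwrap().as_native().unwrap();
    println!("{:?}", out.as_slice::<f32>());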
206 changes: 206 additions & 0 deletions src/tests/activation.rs
@@ -0,0 +1,206 @@
// Code for relu, sigmoid and tanh is mostly the same, but at the moment I can't
// see how to abstract it better. A generic function wouldn't be shorter... Macros
// would be, but they would have to accept ~15 parameters, which is quite
// evil by itself, and they'd add another level of indirection. Not nice.
use std::fmt;

use co::prelude::*;
use co::plugin::numeric_helpers::Float;

use plugin::{Relu, ReluPointwise, Sigmoid, SigmoidPointwise, Tanh, TanhPointwise};
use tests::{Epsilon, filled_tensor, tensor_assert_eq};

//----------------------------------------------------------- relu

const RELU_DIMS: [usize; 3] = [1, 2, 2];
const RELU_IN: [f64; 4] = [-1.0, 1.0, 8.25, -3.3];
const RELU_OUT: [f64; 4] = [ 0.0, 1.0, 8.25, 0.0];
const RELU_OUT_GRAD: [f64; 4] = [ 4.6, 2.5, 3.5, 7.5]; // gradient at output
const RELU_IN_GRAD: [f64; 4] = [ 0.0, 2.5, 3.5, 0.0]; // backpropagated to input


pub fn test_relu<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Relu<T> + IBackend {

let x = filled_tensor(&RELU_DIMS, &RELU_IN);
let mut r = SharedTensor::<T>::new(&RELU_DIMS);

backend.relu(&x, &mut r).unwrap();
tensor_assert_eq(&r, &RELU_OUT, 3.0);
}

pub fn test_relu_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Relu<T> + IBackend {

let x = filled_tensor(&RELU_DIMS, &RELU_OUT);
let dx = filled_tensor(&RELU_DIMS, &RELU_OUT_GRAD);
let r = filled_tensor(&RELU_DIMS, &RELU_IN);
let mut dr = SharedTensor::new(&RELU_DIMS);

backend.relu_grad(&x, &dx, &r, &mut dr).unwrap();
tensor_assert_eq(&dr, &RELU_IN_GRAD, 3.0);
}

pub fn test_relu_pointwise<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: ReluPointwise<T> + IBackend {

let mut x = filled_tensor(&RELU_DIMS, &RELU_IN);
backend.relu_pointwise(&mut x).unwrap();
tensor_assert_eq(&x, &RELU_OUT, 3.0);
}

pub fn test_relu_pointwise_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: ReluPointwise<T> + IBackend {
let x = filled_tensor(&RELU_DIMS, &RELU_OUT);
let mut dx = filled_tensor(&RELU_DIMS, &RELU_OUT_GRAD);
backend.relu_pointwise_grad(&x, &mut dx).unwrap();
tensor_assert_eq(&dx, &RELU_IN_GRAD, 3.0);
}

//----------------------------------------------------------- sigmoid

const SIGMOID_DIMS: [usize; 2] = [1, 3];
const SIGMOID_IN: [f64; 3] = [-1.8301, 2.0, 8.33];
const SIGMOID_OUT: [f64; 3] = [0.13822636075814926,
0.8807970779778823,
0.9997588861050526];
const SIGMOID_OUT_GRAD: [f64; 3] = [3.1, -4.93, 1.239]; // gradient at output
const SIGMOID_IN_GRAD: [f64; 3] = [0.3692714852440924, // backpropagated to input
-0.5176183760392876,
0.00029866808544693255];

pub fn test_sigmoid<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Sigmoid<T> + IBackend {

let x = filled_tensor(&SIGMOID_DIMS, &SIGMOID_IN);
let mut r = SharedTensor::<T>::new(&SIGMOID_DIMS);

backend.sigmoid(&x, &mut r).unwrap();
tensor_assert_eq(&r, &SIGMOID_OUT, 3.0);
}

pub fn test_sigmoid_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Sigmoid<T> + IBackend {

let x = filled_tensor(&SIGMOID_DIMS, &SIGMOID_OUT);
let dx = filled_tensor(&SIGMOID_DIMS, &SIGMOID_OUT_GRAD);
let r = filled_tensor(&SIGMOID_DIMS, &SIGMOID_IN);
let mut dr = SharedTensor::new(&SIGMOID_DIMS);

backend.sigmoid_grad(&x, &dx, &r, &mut dr).unwrap();
tensor_assert_eq(&dr, &SIGMOID_IN_GRAD, 3.0);
}

pub fn test_sigmoid_pointwise<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: SigmoidPointwise<T> + IBackend {

let mut x = filled_tensor(&SIGMOID_DIMS, &SIGMOID_IN);
backend.sigmoid_pointwise(&mut x).unwrap();
tensor_assert_eq(&x, &SIGMOID_OUT, 3.0);
}

pub fn test_sigmoid_pointwise_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: SigmoidPointwise<T> + IBackend {
let x = filled_tensor(&SIGMOID_DIMS, &SIGMOID_OUT);
let mut dx = filled_tensor(&SIGMOID_DIMS, &SIGMOID_OUT_GRAD);
backend.sigmoid_pointwise_grad(&x, &mut dx).unwrap();
tensor_assert_eq(&dx, &SIGMOID_IN_GRAD, 3.0);
}

//----------------------------------------------------------- tanh

const TANH_DIMS: [usize; 2] = [2, 2];
const TANH_IN: [f64; 4] = [-1.9334, 0.23, 2.998, -0.9];
const TANH_OUT: [f64; 4] = [-0.9590073344404966,
0.22602835227867096,
0.9950349822915199,
-0.7162978701990245];
const TANH_OUT_GRAD: [f64; 4] = [1.7, 8.1, -4.33, -9.33];
const TANH_IN_GRAD: [f64; 4] = [0.13651838523186707,
7.68618059012613,
-0.04289031278977681,
-4.542938979514026];

pub fn test_tanh<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Tanh<T> + IBackend {

let x = filled_tensor(&TANH_DIMS, &TANH_IN);
let mut r = SharedTensor::<T>::new(&TANH_DIMS);

backend.tanh(&x, &mut r).unwrap();
tensor_assert_eq(&r, &TANH_OUT, 3.0);
}

pub fn test_tanh_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + Epsilon + fmt::Debug,
Backend<F>: Tanh<T> + IBackend {

let x = filled_tensor(&TANH_DIMS, &TANH_OUT);
let dx = filled_tensor(&TANH_DIMS, &TANH_OUT_GRAD);
let r = filled_tensor(&TANH_DIMS, &TANH_IN);
let mut dr = SharedTensor::new(&TANH_DIMS);

backend.tanh_grad(&x, &dx, &r, &mut dr).unwrap();
tensor_assert_eq(&dr, &TANH_IN_GRAD, 10.0);
}

pub fn test_tanh_pointwise<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: TanhPointwise<T> + IBackend {

let mut x = filled_tensor(&TANH_DIMS, &TANH_IN);
backend.tanh_pointwise(&mut x).unwrap();
tensor_assert_eq(&x, &TANH_OUT, 3.0);
}

pub fn test_tanh_pointwise_grad<T, F: IFramework>(backend: Backend<F>)
where T: Float + fmt::Debug + Epsilon,
Backend<F>: TanhPointwise<T> + IBackend {
let x = filled_tensor(&TANH_DIMS, &TANH_OUT);
let mut dx = filled_tensor(&TANH_DIMS, &TANH_OUT_GRAD);
backend.tanh_pointwise_grad(&x, &mut dx).unwrap();
tensor_assert_eq(&dx, &TANH_IN_GRAD, 10.0);
}


mod native {
use super::*;
test_native!(test_relu, relu_f32, relu_f64);
test_native!(test_relu_grad, relu_grad_f32, relu_grad_f64);

test_native!(test_sigmoid, sigmoid_f32, sigmoid_f64);
test_native!(test_sigmoid_grad, sigmoid_grad_f32, sigmoid_grad_f64);

test_native!(test_tanh, tanh_f32, tanh_f64);
test_native!(test_tanh_grad, tanh_grad_f32, tanh_grad_f64);
}

mod cuda {
use super::*;
test_cuda!(test_relu, relu_f32, relu_f64);
test_cuda!(test_relu_grad, relu_grad_f32, relu_grad_f64);
test_cuda!(test_relu_pointwise, relu_pointwise_f32, relu_pointwise_f64);
test_cuda!(test_relu_pointwise_grad,
relu_pointwise_grad_f32, relu_pointwise_grad_f64);

test_cuda!(test_sigmoid, sigmoid_f32, sigmoid_f64);
test_cuda!(test_sigmoid_grad, sigmoid_grad_f32, sigmoid_grad_f64);
test_cuda!(test_sigmoid_pointwise, sigmoid_pointwise_f32, sigmoid_pointwise_f64);
test_cuda!(test_sigmoid_pointwise_grad,
sigmoid_pointwise_grad_f32, sigmoid_pointwise_grad_f64);

test_cuda!(test_tanh, tanh_f32, tanh_f64);
test_cuda!(test_tanh_grad, tanh_grad_f32, tanh_grad_f64);
test_cuda!(test_tanh_pointwise, tanh_pointwise_f32, tanh_pointwise_f64);
test_cuda!(test_tanh_pointwise_grad,
tanh_pointwise_grad_f32, tanh_pointwise_grad_f64);
}
129 changes: 129 additions & 0 deletions src/tests/mod.rs
@@ -0,0 +1,129 @@
use std;
use std::fmt;

use rand::thread_rng;
use rand::distributions::{range, IndependentSample, Range};

use co::prelude::*;
use co::plugin::numeric_helpers::{cast, NumCast};

pub trait Epsilon {
fn epsilon() -> Self;
}

impl Epsilon for f32 {
fn epsilon() -> Self { std::f32::EPSILON }
}

impl Epsilon for f64 {
fn epsilon() -> Self { std::f64::EPSILON }
}


#[cfg(feature = "native")]
fn get_native_backend() -> Backend<Native> {
Backend::<Native>::default().unwrap()
}

#[cfg(feature = "cuda")]
fn get_cuda_backend() -> Backend<Cuda> {
Backend::<Cuda>::default().unwrap()
}

#[cfg(feature = "native")]
pub fn write_to_tensor<T>(xs: &mut SharedTensor<T>, data: &[f64])
where T: ::std::marker::Copy + NumCast {

assert_eq!(xs.desc().size(), data.len());

let native = get_native_backend();
let mem = xs.write_only(native.device()).unwrap().as_mut_native().unwrap();
let mut mem_buffer = mem.as_mut_slice::<T>();
for (i, x) in data.iter().enumerate() {
mem_buffer[i] = cast::<_, T>(*x).unwrap();
}
}

#[cfg(feature = "native")]
pub fn filled_tensor<T>(dims: &[usize], data: &[f64]) -> SharedTensor<T>
where T: ::std::marker::Copy + NumCast {

let mut x = SharedTensor::new(&dims);
write_to_tensor(&mut x, data);
x
}

#[cfg(feature = "native")]
pub fn uniform_random_fill<T>(xs: &mut SharedTensor<T>, low: T, high: T)
where T: Copy + PartialEq + PartialOrd +
fmt::Debug + NumCast + range::SampleRange {

let native = get_native_backend();
let mut mem = xs.write_only(native.device()).unwrap().as_mut_native().unwrap();
let mem_slice = mem.as_mut_slice::<T>();

let mut rng = thread_rng();
let distr = Range::new(low, high);
for x in mem_slice {
*x = distr.ind_sample(&mut rng);
}
}

#[cfg(feature = "native")]
pub fn tensor_assert_eq<T>(xs: &SharedTensor<T>, data: &[f64], epsilon_mul: f64)
where T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon {

let e = cast::<_, f64>(T::epsilon()).unwrap() * epsilon_mul;
let native = get_native_backend();
let mem = xs.read(native.device()).unwrap().as_native().unwrap();
let mem_slice = mem.as_slice::<T>();

assert_eq!(mem_slice.len(), data.len());
for (x1, x2) in mem_slice.iter().zip(data.iter()) {
let x1_t = cast::<_, f64>(*x1).unwrap();
if (x1_t - x2).abs() > e * (x1_t.abs() + x2.abs()) * 0.5 {
println!("Results differ: {:?} != {:?}", mem_slice, data);
assert!(false);
}
}
}
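
For scale, the acceptance criterion above is a relative-error check: a pair of values passes if their difference is within epsilon_mul machine epsilons of their mean magnitude. Written out standalone (editor's illustration, not part of the diff):

    // Pass condition used by tensor_assert_eq, for a single pair of values:
    fn within_tolerance(x1: f64, x2: f64, eps: f64, epsilon_mul: f64) -> bool {
        (x1 - x2).abs() <= eps * epsilon_mul * (x1.abs() + x2.abs()) * 0.5
    }
    // With f32, eps = std::f32::EPSILON ≈ 1.19e-7, so epsilon_mul = 3.0 allows a
    // relative difference of about 3.6e-7; with f64 the bound is about 6.7e-16.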

// All operations for Cuda and Native are provided for f32 and f64.
// These macros remove boilerplate in test definitions. concat_idents!
// is behind a feature gate at the moment; otherwise invocations could
// be made much less verbose.
macro_rules! test_cuda {
($test_name:ident, $f32_name:ident, $f64_name:ident) => {
#[cfg(feature = "cuda")]
#[test]
fn $f32_name() {
$test_name::<f32, _>(::tests::get_cuda_backend())
}

#[cfg(feature = "cuda")]
#[test]
fn $f64_name() {
$test_name::<f64, _>(::tests::get_cuda_backend())
}
}
}

macro_rules! test_native {
($test_name:ident, $f32_name:ident, $f64_name:ident) => {
#[cfg(feature = "native")]
#[test]
fn $f32_name() {
$test_name::<f32, _>(::tests::get_native_backend())
}

#[cfg(feature = "native")]
#[test]
fn $f64_name() {
$test_name::<f64, _>(::tests::get_native_backend())
}
}
}
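
For reference, a single invocation such as test_native!(test_relu, relu_f32, relu_f64) expands to roughly the following (editor's sketch of the expansion):

    #[cfg(feature = "native")]
    #[test]
    fn relu_f32() { test_relu::<f32, _>(::tests::get_native_backend()) }

    #[cfg(feature = "native")]
    #[test]
    fn relu_f64() { test_relu::<f64, _>(::tests::get_native_backend()) }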

mod activation;
mod softmax;
