From 63eeab1f66fd943420664148f0c58f05b7982855 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 14 Apr 2021 13:45:28 -0500 Subject: [PATCH 1/7] ENH: kernels for random.vonmisses --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 56 +++++++++++++++-------- 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 3228b62c988..b4e05105e33 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1234,6 +1234,9 @@ void dpnp_rng_uniform_c(void* result, const long low, const long high, const siz #define M_PI 3.141592653589793238462643383279502884197 #endif +template +class dpnp_acceptance_vonmises_large_kappa_c_kernel; + template void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _DataType kappa, const size_t size) { @@ -1307,25 +1310,33 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da dpnp_memory_free_c(Uvec); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); - auto event_out = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); - event_out.wait(); + auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); - // TODO - // kernel - for (size_t i = 0; i < size; i++) - { - _DataType mod, resi; + cl::sycl::range<1> gws(size); + auto kernel_acceptance = [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + double mod, resi; resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; - mod = fabs(resi); - mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI); + mod = cl::sycl::fabs(resi); + mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); result1[i] = (resi < 0) ? -mod : mod; - } + }; + + auto paral_kernel_acceptance = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_event}); + cgh.parallel_for>(gws, kernel_acceptance); + }; + auto acceptance_event = DPNP_QUEUE.submit(paral_kernel_acceptance); + acceptance_event.wait(); dpnp_memory_free_c(Vvec); return; } +template +class dpnp_acceptance_vonmises_small_kappa_c_kernel; + template void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _DataType kappa, const size_t size) { @@ -1384,20 +1395,25 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da dpnp_memory_free_c(Uvec); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); - auto event_out = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); - event_out.wait(); + auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); - // TODO - // kernel - for (size_t i = 0; i < size; i++) - { - double mod, resi; + cl::sycl::range<1> gws(size); + auto kernel_acceptance = [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + double mod, resi; resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; - mod = fabs(resi); - mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI); + mod = cl::sycl::fabs(resi); + mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); result1[i] = (resi < 0) ? -mod : mod; - } + }; + + auto paral_kernel_acceptance = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_event}); + cgh.parallel_for>(gws, kernel_acceptance); + }; + auto acceptance_event = DPNP_QUEUE.submit(paral_kernel_acceptance); + acceptance_event.wait(); dpnp_memory_free_c(Vvec); return; From 5e6086cfba9ebaeb032197d0f04153db6d6abb63 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 14 Apr 2021 15:18:49 -0500 Subject: [PATCH 2/7] update --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 164 +++++++++++----------- 1 file changed, 79 insertions(+), 85 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index b4e05105e33..35f0ef60375 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1234,9 +1234,6 @@ void dpnp_rng_uniform_c(void* result, const long low, const long high, const siz #define M_PI 3.141592653589793238462643383279502884197 #endif -template -class dpnp_acceptance_vonmises_large_kappa_c_kernel; - template void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _DataType kappa, const size_t size) { @@ -1265,67 +1262,67 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - - for (size_t n = 0; n < size;) + size_t* n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); + for (n[0] = 0; n[0] < size;) { - size_t diff_size = size - n; + size_t diff_size = size - n[0]; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI); - auto event_out = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); - event_out.wait(); - // TODO - // use deps case + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - event_out = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - event_out.wait(); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - // TODO - // kernel - for (size_t i = 0; i < diff_size; i++) - { - _DataType sn, cn, sn2, cn2; - _DataType neg_W_minus_one, V, Y; + cl::sycl::range<1> diff_gws(diff_size); - sn = sin(Uvec[i]); - cn = cos(Uvec[i]); - V = Vvec[i]; - sn2 = sn * sn; - cn2 = cn * cn; + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; - neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); - Y = kappa * (s_minus_one + neg_W_minus_one); + _DataType sn, cn, sn2, cn2; + _DataType neg_W_minus_one, V, Y; - if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y)) - { - Y = neg_W_minus_one * (2 - neg_W_minus_one); - if (Y < 0) - Y = 0.0; - else if (Y > 1.0) - Y = 1.0; + sn = cl::sycl::sin(Uvec[i]); + cn = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + sn2 = sn * sn; + cn2 = cn * cn; - result1[n++] = asin(sqrt(Y)); - } - } - } + neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); + Y = kappa * (s_minus_one + neg_W_minus_one); + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + Y = neg_W_minus_one * (2 - neg_W_minus_one); + if (Y < 0) + Y = 0.0; + else if (Y > 1.0) + Y = 1.0; + n[0] = n[0] + 1; + result1[n[0]] = cl::sycl::asin(cl::sycl::sqrt(Y)); + } + }); + }; + auto some_event = DPNP_QUEUE.submit(paral_kernel_some); + some_event.wait(); + } dpnp_memory_free_c(Uvec); + dpnp_memory_free_c(n); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); cl::sycl::range<1> gws(size); - auto kernel_acceptance = [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; - double mod, resi; - resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; - mod = cl::sycl::fabs(resi); - mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); - result1[i] = (resi < 0) ? -mod : mod; - }; - auto paral_kernel_acceptance = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_event}); - cgh.parallel_for>(gws, kernel_acceptance); + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + double mod, resi; + resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; + mod = cl::sycl::fabs(resi); + mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); + result1[i] = (resi < 0) ? -mod : mod; + }); }; auto acceptance_event = DPNP_QUEUE.submit(paral_kernel_acceptance); acceptance_event.wait(); @@ -1334,13 +1331,10 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da return; } -template -class dpnp_acceptance_vonmises_small_kappa_c_kernel; - template void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _DataType kappa, const size_t size) { - if (!size) + if (!size || !result) { return; } @@ -1363,54 +1357,55 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); + size_t* n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (size_t n = 0; n < size;) + for (n[0] = 0; n[0] < size;) { - size_t diff_size = size - n; + size_t diff_size = size - n[0]; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI); - auto event_out = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); - event_out.wait(); - // TODO - // use deps case + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - event_out = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - event_out.wait(); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - // TODO - // kernel - for (size_t i = 0; i < diff_size; i++) - { - _DataType Z, W, Y, V; - Z = cos(Uvec[i]); - V = Vvec[i]; - W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); - Y = s_kappa - kappa * W; - if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y)) - { - result1[n++] = acos(W); - } - } - } + cl::sycl::range<1> diff_gws(diff_size); + + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + _DataType Z, W, Y, V; + Z = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); + Y = s_kappa - kappa * W; + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + n[0] = n[0] + 1; + result1[n[0]] = cl::sycl::acos(W); + } + }); + }; + auto some_event = DPNP_QUEUE.submit(paral_kernel_some); + some_event.wait(); + } dpnp_memory_free_c(Uvec); + dpnp_memory_free_c(n); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); cl::sycl::range<1> gws(size); - - auto kernel_acceptance = [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; - double mod, resi; - resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; - mod = cl::sycl::fabs(resi); - mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); - result1[i] = (resi < 0) ? -mod : mod; - }; - auto paral_kernel_acceptance = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_event}); - cgh.parallel_for>(gws, kernel_acceptance); + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + double mod, resi; + resi = (Vvec[i] < 0.5) ? mu - result1[i] : mu + result1[i]; + mod = cl::sycl::fabs(resi); + mod = (cl::sycl::fmod(mod + M_PI, 2 * M_PI) - M_PI); + result1[i] = (resi < 0) ? -mod : mod; + }); }; auto acceptance_event = DPNP_QUEUE.submit(paral_kernel_acceptance); acceptance_event.wait(); @@ -1432,7 +1427,6 @@ void dpnp_rng_vonmises_c(void* result, const _DataType mu, const _DataType kappa dpnp_rng_vonmises_large_kappa_c<_DataType>(result, mu, kappa, size); else dpnp_rng_vonmises_small_kappa_c<_DataType>(result, mu, kappa, size); - // TODO case when kappa < kappa < 1e-8 (very small) } template From 4268517153c4c74ccad3f4648795dfd9838379ad Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 14 Apr 2021 15:24:23 -0500 Subject: [PATCH 3/7] refactoring --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 35f0ef60375..fb8d1b1e668 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1237,7 +1237,7 @@ void dpnp_rng_uniform_c(void* result, const long low, const long high, const siz template void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _DataType kappa, const size_t size) { - if (!size) + if (!size || !result) { return; } @@ -1247,6 +1247,7 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da _DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; + size_t* n = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; assert(kappa > 1.0); @@ -1262,7 +1263,7 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - size_t* n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); + n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); for (n[0] = 0; n[0] < size;) { size_t diff_size = size - n[0]; @@ -1343,6 +1344,7 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da _DataType rho_over_kappa, rho, r, s_kappa; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; + size_t* n = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; @@ -1357,7 +1359,7 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - size_t* n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); + n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); for (n[0] = 0; n[0] < size;) { From e9c17c75d96cc79dce89f0ac6a4903cc04d0b17f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 23 Apr 2021 07:33:00 -0500 Subject: [PATCH 4/7] disabled tests on CPU --- tests/skipped_tests.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 72c4deff396..7d443e5dd6d 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -140,6 +140,8 @@ tests/test_linalg.py::test_qr[(5,3)-int32] tests/test_linalg.py::test_qr[(5,3)-int64] tests/test_linalg.py::test_svd[(3,4)-complex128] tests/test_linalg.py::test_svd[(5,3)-complex128] +tests/test_random.py::TestDistributionsVonmises::test_seed[large_kappa] +tests/test_random.py::TestDistributionsVonmises::test_seed[small_kappa] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: x] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: dpnp.asarray(x).astype(dpnp.int8)] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: dpnp.asarray(x).astype(dpnp.complex64)] From df3160f49748c0a85042dbb0cfbfcaa9901be721 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 26 May 2021 06:17:52 -0500 Subject: [PATCH 5/7] tmp solution --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 110 ++++++++++++++++------ tests/skipped_tests.tbl | 2 - 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 6d4bcbd3703..1eec8244920 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1275,21 +1275,18 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (n[0] = 0; n[0] < size;) + for (*n = 0; *n < size;) { - size_t diff_size = size - n[0]; + size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI); auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - cl::sycl::range<1> diff_gws(diff_size); - - auto paral_kernel_some = [&](cl::sycl::handler& cgh) { - cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; - + if (dpnp_queue_is_cpu_c()) + { + for (size_t i = 0; i < diff_size; i++) + { _DataType sn, cn, sn2, cn2; _DataType neg_W_minus_one, V, Y; @@ -1309,13 +1306,49 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Y = 0.0; else if (Y > 1.0) Y = 1.0; - n[0] = n[0] + 1; - result1[n[0]] = cl::sycl::asin(cl::sycl::sqrt(Y)); + + *n = *n + 1; + result1[*n] = cl::sycl::asin(sqrt(Y)); } - }); - }; - auto some_event = DPNP_QUEUE.submit(paral_kernel_some); - some_event.wait(); + } + } + else + { + // TODO + // Failed tests for checking the same seed on CPU + cl::sycl::range<1> diff_gws(diff_size); + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + + _DataType sn, cn, sn2, cn2; + _DataType neg_W_minus_one, V, Y; + + sn = cl::sycl::sin(Uvec[i]); + cn = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + sn2 = sn * sn; + cn2 = cn * cn; + + neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); + Y = kappa * (s_minus_one + neg_W_minus_one); + + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + Y = neg_W_minus_one * (2 - neg_W_minus_one); + if (Y < 0) + Y = 0.0; + else if (Y > 1.0) + Y = 1.0; + *n = *n + 1; + result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y)); + } + }); + }; + auto some_event = DPNP_QUEUE.submit(paral_kernel_some); + some_event.wait(); + } } dpnp_memory_free_c(Uvec); dpnp_memory_free_c(n); @@ -1372,21 +1405,21 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (n[0] = 0; n[0] < size;) + for (*n = 0; *n < size;) { - size_t diff_size = size - n[0]; + size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI); auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - cl::sycl::range<1> diff_gws(diff_size); - - auto paral_kernel_some = [&](cl::sycl::handler& cgh) { - cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; + if (dpnp_queue_is_cpu_c()) + { + uniform_distr_u_event.wait(); + uniform_distr_v_event.wait(); + for (size_t i = 0; i < diff_size; i++) + { _DataType Z, W, Y, V; Z = cl::sycl::cos(Uvec[i]); V = Vvec[i]; @@ -1394,13 +1427,36 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Y = s_kappa - kappa * W; if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) { - n[0] = n[0] + 1; - result1[n[0]] = cl::sycl::acos(W); + *n = *n + 1; + result1[*n] = cl::sycl::acos(W); } - }); - }; + } + } + else + { + cl::sycl::range<1> diff_gws((diff_size)); + + // TODO + // Failed tests for checking the same seed on CPU + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + _DataType Z, W, Y, V; + Z = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); + Y = s_kappa - kappa * W; + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + *n = *n + 1; + result1[*n] = cl::sycl::acos(W); + } + }); + }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); + } } dpnp_memory_free_c(Uvec); dpnp_memory_free_c(n); diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 73f8f1011c7..049f5c85431 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -14,8 +14,6 @@ tests/test_linalg.py::test_qr[(5,3)-float32] tests/test_linalg.py::test_qr[(5,3)-float64] tests/test_linalg.py::test_qr[(5,3)-int32] tests/test_linalg.py::test_qr[(5,3)-int64] -tests/test_random.py::TestDistributionsVonmises::test_seed[large_kappa] -tests/test_random.py::TestDistributionsVonmises::test_seed[small_kappa] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: x] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: dpnp.asarray(x).astype(dpnp.int8)] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[ lambda x: dpnp.asarray(x).astype(dpnp.complex64)] From 0e743d28e41ed4205a26f9bcf9ff82f995774b49 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 26 May 2021 18:17:24 -0500 Subject: [PATCH 6/7] revert last changes on dpnp_krnl_random.cpp --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 94 +++++------------------ tests/test_random.py | 2 +- 2 files changed, 19 insertions(+), 77 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 1eec8244920..afa04c3ad14 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1283,10 +1283,12 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - if (dpnp_queue_is_cpu_c()) - { - for (size_t i = 0; i < diff_size; i++) - { + cl::sycl::range<1> diff_gws(diff_size); + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; + _DataType sn, cn, sn2, cn2; _DataType neg_W_minus_one, V, Y; @@ -1306,49 +1308,13 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Y = 0.0; else if (Y > 1.0) Y = 1.0; - *n = *n + 1; - result1[*n] = cl::sycl::asin(sqrt(Y)); + result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y)); } - } - } - else - { - // TODO - // Failed tests for checking the same seed on CPU - cl::sycl::range<1> diff_gws(diff_size); - auto paral_kernel_some = [&](cl::sycl::handler& cgh) { - cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; - - _DataType sn, cn, sn2, cn2; - _DataType neg_W_minus_one, V, Y; - - sn = cl::sycl::sin(Uvec[i]); - cn = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - sn2 = sn * sn; - cn2 = cn * cn; - - neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); - Y = kappa * (s_minus_one + neg_W_minus_one); - - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) - { - Y = neg_W_minus_one * (2 - neg_W_minus_one); - if (Y < 0) - Y = 0.0; - else if (Y > 1.0) - Y = 1.0; - *n = *n + 1; - result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y)); - } - }); - }; - auto some_event = DPNP_QUEUE.submit(paral_kernel_some); - some_event.wait(); - } + }); + }; + auto some_event = DPNP_QUEUE.submit(paral_kernel_some); + some_event.wait(); } dpnp_memory_free_c(Uvec); dpnp_memory_free_c(n); @@ -1413,13 +1379,12 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); - if (dpnp_queue_is_cpu_c()) - { - uniform_distr_u_event.wait(); - uniform_distr_v_event.wait(); + cl::sycl::range<1> diff_gws((diff_size)); - for (size_t i = 0; i < diff_size; i++) - { + auto paral_kernel_some = [&](cl::sycl::handler& cgh) { + cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); + cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + size_t i = global_id[0]; _DataType Z, W, Y, V; Z = cl::sycl::cos(Uvec[i]); V = Vvec[i]; @@ -1430,33 +1395,10 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da *n = *n + 1; result1[*n] = cl::sycl::acos(W); } - } - } - else - { - cl::sycl::range<1> diff_gws((diff_size)); - - // TODO - // Failed tests for checking the same seed on CPU - auto paral_kernel_some = [&](cl::sycl::handler& cgh) { - cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { - size_t i = global_id[0]; - _DataType Z, W, Y, V; - Z = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); - Y = s_kappa - kappa * W; - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) - { - *n = *n + 1; - result1[*n] = cl::sycl::acos(W); - } - }); - }; + }); + }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); - } } dpnp_memory_free_c(Uvec); dpnp_memory_free_c(n); diff --git a/tests/test_random.py b/tests/test_random.py index 647ee9a8f76..aafeb25168d 100644 --- a/tests/test_random.py +++ b/tests/test_random.py @@ -874,7 +874,7 @@ def test_invalid_args(self): @pytest.mark.parametrize("kappa", [5.0, 0.5], ids=['large_kappa', 'small_kappa']) def test_seed(self, kappa): seed = 28041990 - size = 10 + size = 1000 mu = 2. dpnp.random.seed(seed) a1 = numpy.asarray(dpnp.random.vonmises(mu, kappa, size=size)) From b2b3c42f23b31ba06b6eb6d6cce932c92e0d5042 Mon Sep 17 00:00:00 2001 From: Lukicheva Polina <63358667+LukichevaPolina@users.noreply.github.com> Date: Thu, 7 Oct 2021 16:07:16 +0300 Subject: [PATCH 7/7] Plukiche/vonmisses random (#998) * Fix race condition in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c * Rename arrays and change if condition from kernels in dpnp_rng_vonmises_large_kappa_c and dpnp_rng_vonmises_small_kappa_c * Add space * Fix indices in dpnp_rng_vonmises_small_kappa_c and dpnp_rng_vonmises_large_kappa_c --- dpnp/backend/kernels/dpnp_krnl_random.cpp | 111 +++++++++++++--------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 0d8290ec50b..a205e98a6b9 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1261,7 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da _DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - size_t* n = nullptr; + bool* result_ready = nullptr; + bool* result_mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; assert(kappa > 1.0); @@ -1277,50 +1278,59 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (*n = 0; *n < size;) + + result_ready = reinterpret_cast(dpnp_memory_alloc_c(1 * sizeof(bool))); + result_ready[0] = false; + result_mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(result_ready, result_mask, size); + + while(!result_ready[0]) { - size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI); - auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec); - cl::sycl::range<1> diff_gws(diff_size); + cl::sycl::range<1> gws(size); auto paral_kernel_some = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; + if (!result_mask[i]) { + _DataType sn, cn, sn2, cn2; + _DataType neg_W_minus_one, V, Y; - _DataType sn, cn, sn2, cn2; - _DataType neg_W_minus_one, V, Y; - - sn = cl::sycl::sin(Uvec[i]); - cn = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - sn2 = sn * sn; - cn2 = cn * cn; + sn = cl::sycl::sin(Uvec[i]); + cn = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + sn2 = sn * sn; + cn2 = cn * cn; - neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); - Y = kappa * (s_minus_one + neg_W_minus_one); + neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2); + Y = kappa * (s_minus_one + neg_W_minus_one); - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) - { - Y = neg_W_minus_one * (2 - neg_W_minus_one); - if (Y < 0) - Y = 0.0; - else if (Y > 1.0) - Y = 1.0; - *n = *n + 1; - result1[*n] = cl::sycl::asin(cl::sycl::sqrt(Y)); + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + Y = neg_W_minus_one * (2 - neg_W_minus_one); + if (Y < 0) + Y = 0.0; + else if (Y > 1.0) + Y = 1.0; + + result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y)); + result_mask[i] = true; + } } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); + + dpnp_all_c(result_mask, result_ready, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(n); + dpnp_memory_free_c(result_ready); + dpnp_memory_free_c(result_mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec); @@ -1359,7 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da _DataType rho_over_kappa, rho, r, s_kappa; _DataType* Uvec = nullptr; _DataType* Vvec = nullptr; - size_t* n = nullptr; + bool* result_ready = nullptr; + bool* result_mask = nullptr; const _DataType d_zero = 0.0, d_one = 1.0; @@ -1374,39 +1385,47 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType))); - n = reinterpret_cast(dpnp_memory_alloc_c(sizeof(size_t))); - for (*n = 0; *n < size;) + result_ready = reinterpret_cast(dpnp_memory_alloc_c(1 * sizeof(bool))); + result_ready[0] = false; + result_mask = reinterpret_cast(dpnp_memory_alloc_c(size * sizeof(bool))); + dpnp_full_c(result_ready, result_mask, size); + + while (!result_ready[0]) { - size_t diff_size = size - *n; mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI); - auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec); + auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec); mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one); - auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec); + auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec); - cl::sycl::range<1> diff_gws((diff_size)); + cl::sycl::range<1> gws((size)); auto paral_kernel_some = [&](cl::sycl::handler& cgh) { cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event}); - cgh.parallel_for(diff_gws, [=](cl::sycl::id<1> global_id) { + cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) { size_t i = global_id[0]; - _DataType Z, W, Y, V; - Z = cl::sycl::cos(Uvec[i]); - V = Vvec[i]; - W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); - Y = s_kappa - kappa * W; - if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) - { - *n = *n + 1; - result1[*n] = cl::sycl::acos(W); + if (!result_mask[i]) { + _DataType Z, W, Y, V; + Z = cl::sycl::cos(Uvec[i]); + V = Vvec[i]; + W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z); + Y = s_kappa - kappa * W; + if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y)) + { + result1[i] = cl::sycl::acos(W); + result_mask[i] = true; + } } }); }; auto some_event = DPNP_QUEUE.submit(paral_kernel_some); some_event.wait(); + + dpnp_all_c(result_mask, result_ready, size); } dpnp_memory_free_c(Uvec); - dpnp_memory_free_c(n); + dpnp_memory_free_c(result_ready); + dpnp_memory_free_c(result_mask); mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one); auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);