diff --git a/include/simsimd/probability.h b/include/simsimd/probability.h index ca10b782..b043299c 100644 --- a/include/simsimd/probability.h +++ b/include/simsimd/probability.h @@ -225,7 +225,7 @@ SIMSIMD_PUBLIC void simsimd_js_f32_neon(simsimd_f32_t const *a, simsimd_f32_t co simsimd_f32_t log2_normalizer = 0.693147181f; simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2; - *result = SIMSIMD_SQRT(sum); + *result = _simsimd_sqrt_f32_neon(sum); } #pragma clang attribute pop @@ -298,7 +298,7 @@ SIMSIMD_PUBLIC void simsimd_js_f16_neon(simsimd_f16_t const *a, simsimd_f16_t co simsimd_f32_t log2_normalizer = 0.693147181f; simsimd_f32_t sum = vaddvq_f32(sum_vec) * log2_normalizer / 2; - *result = SIMSIMD_SQRT(sum); + *result = _simsimd_sqrt_f32_neon(sum); } #pragma clang attribute pop @@ -404,8 +404,8 @@ SIMSIMD_PUBLIC void simsimd_js_f16_haswell(simsimd_f16_t const *a, simsimd_f16_t simsimd_f32_t log2_normalizer = 0.693147181f; simsimd_f32_t sum = _simsimd_reduce_f32x8_haswell(sum_vec); - sum *= log2_normalizer; - *result = sum / 2; + sum *= log2_normalizer / 2; + *result = _simsimd_sqrt_f32_haswell(sum); } #pragma clang attribute pop @@ -497,7 +497,9 @@ SIMSIMD_PUBLIC void simsimd_js_f32_skylake(simsimd_f32_t const *a, simsimd_f32_t if (n) goto simsimd_js_f32_skylake_cycle; simsimd_f32_t log2_normalizer = 0.693147181f; - *result = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec)) * log2_normalizer / 2; + simsimd_f32_t sum = _mm512_reduce_add_ps(_mm512_add_ps(sum_a_vec, sum_b_vec)); + sum *= log2_normalizer / 2; + *result = _simsimd_sqrt_f32_haswell(sum); } #pragma clang attribute pop @@ -587,7 +589,9 @@ SIMSIMD_PUBLIC void simsimd_js_f16_sapphire(simsimd_f16_t const *a, simsimd_f16_ if (n) goto simsimd_js_f16_sapphire_cycle; simsimd_f32_t log2_normalizer = 0.693147181f; - *result = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec)) * log2_normalizer / 2; + simsimd_f32_t sum = _mm512_reduce_add_ph(_mm512_add_ph(sum_a_vec, sum_b_vec)); + sum *= log2_normalizer / 2; + *result = _simsimd_sqrt_f32_haswell(sum); } #pragma clang attribute pop