Skip to content

Commit

Permalink
Enable compiling arm/neon with MSVC for windows on arm64
Browse files Browse the repository at this point in the history
  • Loading branch information
niyas-sait committed Oct 19, 2021
1 parent 07453a2 commit 663b06b
Show file tree
Hide file tree
Showing 6 changed files with 417 additions and 282 deletions.
498 changes: 246 additions & 252 deletions include/xsimd/arch/xsimd_neon.hpp

Large diffs are not rendered by default.

40 changes: 14 additions & 26 deletions include/xsimd/arch/xsimd_neon64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "../types/xsimd_neon64_register.hpp"
#include "../types/xsimd_utils.hpp"
#include "xsimd_neon_dispatcher.hpp"

namespace xsimd
{
Expand Down Expand Up @@ -779,8 +780,8 @@ namespace xsimd

// Generates, inside namespace wrap, inline forwarding functions around the
// vreinterpretq_f64_<SUFFIX> and vreinterpretq_<SUFFIX>_f64 intrinsics, where
// TYPE is the vector type that goes with SUFFIX.
// Two spellings are listed: wrappers named exactly like the intrinsic, which
// call it through the global-scope qualifier `::`, and wrappers with a leading
// underscore, which call the intrinsic by its unqualified name.
// NOTE(review): these look like the removed and the added lines of the diff
// rendered together - confirm which pair the actual file keeps.
#define WRAP_CAST(SUFFIX, TYPE) \
namespace wrap { \
inline float64x2_t vreinterpretq_f64_##SUFFIX(TYPE a) { return ::vreinterpretq_f64_##SUFFIX(a); } \
inline TYPE vreinterpretq_##SUFFIX##_f64(float64x2_t a) { return ::vreinterpretq_##SUFFIX##_f64(a); } \
inline float64x2_t _vreinterpretq_f64_##SUFFIX(TYPE a) { return vreinterpretq_f64_##SUFFIX(a); } \
inline TYPE _vreinterpretq_##SUFFIX##_f64(float64x2_t a) { return vreinterpretq_##SUFFIX##_f64(a); } \
}

WRAP_CAST(u8, uint8x16_t)
Expand All @@ -798,19 +799,13 @@ namespace xsimd
// bitwise_cast overload for neon64 that takes a batch<T, A> and produces a
// batch<double, A>, using the wrap:: vreinterpretq_f64_* wrappers.
// The second parameter is unnamed and only selects the destination type.
// NOTE(review): two implementations are visible in this listing - a tuple-based
// caster that returns first, followed by a NEON_DISPATCHER_UNARY sequence that
// is unreachable as written. Presumably these are the removed and the added
// lines of the diff shown together; confirm against the real file.
template <class A, class T>
batch<double, A> bitwise_cast(batch<T, A> const& arg, batch<double, A> const&, requires_arch<neon64>)
{
// Caster built from one wrapper per source vector type, in the same order as
// the vector types listed in caster_type.
using caster_type = detail::bitwise_caster_impl<float64x2_t,
uint8x16_t, int8x16_t,
uint16x8_t, int16x8_t,
uint32x4_t, int32x4_t,
uint64x2_t, int64x2_t,
float32x4_t>;
const caster_type caster = {
std::make_tuple(wrap::vreinterpretq_f64_u8, wrap::vreinterpretq_f64_s8, wrap::vreinterpretq_f64_u16, wrap::vreinterpretq_f64_s16,
wrap::vreinterpretq_f64_u32, wrap::vreinterpretq_f64_s32, wrap::vreinterpretq_f64_u64, wrap::vreinterpretq_f64_s64,
wrap::vreinterpretq_f64_f32)
};
using register_type = typename batch<T, A>::register_type;
return caster.apply(register_type(arg));
// Dispatcher-based variant: picks the wrapper matching scalar type T and
// stores its value in result. Note that result is declared with the source
// batch's register_type.
register_type result;
NEON_DISPATCHER_UNARY(wrap::_vreinterpretq_f64_u8, wrap::_vreinterpretq_f64_s8, wrap::_vreinterpretq_f64_u16,
wrap::_vreinterpretq_f64_s16, wrap::_vreinterpretq_f64_u32, wrap::_vreinterpretq_f64_s32,
wrap::_vreinterpretq_f64_u64, wrap::_vreinterpretq_f64_s64, wrap::_vreinterpretq_f64_f32,
T, register_type(arg), result);
return result;
}

namespace detail
Expand All @@ -834,20 +829,13 @@ namespace xsimd
// bitwise_cast overload for neon64 that takes a batch<double, A> and produces
// a batch<R, A>, using the wrap:: vreinterpretq_*_f64 wrappers.
// The second parameter is unnamed and only selects the destination type.
// NOTE(review): two implementations are visible in this listing - a tuple-based
// caster that returns first, followed by a NEON_DISPATCHER_UNARY sequence that
// is unreachable as written. Presumably these are the removed and the added
// lines of the diff shown together; confirm against the real file.
template <class A, class R>
batch<R, A> bitwise_cast(batch<double, A> const& arg, batch<R, A> const&, requires_arch<neon64>)
{
// Caster built from one wrapper per destination vector type, in the same
// order as the vector types listed in caster_type.
using caster_type = detail::bitwise_caster_neon64<float64x2_t,
uint8x16_t, int8x16_t,
uint16x8_t, int16x8_t,
uint32x4_t, int32x4_t,
uint64x2_t, int64x2_t,
float32x4_t>;
const caster_type caster = {
std::make_tuple(wrap::vreinterpretq_u8_f64, wrap::vreinterpretq_s8_f64, wrap::vreinterpretq_u16_f64, wrap::vreinterpretq_s16_f64,
wrap::vreinterpretq_u32_f64, wrap::vreinterpretq_s32_f64, wrap::vreinterpretq_u64_f64, wrap::vreinterpretq_s64_f64,
wrap::vreinterpretq_f32_f64)
};
using src_register_type = typename batch<double, A>::register_type;
using dst_register_type = typename batch<R, A>::register_type;
return caster.apply<dst_register_type>(src_register_type(arg));
// Dispatcher-based variant: picks the wrapper matching scalar type R, stores
// its value in a variable of the source register type, then converts that to
// dst_register_type on return.
src_register_type result;
NEON_DISPATCHER_UNARY(wrap::_vreinterpretq_u8_f64, wrap::_vreinterpretq_s8_f64, wrap::_vreinterpretq_u16_f64, wrap::_vreinterpretq_s16_f64,
wrap::_vreinterpretq_u32_f64, wrap::_vreinterpretq_s32_f64, wrap::_vreinterpretq_u64_f64, wrap::_vreinterpretq_s64_f64,
wrap::_vreinterpretq_f32_f64, R, src_register_type(arg), result);
return dst_register_type(result);
}

template <class A>
Expand Down
119 changes: 119 additions & 0 deletions include/xsimd/arch/xsimd_neon_dispatcher.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/***************************************************************************
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
* Martin Renou *
* Copyright (c) QuantStack *
* Copyright (c) Serge Guelton *
* *
* Distributed under the terms of the BSD 3-Clause License. *
* *
* The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/

// Macros that pick a NEON intrinsic according to a scalar type.
//
// NEON_DISPATCHER_BINARY: two-operand form.
// The FN_* arguments are the callables for uint8_t, int8_t, uint16_t, int16_t,
// uint32_t, int32_t, uint64_t, int64_t and float32_t, in that order. SCALAR is
// compared against each of those types with std::is_same; the callable of the
// first match is invoked as FN(LHS, RHS) and its value is assigned to DEST.
// When SCALAR is none of them the expansion reaches
// assert(false && "unsupported type") and DEST is not assigned.
//
// The expansion is a bare if / else-if chain rather than a single statement or
// expression, and every branch is part of the expanded code whatever SCALAR is.
#define NEON_DISPATCHER_BINARY(FN_U8, FN_S8, FN_U16, FN_S16, FN_U32, FN_S32, FN_U64, FN_S64, FN_F32, SCALAR, LHS, RHS, DEST) \
    if (std::is_same<SCALAR, uint8_t>::value) \
    { \
        DEST = FN_U8(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int8_t>::value) \
    { \
        DEST = FN_S8(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint16_t>::value) \
    { \
        DEST = FN_U16(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int16_t>::value) \
    { \
        DEST = FN_S16(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint32_t>::value) \
    { \
        DEST = FN_U32(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int32_t>::value) \
    { \
        DEST = FN_S32(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint64_t>::value) \
    { \
        DEST = FN_U64(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int64_t>::value) \
    { \
        DEST = FN_S64(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, float32_t>::value) \
    { \
        DEST = FN_F32(LHS, RHS); \
    } \
    else \
    { \
        assert(false && "unsupported type"); \
    }

// Two-operand dispatch without the 64-bit integer cases.
// The FN_* arguments are the callables for uint8_t, int8_t, uint16_t, int16_t,
// uint32_t, int32_t and float32_t, in that order. SCALAR is compared against
// each of those types with std::is_same; the callable of the first match is
// invoked as FN(LHS, RHS) and its value is assigned to DEST. Any other SCALAR
// (uint64_t and int64_t included) reaches assert(false && "unsupported type")
// and DEST is not assigned.
// The expansion is a bare if / else-if chain, not a single statement.
#define NEON_DISPATCHER_BINARY_EXCLUDE_64(FN_U8, FN_S8, FN_U16, FN_S16, FN_U32, FN_S32, FN_F32, SCALAR, LHS, RHS, DEST) \
    if (std::is_same<SCALAR, uint8_t>::value) \
    { \
        DEST = FN_U8(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int8_t>::value) \
    { \
        DEST = FN_S8(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint16_t>::value) \
    { \
        DEST = FN_U16(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int16_t>::value) \
    { \
        DEST = FN_S16(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint32_t>::value) \
    { \
        DEST = FN_U32(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int32_t>::value) \
    { \
        DEST = FN_S32(LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, float32_t>::value) \
    { \
        DEST = FN_F32(LHS, RHS); \
    } \
    else \
    { \
        assert(false && "unsupported type"); \
    }

// One-operand dispatch.
// The FN_* arguments are the callables for uint8_t, int8_t, uint16_t, int16_t,
// uint32_t, int32_t, uint64_t, int64_t and float32_t, in that order. SCALAR is
// compared against each of those types with std::is_same; the callable of the
// first match is invoked as FN(OPERAND) and its value is assigned to DEST.
// When SCALAR is none of them the expansion reaches
// assert(false && "unsupported type") and DEST is not assigned.
// The expansion is a bare if / else-if chain, not a single statement.
#define NEON_DISPATCHER_UNARY(FN_U8, FN_S8, FN_U16, FN_S16, FN_U32, FN_S32, FN_U64, FN_S64, FN_F32, SCALAR, OPERAND, DEST) \
    if (std::is_same<SCALAR, uint8_t>::value) \
    { \
        DEST = FN_U8(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int8_t>::value) \
    { \
        DEST = FN_S8(OPERAND); \
    } \
    else if (std::is_same<SCALAR, uint16_t>::value) \
    { \
        DEST = FN_U16(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int16_t>::value) \
    { \
        DEST = FN_S16(OPERAND); \
    } \
    else if (std::is_same<SCALAR, uint32_t>::value) \
    { \
        DEST = FN_U32(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int32_t>::value) \
    { \
        DEST = FN_S32(OPERAND); \
    } \
    else if (std::is_same<SCALAR, uint64_t>::value) \
    { \
        DEST = FN_U64(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int64_t>::value) \
    { \
        DEST = FN_S64(OPERAND); \
    } \
    else if (std::is_same<SCALAR, float32_t>::value) \
    { \
        DEST = FN_F32(OPERAND); \
    } \
    else \
    { \
        assert(false && "unsupported type"); \
    }

// One-operand dispatch without the 64-bit integer cases.
// The FN_* arguments are the callables for uint8_t, int8_t, uint16_t, int16_t,
// uint32_t, int32_t and float32_t, in that order. SCALAR is compared against
// each of those types with std::is_same; the callable of the first match is
// invoked as FN(OPERAND) and its value is assigned to DEST. Any other SCALAR
// (uint64_t and int64_t included) reaches assert(false && "unsupported type")
// and DEST is not assigned.
// The expansion is a bare if / else-if chain, not a single statement.
#define NEON_DISPATCHER_UNARY_EXCLUDE_64(FN_U8, FN_S8, FN_U16, FN_S16, FN_U32, FN_S32, FN_F32, SCALAR, OPERAND, DEST) \
    if (std::is_same<SCALAR, uint8_t>::value) \
    { \
        DEST = FN_U8(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int8_t>::value) \
    { \
        DEST = FN_S8(OPERAND); \
    } \
    else if (std::is_same<SCALAR, uint16_t>::value) \
    { \
        DEST = FN_U16(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int16_t>::value) \
    { \
        DEST = FN_S16(OPERAND); \
    } \
    else if (std::is_same<SCALAR, uint32_t>::value) \
    { \
        DEST = FN_U32(OPERAND); \
    } \
    else if (std::is_same<SCALAR, int32_t>::value) \
    { \
        DEST = FN_S32(OPERAND); \
    } \
    else if (std::is_same<SCALAR, float32_t>::value) \
    { \
        DEST = FN_F32(OPERAND); \
    } \
    else \
    { \
        assert(false && "unsupported type"); \
    }

// Three-operand dispatch (condition plus two operands).
// The FN_* arguments are the callables for uint8_t, int8_t, uint16_t, int16_t,
// uint32_t, int32_t, uint64_t, int64_t and float32_t, in that order. SCALAR is
// compared against each of those types with std::is_same; the callable of the
// first match is invoked as FN(COND, LHS, RHS) and its value is assigned to
// DEST. When SCALAR is none of them the expansion reaches
// assert(false && "unsupported type") and DEST is not assigned.
// The expansion is a bare if / else-if chain, not a single statement.
#define NEON_DISPATCHER_SELECT(FN_U8, FN_S8, FN_U16, FN_S16, FN_U32, FN_S32, FN_U64, FN_S64, FN_F32, SCALAR, COND, LHS, RHS, DEST) \
    if (std::is_same<SCALAR, uint8_t>::value) \
    { \
        DEST = FN_U8(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int8_t>::value) \
    { \
        DEST = FN_S8(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint16_t>::value) \
    { \
        DEST = FN_U16(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int16_t>::value) \
    { \
        DEST = FN_S16(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint32_t>::value) \
    { \
        DEST = FN_U32(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int32_t>::value) \
    { \
        DEST = FN_S32(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, uint64_t>::value) \
    { \
        DEST = FN_U64(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, int64_t>::value) \
    { \
        DEST = FN_S64(COND, LHS, RHS); \
    } \
    else if (std::is_same<SCALAR, float32_t>::value) \
    { \
        DEST = FN_F32(COND, LHS, RHS); \
    } \
    else \
    { \
        assert(false && "unsupported type"); \
    }

3 changes: 3 additions & 0 deletions include/xsimd/config/xsimd_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@
#else
#define XSIMD_WITH_NEON64 0
#endif
#elif defined(_MSC_VER) && defined(_M_ARM64)
#define XSIMD_WITH_NEON 1
#define XSIMD_WITH_NEON64 1
#else
#define XSIMD_WITH_NEON 0
#define XSIMD_WITH_NEON64 0
Expand Down
8 changes: 4 additions & 4 deletions include/xsimd/types/xsimd_batch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ namespace xsimd
private:

template<size_t... Is>
batch(T const* data, detail::index_sequence<Is...>);
batch(T const* data, xsimd::detail::index_sequence<Is...>);

batch logical_and(batch const& other) const;
batch logical_or(batch const& other) const;
Expand Down Expand Up @@ -242,13 +242,13 @@ namespace xsimd
private:

template<size_t... Is>
batch_bool(bool const* data, detail::index_sequence<Is...>);
batch_bool(bool const* data, xsimd::detail::index_sequence<Is...>);

template <class U, class... V, size_t I, size_t... Is>
static register_type make_register(detail::index_sequence<I, Is...>, U u, V... v);
static register_type make_register(xsimd::detail::index_sequence<I, Is...>, U u, V... v);

template <class... V>
static register_type make_register(detail::index_sequence<>, V... v);
static register_type make_register(xsimd::detail::index_sequence<>, V... v);
};

template <class T, class A>
Expand Down
31 changes: 31 additions & 0 deletions include/xsimd/types/xsimd_neon_register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,37 @@ namespace xsimd
: detail::neon_bool_simd_register<T, neon>
{
};

// A few macros and one helper function that cover MSVC-specific differences.
// This branch is selected when _MSC_VER is defined and __clang__ is not.
#if defined(_MSC_VER) && !defined(__clang__)
// Builds a vector of type T from args by forwarding them to
// neon_vector_initializer_constructor<T>.
#define INITIALIZER_LIST_TO_NEON_VECTOR(T, args) (neon_vector_initializer_constructor<T>(args))
// Converts an initializer list into a NEON vector of type S.
// MSVC does not provide an initializer-list constructor for its NEON vector
// type, so the lanes are filled through the n128_* array members of S.
//
// S    : vector type exposing the n128_* lane arrays written below.
// T    : element type of the list (deduced from data).
// data : lane values; data.size() selects the lane width
//        (16 -> 8-bit, 8 -> 16-bit, 4 -> 32-bit, 2 -> 64-bit lanes).
//        Integer elements are stored in the signed or unsigned lanes according
//        to std::is_signed<T>; a four-element floating-point list is stored in
//        the float lanes.
// Returns the filled vector. For any other size no lane is written and the
// result is returned as default-initialized.
template <class S, class T>
S neon_vector_initializer_constructor(std::initializer_list<T> data)
{
    S target;
    if (std::is_floating_point<T>::value && data.size() == 4)
    {
        // std::is_signed is also true for floating-point types, so without
        // this branch four floats would be value-converted into the int32
        // lanes instead of being stored as floats.
        std::copy(data.begin(), data.end(), target.n128_f32);
    }
    else if (std::is_signed<T>::value)
    {
        // NOTE(review): a two-element floating-point list still lands here and
        // is value-converted into the 64-bit integer lanes, as before - confirm
        // whether S offers a double lane view that should be used instead.
        switch (data.size())
        {
        case 16:
            std::copy(data.begin(), data.end(), target.n128_i8);
            break;
        case 8:
            std::copy(data.begin(), data.end(), target.n128_i16);
            break;
        case 4:
            std::copy(data.begin(), data.end(), target.n128_i32);
            break;
        case 2:
            std::copy(data.begin(), data.end(), target.n128_i64);
            break;
        }
    }
    else
    {
        switch (data.size())
        {
        case 16:
            std::copy(data.begin(), data.end(), target.n128_u8);
            break;
        case 8:
            std::copy(data.begin(), data.end(), target.n128_u16);
            break;
        case 4:
            std::copy(data.begin(), data.end(), target.n128_u32);
            break;
        case 2:
            std::copy(data.begin(), data.end(), target.n128_u64);
            break;
        }
    }
    return target;
}
// MSVC branch: the target type T is ignored and R is passed through unchanged
// (just parenthesized).
#define REINTERPRET_CAST(T, R) (R)
#else
// Other compilers: expands to the type name immediately followed by args,
// wrapped in parentheses, i.e. `(T args)`.
#define INITIALIZER_LIST_TO_NEON_VECTOR(T, args) (T args)
// Other compilers: an ordinary reinterpret_cast of R to T.
#define REINTERPRET_CAST(T, R) reinterpret_cast<T>(R)
#endif

}
#endif
Expand Down

0 comments on commit 663b06b

Please sign in to comment.