diff --git a/.clang-tidy b/.clang-tidy
index c2e216a99..73af17322 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -11,6 +11,7 @@
 # readability-identifier-length: We use short identifiers sparingly and in places where they are more readable than long ones.
 # bugprone-easily-swappable-parameters: Many methods unavoidably take multiple float parameters.
 # performance-enum-size: There is little to be gained switching from 4-byte ints to 1-byte enums.
+# portability-simd-intrinsics: Allow intrinsics to improve performance.
 
 Checks: 'bugprone-*,
  cert-*,
@@ -33,4 +34,5 @@ Checks: 'bugprone-*,
  -readability-identifier-length,
  -bugprone-easily-swappable-parameters,
  -performance-enum-size,
+ -portability-simd-intrinsics,
 '
diff --git a/freud/locality/AABB.h b/freud/locality/AABB.h
index 153e40022..4fc2c278c 100644
--- a/freud/locality/AABB.h
+++ b/freud/locality/AABB.h
@@ -4,8 +4,6 @@
 #ifndef AABB_H
 #define AABB_H
 
-#include <immintrin.h>
-
 #include "VectorMath.h"
 
 /*! \file AABB.h
@@ -25,18 +23,18 @@
 #if defined(__SSE__)
 inline __m128 sse_load_vec3_float(const vec3<float>& value)
 {
-    float in[4];
+    std::array<float, 4> in;
     in[0] = value.x;
     in[1] = value.y;
     in[2] = value.z;
-    in[3] = 0.0f;
-    return _mm_loadu_ps(in);
+    in[3] = 0.0F;
+    return _mm_loadu_ps(in.data());
 }
 
 inline vec3<float> sse_unload_vec3_float(const __m128& v)
 {
-    float out[4];
-    _mm_storeu_ps(out, v);
+    std::array<float, 4> out;
+    _mm_storeu_ps(out.data(), v);
     return vec3<float>(out[0], out[1], out[2]);
 }
 #endif
@@ -75,7 +73,7 @@ struct CACHE_ALIGN AABB
     AABB() : tag(0)
     {
 #if defined(__SSE__)
-        float in = 0.0f;
+        const float in = 0.0F;
         lower_v = _mm_load_ps1(&in);
         upper_v = _mm_load_ps1(&in);
 
@@ -147,9 +145,9 @@ struct CACHE_ALIGN AABB
     vec3<float> getPosition() const
     {
 #if defined(__SSE__)
-        float half = 0.5f;
-        __m128 half_v = _mm_load_ps1(&half);
-        __m128 pos_v = _mm_mul_ps(half_v, _mm_add_ps(lower_v, upper_v));
+        const float half = 0.5F;
+        const __m128 half_v = _mm_load_ps1(&half);
+        const __m128 pos_v = _mm_mul_ps(half_v, _mm_add_ps(lower_v, upper_v));
         return sse_unload_vec3_float(pos_v);
 #else
 
@@ -186,7 +184,7 @@ struct CACHE_ALIGN AABB
     void translate(const vec3<float>& v)
     {
 #if defined(__SSE__)
-        __m128 v_v = sse_load_vec3_float(v);
+        const __m128 v_v = sse_load_vec3_float(v);
         lower_v = _mm_add_ps(lower_v, v_v);
         upper_v = _mm_add_ps(upper_v, v_v);
 
@@ -215,7 +213,7 @@ struct CACHE_ALIGN AABBSphere
     AABBSphere() : radius(0), tag(0)
    {
 #if defined(__SSE__)
-        float in = 0.0f;
+        const float in = 0.0F;
         position_v = _mm_load_ps1(&in);
 #endif
 
@@ -269,7 +267,7 @@ struct CACHE_ALIGN AABBSphere
     void translate(const vec3<float>& v)
     {
 #if defined(__SSE__)
-        __m128 v_v = sse_load_vec3_float(v);
+        const __m128 v_v = sse_load_vec3_float(v);
         position_v = _mm_add_ps(position_v, v_v);
 #else
 
@@ -287,9 +285,9 @@ struct CACHE_ALIGN AABBSphere
 inline bool overlap(const AABB& a, const AABB& b)
 {
 #if defined(__SSE__)
-    int r0 = _mm_movemask_ps(_mm_cmplt_ps(b.upper_v, a.lower_v));
-    int r1 = _mm_movemask_ps(_mm_cmpgt_ps(b.lower_v, a.upper_v));
-    return !(r0 || r1);
+    const int r0 = _mm_movemask_ps(_mm_cmplt_ps(b.upper_v, a.lower_v));
+    const int r1 = _mm_movemask_ps(_mm_cmpgt_ps(b.lower_v, a.upper_v));
+    return !(r0 != 0 || r1 != 0);
 #else
     return b.upper.x >= a.lower.x && b.lower.x <= a.upper.x && b.upper.y >= a.lower.y && b.lower.y <= a.upper.y
         && b.upper.z >= a.lower.z && b.lower.z <= a.upper.z;
@@ -306,8 +304,8 @@ inline bool overlap(const AABB& a, const AABB& b)
 inline bool overlap(const AABB& a, const AABBSphere& b)
 {
 #if defined(__SSE__)
-    __m128 dr_v = _mm_sub_ps(_mm_min_ps(_mm_max_ps(b.position_v, a.lower_v), a.upper_v), b.position_v);
-    __m128 dr2_v = _mm_mul_ps(dr_v, dr_v);
+    const __m128 dr_v = _mm_sub_ps(_mm_min_ps(_mm_max_ps(b.position_v, a.lower_v), a.upper_v), b.position_v);
+    const __m128 dr2_v = _mm_mul_ps(dr_v, dr_v);
     // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86
     __m128 shuf = _mm_shuffle_ps(dr2_v, dr2_v, _MM_SHUFFLE(2, 3, 0, 1));
     __m128 sums = _mm_add_ps(dr2_v, shuf);
@@ -333,8 +331,8 @@ inline bool overlap(const AABB& a, const AABBSphere& b)
 inline bool contains(const AABB& a, const AABB& b)
 {
 #if defined(__SSE__)
-    int r0 = _mm_movemask_ps(_mm_cmpge_ps(b.lower_v, a.lower_v));
-    int r1 = _mm_movemask_ps(_mm_cmple_ps(b.upper_v, a.upper_v));
+    const int r0 = _mm_movemask_ps(_mm_cmpge_ps(b.lower_v, a.lower_v));
+    const int r1 = _mm_movemask_ps(_mm_cmple_ps(b.upper_v, a.upper_v));
 
     return ((r0 & r1) == 0xF);
 #else
diff --git a/freud/util/Histogram.h b/freud/util/Histogram.h
index f80638004..ab078e808 100644
--- a/freud/util/Histogram.h
+++ b/freud/util/Histogram.h
@@ -161,7 +161,7 @@ class RegularAxis : public Axis
         float const val = (value - m_min) * m_inverse_bin_width;
         // fast float to int conversion with truncation
 #ifdef __SSE2__
-        size_t bin = _mm_cvtt_ss2si(_mm_load_ss(&val));
+        const size_t bin = _mm_cvtt_ss2si(_mm_load_ss(&val));
 #else
         auto const bin = (size_t) (val);
 #endif