From 8a90eeaf561ba5bafd05e5a0e5ae6b99f136d0c1 Mon Sep 17 00:00:00 2001
From: "Joshua A. Anderson" <joaander@umich.edu>
Date: Wed, 4 Sep 2024 10:44:30 -0400
Subject: [PATCH] AABB.h passes clang-tidy on Linux.

---
 .clang-tidy            |  2 ++
 freud/locality/AABB.h  | 40 +++++++++++++++++++---------------------
 freud/util/Histogram.h |  2 +-
 3 files changed, 22 insertions(+), 22 deletions(-)
diff --git a/.clang-tidy b/.clang-tidy
index c2e216a99..73af17322 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -11,6 +11,7 @@
 # readability-identifier-length: We use short identifiers sparingly and in places where they are more readable than long ones.
 # bugprone-easily-swappable-parameters: Many methods unavoidably take multiple float parameters.
 # performance-enum-size: There is little to be gained switching from 4-byt ints to 1 byte enums.
+# portability-simd-intrinsics: We intentionally use SSE intrinsics (behind `__SSE__` guards with scalar fallbacks) to improve performance.
 
 Checks: 'bugprone-*,
          cert-*,
@@ -33,4 +34,5 @@ Checks: 'bugprone-*,
          -readability-identifier-length,
          -bugprone-easily-swappable-parameters,
          -performance-enum-size,
+         -portability-simd-intrinsics,
          '
diff --git a/freud/locality/AABB.h b/freud/locality/AABB.h
index 153e40022..4fc2c278c 100644
--- a/freud/locality/AABB.h
+++ b/freud/locality/AABB.h
@@ -4,8 +4,6 @@
 #ifndef AABB_H
 #define AABB_H
 
-#include <algorithm>
-
 #include "VectorMath.h"
 
 /*! \file AABB.h
@@ -25,18 +23,18 @@
 #if defined(__SSE__)
 inline __m128 sse_load_vec3_float(const vec3<float>& value)
 {
-    float in[4];
+    std::array<float, 4> in;
     in[0] = value.x;
     in[1] = value.y;
     in[2] = value.z;
-    in[3] = 0.0f;
-    return _mm_loadu_ps(in);
+    in[3] = 0.0F;
+    return _mm_loadu_ps(in.data());
 }
 
 inline vec3<float> sse_unload_vec3_float(const __m128& v)
 {
-    float out[4];
-    _mm_storeu_ps(out, v);
+    std::array<float, 4> out;
+    _mm_storeu_ps(out.data(), v);
     return vec3<float>(out[0], out[1], out[2]);
 }
 #endif
@@ -75,7 +73,7 @@ struct CACHE_ALIGN AABB
     AABB() : tag(0)
     {
 #if defined(__SSE__)
-        float in = 0.0f;
+        const float in = 0.0F;
         lower_v = _mm_load_ps1(&in);
         upper_v = _mm_load_ps1(&in);
 
@@ -147,9 +145,9 @@ struct CACHE_ALIGN AABB
     vec3<float> getPosition() const
     {
 #if defined(__SSE__)
-        float half = 0.5f;
-        __m128 half_v = _mm_load_ps1(&half);
-        __m128 pos_v = _mm_mul_ps(half_v, _mm_add_ps(lower_v, upper_v));
+        const float half = 0.5F;
+        const __m128 half_v = _mm_load_ps1(&half);
+        const __m128 pos_v = _mm_mul_ps(half_v, _mm_add_ps(lower_v, upper_v));
         return sse_unload_vec3_float(pos_v);
 
 #else
@@ -186,7 +184,7 @@ struct CACHE_ALIGN AABB
     void translate(const vec3<float>& v)
     {
 #if defined(__SSE__)
-        __m128 v_v = sse_load_vec3_float(v);
+        const __m128 v_v = sse_load_vec3_float(v);
         lower_v = _mm_add_ps(lower_v, v_v);
         upper_v = _mm_add_ps(upper_v, v_v);
 
@@ -215,7 +213,7 @@ struct CACHE_ALIGN AABBSphere
     AABBSphere() : radius(0), tag(0)
     {
 #if defined(__SSE__)
-        float in = 0.0f;
+        const float in = 0.0F;
         position_v = _mm_load_ps1(&in);
 
 #endif
@@ -269,7 +267,7 @@ struct CACHE_ALIGN AABBSphere
     void translate(const vec3<float>& v)
     {
 #if defined(__SSE__)
-        __m128 v_v = sse_load_vec3_float(v);
+        const __m128 v_v = sse_load_vec3_float(v);
         position_v = _mm_add_ps(position_v, v_v);
 
 #else
@@ -287,9 +285,9 @@ struct CACHE_ALIGN AABBSphere
 inline bool overlap(const AABB& a, const AABB& b)
 {
 #if defined(__SSE__)
-    int r0 = _mm_movemask_ps(_mm_cmplt_ps(b.upper_v, a.lower_v));
-    int r1 = _mm_movemask_ps(_mm_cmpgt_ps(b.lower_v, a.upper_v));
-    return !(r0 || r1);
+    const int r0 = _mm_movemask_ps(_mm_cmplt_ps(b.upper_v, a.lower_v));
+    const int r1 = _mm_movemask_ps(_mm_cmpgt_ps(b.lower_v, a.upper_v));
+    return !(r0 != 0 || r1 != 0);
 
 #else
     return b.upper.x >= a.lower.x && b.lower.x <= a.upper.x && b.upper.y >= a.lower.y && b.lower.y <= a.upper.y
@@ -306,8 +304,8 @@ inline bool overlap(const AABB& a, const AABB& b)
 inline bool overlap(const AABB& a, const AABBSphere& b)
 {
 #if defined(__SSE__)
-    __m128 dr_v = _mm_sub_ps(_mm_min_ps(_mm_max_ps(b.position_v, a.lower_v), a.upper_v), b.position_v);
-    __m128 dr2_v = _mm_mul_ps(dr_v, dr_v);
+    const __m128 dr_v = _mm_sub_ps(_mm_min_ps(_mm_max_ps(b.position_v, a.lower_v), a.upper_v), b.position_v);
+    const __m128 dr2_v = _mm_mul_ps(dr_v, dr_v);
     // See https://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86
     __m128 shuf = _mm_shuffle_ps(dr2_v, dr2_v, _MM_SHUFFLE(2, 3, 0, 1));
     __m128 sums = _mm_add_ps(dr2_v, shuf);
@@ -333,8 +331,8 @@ inline bool overlap(const AABB& a, const AABBSphere& b)
 inline bool contains(const AABB& a, const AABB& b)
 {
 #if defined(__SSE__)
-    int r0 = _mm_movemask_ps(_mm_cmpge_ps(b.lower_v, a.lower_v));
-    int r1 = _mm_movemask_ps(_mm_cmple_ps(b.upper_v, a.upper_v));
+    const int r0 = _mm_movemask_ps(_mm_cmpge_ps(b.lower_v, a.lower_v));
+    const int r1 = _mm_movemask_ps(_mm_cmple_ps(b.upper_v, a.upper_v));
     return ((r0 & r1) == 0xF);
 
 #else
diff --git a/freud/util/Histogram.h b/freud/util/Histogram.h
index f80638004..ab078e808 100644
--- a/freud/util/Histogram.h
+++ b/freud/util/Histogram.h
@@ -161,7 +161,7 @@ class RegularAxis : public Axis
         float const val = (value - m_min) * m_inverse_bin_width;
         // fast float to int conversion with truncation
 #ifdef __SSE2__
-        size_t bin = _mm_cvtt_ss2si(_mm_load_ss(&val));
+        const size_t bin = _mm_cvtt_ss2si(_mm_load_ss(&val));
 #else
         auto const bin = (size_t) (val);
 #endif