Merge pull request #83 from DrTimothyAldenDavis/master

Master
DrTimothyAldenDavis · Dec 29, 2021 · 7d54a26 · 7d54a26
2 parents 74daf51 + 599a6cb
commit 7d54a26
Show file tree

Hide file tree

Showing 1,081 changed files with 19,271 additions and 8,560 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -26,10 +26,10 @@ endif ( )
 set ( CMAKE_MACOSX_RPATH TRUE )
 
 # version of SuiteSparse:GraphBLAS
-set ( GraphBLAS_DATE "Dec 26, 2021")
+set ( GraphBLAS_DATE "Dec 28, 2021")
 set ( GraphBLAS_VERSION_MAJOR 6 )
 set ( GraphBLAS_VERSION_MINOR 1 )
-set ( GraphBLAS_VERSION_SUB   0 )
+set ( GraphBLAS_VERSION_SUB   1 )
 
 message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB}  " date: " ${GraphBLAS_DATE} )
 

diff --git a/Doc/ChangeLog b/Doc/ChangeLog
@@ -1,8 +1,13 @@
-Versions 6.1.0 and 5.3.0, Dec 26, 2021
+Version 6.1.1, Dec 28, 2021
+
+    * minor revision to AVX2 and AVX512f selection
+    * cpu_features/Makefile: remove test of list_cpu_features
+
+Version 6.1.0, Dec 26, 2021
 
     * added GxB_get options: compiler name and version
     * added package: https://github.com/google/cpu_features,
-        Oct 29, 2021 version
+        Nov 30, 2021 version
     * performance: faster C+=A*B when C is full, A is bitmap/full, and B is
         sparse/hyper; added saxpy5 kernel.  faster C+=A'*B (dot4 kernel).
     * bug fix: deserialization of iso and empty matrices/vectors was broken

diff --git a/Doc/GraphBLAS_UserGuide.pdf b/Doc/GraphBLAS_UserGuide.pdf
diff --git a/Doc/GraphBLAS_UserGuide.tex b/Doc/GraphBLAS_UserGuide.tex
@@ -148,12 +148,20 @@ \subsection{Release Notes}
 
 \begin{itemize}
 
-\item Versions 6.1.0 and 5.3.0 (Dec 26, 2021)
+\item Version 6.1.1 (Dec 28, 2021)  % FIXME
+
+    \begin{packed_itemize}
+    \item minor revision to AVX2 and AVX512f selection
+    \item \verb'cpu_features/Makefile': remove test of \verb'list_cpu_features'
+        so that the package can be built when cross-compiling
+    \end{packed_itemize}
+
+\item Version 6.1.0 (Dec 26, 2021)
 
     \begin{packed_itemize}
     \item added \verb'GxB_get' options: compiler name and version.
     \item added package: \url{https://github.com/google/cpu_features},
-        Oct 29, 2021 version.
+        Nov 30, 2021 version.
     \item performance: faster \verb'C+=A*B' when \verb'C' is full,
         \verb'A' is bitmap/full, and \verb'B' is sparse/hyper.  % saxpy5
         Faster \verb"C+=A'*B" when

diff --git a/Doc/GraphBLAS_version.tex b/Doc/GraphBLAS_version.tex
@@ -1,5 +1,5 @@
 % version of SuiteSparse:GraphBLAS
 \date{VERSION
-6.1.0,
-Dec 26, 2021}
+6.1.1,
+Dec 28, 2021}
 
diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt
@@ -29,10 +29,10 @@ endif ( )
 set ( CMAKE_MACOSX_RPATH TRUE )
 
 # version of SuiteSparse:GraphBLAS (must match ../CMakeLists.txt)
-set ( GraphBLAS_DATE "Dec 26, 2021")
+set ( GraphBLAS_DATE "Dec 28, 2021")
 set ( GraphBLAS_VERSION_MAJOR 6 )
 set ( GraphBLAS_VERSION_MINOR 1 )
-set ( GraphBLAS_VERSION_SUB   0 )
+set ( GraphBLAS_VERSION_SUB   1 )
 
 message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB}  " date: " ${GraphBLAS_DATE} )
 

diff --git a/Include/GraphBLAS.h b/Include/GraphBLAS.h
@@ -206,10 +206,10 @@
 
 // The version of this implementation, and the GraphBLAS API version:
 #define GxB_IMPLEMENTATION_NAME "SuiteSparse:GraphBLAS"
-#define GxB_IMPLEMENTATION_DATE "Dec 26, 2021"
+#define GxB_IMPLEMENTATION_DATE "Dec 28, 2021"
 #define GxB_IMPLEMENTATION_MAJOR 6
 #define GxB_IMPLEMENTATION_MINOR 1
-#define GxB_IMPLEMENTATION_SUB   0
+#define GxB_IMPLEMENTATION_SUB   1
 #define GxB_SPEC_DATE "Nov 15, 2021"
 #define GxB_SPEC_MAJOR 2
 #define GxB_SPEC_MINOR 0

diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ For the GraphBLAS/GraphBLAS Octave/MATLAB interface *only*:
 SPDX-License-Identifier: GPL-3.0-or-later
 (see below for a discussion of the licensing of this package).
 
-VERSION 6.1.0, Dec 26, 2021
+VERSION 6.1.1, Dec 28, 2021
 
 SuiteSparse:GraphBLAS is a complete implementation of the GraphBLAS standard,
 which defines a set of sparse matrix operations on an extended algebra of

diff --git a/Source/GB_Global.c b/Source/GB_Global.c
@@ -390,7 +390,7 @@ bool GB_Global_GrB_init_called_get (void)
 GB_PUBLIC
 void GB_Global_cpu_features_query (void)
 { 
-    #if defined ( CPU_FEATURES_ARCH_X86 )
+    #if defined ( CPU_FEATURES_ARCH_X86_64 )
     X86Features features = GetX86Info ( ).features ;
     GB_Global.cpu_features_avx2 = (bool) (features.avx2) ;
     GB_Global.cpu_features_avx512f = (bool) (features.avx512f) ;

diff --git a/Source/GB_compiler.h b/Source/GB_compiler.h
@@ -245,7 +245,8 @@
 //------------------------------------------------------------------------------
 
 // gcc 7.5.0 cannot compile code with __attribute__ ((target ("avx512f"))), or
-// avx2, but those targets fine with gcc 9.3.0 or later.
+// avx2, but those targets are fine with gcc 9.3.0 or later.  It might be OK
+// on gcc 8.x but I haven't tested this.
 
 #if defined ( CPU_FEATURES_ARCH_X86_64 )
 
@@ -259,10 +260,16 @@
             #define GB_COMPILER_SUPPORTS_AVX512F 0
             #define GB_COMPILER_SUPPORTS_AVX2 0
         #endif
-    #else
-        // assume all other compilers can handle AVX512F and AVX2 on x86
+    #elif GB_COMPILER_ICX || GB_COMPILER_ICC || GB_COMPILER_CLANG || \
+          GB_COMPILER_GCC || GB_COMPILER_MSC
+        // all these compilers can handle AVX512F and AVX2 on x86
         #define GB_COMPILER_SUPPORTS_AVX512F 1
         #define GB_COMPILER_SUPPORTS_AVX2 1
+    #else
+        // unsure if xlc can handle AVX, but it is not likely to be used on
+        // the x86 anyway
+        #define GB_COMPILER_SUPPORTS_AVX512F 0
+        #define GB_COMPILER_SUPPORTS_AVX2 0
     #endif
 
 #else

diff --git a/Source/Generated1/GB_AxB__any_pair_iso.c b/Source/Generated1/GB_AxB__any_pair_iso.c
@@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
         // AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
         // 16 floats or 8 doubles.
 
-        #define GB_V16 (16 * GB_CNBITS <= 512)
-        #define GB_V8  ( 8 * GB_CNBITS <= 512)
-        #define GB_V4  ( 4 * GB_CNBITS <= 512)
+        #define GB_V16_512 (16 * GB_CNBITS <= 512)
+        #define GB_V8_512  ( 8 * GB_CNBITS <= 512)
+        #define GB_V4_512  ( 4 * GB_CNBITS <= 512)
+
+        #define GB_V16 GB_V16_512
+        #define GB_V8  GB_V8_512
+        #define GB_V4  GB_V4_512
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
-            && GB_V4
+            && GB_V4_512
 
             GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
             (
@@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
         // AVX2: vector registers are 256 bits, or 32 bytes, which can hold
         // 8 floats or 4 doubles.
 
+        #define GB_V16_256 (16 * GB_CNBITS <= 256)
+        #define GB_V8_256  ( 8 * GB_CNBITS <= 256)
+        #define GB_V4_256  ( 4 * GB_CNBITS <= 256)
+
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
-        #define GB_V16 (16 * GB_CNBITS <= 256)
-        #define GB_V8  ( 8 * GB_CNBITS <= 256)
-        #define GB_V4  ( 4 * GB_CNBITS <= 256)
+
+        #define GB_V16 GB_V16_256
+        #define GB_V8  GB_V8_256
+        #define GB_V4  GB_V4_256
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
-            && GB_V4
+            && GB_V4_256
 
             GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
             (
@@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
+
         #define GB_V16 0
         #define GB_V8  0
         #define GB_V4  0

diff --git a/Source/Generated2/GB_AxB__any_div_fc32.c b/Source/Generated2/GB_AxB__any_div_fc32.c
@@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
         // AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
         // 16 floats or 8 doubles.
 
-        #define GB_V16 (16 * GB_CNBITS <= 512)
-        #define GB_V8  ( 8 * GB_CNBITS <= 512)
-        #define GB_V4  ( 4 * GB_CNBITS <= 512)
+        #define GB_V16_512 (16 * GB_CNBITS <= 512)
+        #define GB_V8_512  ( 8 * GB_CNBITS <= 512)
+        #define GB_V4_512  ( 4 * GB_CNBITS <= 512)
+
+        #define GB_V16 GB_V16_512
+        #define GB_V8  GB_V8_512
+        #define GB_V4  GB_V4_512
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
-            && GB_V4
+            && GB_V4_512
 
             GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
             (
@@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
         // AVX2: vector registers are 256 bits, or 32 bytes, which can hold
         // 8 floats or 4 doubles.
 
+        #define GB_V16_256 (16 * GB_CNBITS <= 256)
+        #define GB_V8_256  ( 8 * GB_CNBITS <= 256)
+        #define GB_V4_256  ( 4 * GB_CNBITS <= 256)
+
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
-        #define GB_V16 (16 * GB_CNBITS <= 256)
-        #define GB_V8  ( 8 * GB_CNBITS <= 256)
-        #define GB_V4  ( 4 * GB_CNBITS <= 256)
+
+        #define GB_V16 GB_V16_256
+        #define GB_V8  GB_V8_256
+        #define GB_V4  GB_V4_256
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
-            && GB_V4
+            && GB_V4_256
 
             GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
             (
@@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
+
         #define GB_V16 0
         #define GB_V8  0
         #define GB_V4  0

diff --git a/Source/Generated2/GB_AxB__any_div_fc64.c b/Source/Generated2/GB_AxB__any_div_fc64.c
@@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
         // AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
         // 16 floats or 8 doubles.
 
-        #define GB_V16 (16 * GB_CNBITS <= 512)
-        #define GB_V8  ( 8 * GB_CNBITS <= 512)
-        #define GB_V4  ( 4 * GB_CNBITS <= 512)
+        #define GB_V16_512 (16 * GB_CNBITS <= 512)
+        #define GB_V8_512  ( 8 * GB_CNBITS <= 512)
+        #define GB_V4_512  ( 4 * GB_CNBITS <= 512)
+
+        #define GB_V16 GB_V16_512
+        #define GB_V8  GB_V8_512
+        #define GB_V4  GB_V4_512
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
-            && GB_V4
+            && GB_V4_512
 
             GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
             (
@@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
         // AVX2: vector registers are 256 bits, or 32 bytes, which can hold
         // 8 floats or 4 doubles.
 
+        #define GB_V16_256 (16 * GB_CNBITS <= 256)
+        #define GB_V8_256  ( 8 * GB_CNBITS <= 256)
+        #define GB_V4_256  ( 4 * GB_CNBITS <= 256)
+
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
-        #define GB_V16 (16 * GB_CNBITS <= 256)
-        #define GB_V8  ( 8 * GB_CNBITS <= 256)
-        #define GB_V4  ( 4 * GB_CNBITS <= 256)
+
+        #define GB_V16 GB_V16_256
+        #define GB_V8  GB_V8_256
+        #define GB_V4  GB_V4_256
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
-            && GB_V4
+            && GB_V4_256
 
             GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
             (
@@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
+
         #define GB_V16 0
         #define GB_V8  0
         #define GB_V4  0

diff --git a/Source/Generated2/GB_AxB__any_div_fp32.c b/Source/Generated2/GB_AxB__any_div_fp32.c
@@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
         // AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
         // 16 floats or 8 doubles.
 
-        #define GB_V16 (16 * GB_CNBITS <= 512)
-        #define GB_V8  ( 8 * GB_CNBITS <= 512)
-        #define GB_V4  ( 4 * GB_CNBITS <= 512)
+        #define GB_V16_512 (16 * GB_CNBITS <= 512)
+        #define GB_V8_512  ( 8 * GB_CNBITS <= 512)
+        #define GB_V4_512  ( 4 * GB_CNBITS <= 512)
+
+        #define GB_V16 GB_V16_512
+        #define GB_V8  GB_V8_512
+        #define GB_V4  GB_V4_512
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
-            && GB_V4
+            && GB_V4_512
 
             GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
             (
@@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
         // AVX2: vector registers are 256 bits, or 32 bytes, which can hold
         // 8 floats or 4 doubles.
 
+        #define GB_V16_256 (16 * GB_CNBITS <= 256)
+        #define GB_V8_256  ( 8 * GB_CNBITS <= 256)
+        #define GB_V4_256  ( 4 * GB_CNBITS <= 256)
+
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
-        #define GB_V16 (16 * GB_CNBITS <= 256)
-        #define GB_V8  ( 8 * GB_CNBITS <= 256)
-        #define GB_V4  ( 4 * GB_CNBITS <= 256)
+
+        #define GB_V16 GB_V16_256
+        #define GB_V8  GB_V8_256
+        #define GB_V4  GB_V4_256
 
         #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
-            && GB_V4
+            && GB_V4_256
 
             GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
             (
@@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
         #undef  GB_V16
         #undef  GB_V8
         #undef  GB_V4
+
         #define GB_V16 0
         #define GB_V8  0
         #define GB_V4  0