Skip to content

Commit

Permalink
Merge pull request #83 from DrTimothyAldenDavis/master
Browse files Browse the repository at this point in the history
Master
  • Loading branch information
DrTimothyAldenDavis authored Dec 29, 2021
2 parents 74daf51 + 599a6cb commit 7d54a26
Show file tree
Hide file tree
Showing 1,081 changed files with 19,271 additions and 8,560 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ endif ( )
set ( CMAKE_MACOSX_RPATH TRUE )

# version of SuiteSparse:GraphBLAS
set ( GraphBLAS_DATE "Dec 26, 2021")
set ( GraphBLAS_DATE "Dec 28, 2021")
set ( GraphBLAS_VERSION_MAJOR 6 )
set ( GraphBLAS_VERSION_MINOR 1 )
set ( GraphBLAS_VERSION_SUB 0 )
set ( GraphBLAS_VERSION_SUB 1 )

message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} " date: " ${GraphBLAS_DATE} )

Expand Down
9 changes: 7 additions & 2 deletions Doc/ChangeLog
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
Versions 6.1.0 and 5.3.0, Dec 26, 2021
Version 6.1.1, Dec 28, 2021

* minor revision to AVX2 and AVX512f selection
* cpu_features/Makefile: remove test of list_cpu_features so that the
package can be built when cross-compiling

Version 6.1.0, Dec 26, 2021

* added GxB_get options: compiler name and version
* added package: https://github.com/google/cpu_features,
Oct 29, 2021 version
Nov 30, 2021 version
* performance: faster C+=A*B when C is full, A is bitmap/full, and B is
sparse/hyper; added saxpy5 kernel. faster C+=A'*B (dot4 kernel).
* bug fix: deserialization of iso and empty matrices/vectors was broken
Expand Down
Binary file modified Doc/GraphBLAS_UserGuide.pdf
Binary file not shown.
12 changes: 10 additions & 2 deletions Doc/GraphBLAS_UserGuide.tex
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,20 @@ \subsection{Release Notes}

\begin{itemize}

\item Versions 6.1.0 and 5.3.0 (Dec 26, 2021)
\item Version 6.1.1 (Dec 28, 2021) % FIXME

\begin{packed_itemize}
\item minor revision to AVX2 and AVX512f selection
\item \verb'cpu_features/Makefile': remove test of \verb'list_cpu_features'
so that the package can be built when cross-compiling
\end{packed_itemize}

\item Version 6.1.0 (Dec 26, 2021)

\begin{packed_itemize}
\item added \verb'GxB_get' options: compiler name and version.
\item added package: \url{https://github.com/google/cpu_features},
Oct 29, 2021 version.
Nov 30, 2021 version.
\item performance: faster \verb'C+=A*B' when \verb'C' is full,
\verb'A' is bitmap/full, and \verb'B' is sparse/hyper. % saxpy5
Faster \verb"C+=A'*B" when
Expand Down
4 changes: 2 additions & 2 deletions Doc/GraphBLAS_version.tex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
% version of SuiteSparse:GraphBLAS
\date{VERSION
6.1.0,
Dec 26, 2021}
6.1.1,
Dec 28, 2021}

4 changes: 2 additions & 2 deletions GraphBLAS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ endif ( )
set ( CMAKE_MACOSX_RPATH TRUE )

# version of SuiteSparse:GraphBLAS (must match ../CMakeLists.txt)
set ( GraphBLAS_DATE "Dec 26, 2021")
set ( GraphBLAS_DATE "Dec 28, 2021")
set ( GraphBLAS_VERSION_MAJOR 6 )
set ( GraphBLAS_VERSION_MINOR 1 )
set ( GraphBLAS_VERSION_SUB 0 )
set ( GraphBLAS_VERSION_SUB 1 )

message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} " date: " ${GraphBLAS_DATE} )

Expand Down
4 changes: 2 additions & 2 deletions Include/GraphBLAS.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,10 @@

// The version of this implementation, and the GraphBLAS API version:
#define GxB_IMPLEMENTATION_NAME "SuiteSparse:GraphBLAS"
#define GxB_IMPLEMENTATION_DATE "Dec 26, 2021"
#define GxB_IMPLEMENTATION_DATE "Dec 28, 2021"
#define GxB_IMPLEMENTATION_MAJOR 6
#define GxB_IMPLEMENTATION_MINOR 1
#define GxB_IMPLEMENTATION_SUB 0
#define GxB_IMPLEMENTATION_SUB 1
#define GxB_SPEC_DATE "Nov 15, 2021"
#define GxB_SPEC_MAJOR 2
#define GxB_SPEC_MINOR 0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ For the GraphBLAS/GraphBLAS Octave/MATLAB interface *only*:
SPDX-License-Identifier: GPL-3.0-or-later
(see below for a discussion of the licensing of this package).

VERSION 6.1.0, Dec 26, 2021
VERSION 6.1.1, Dec 28, 2021

SuiteSparse:GraphBLAS is a complete implementation of the GraphBLAS standard,
which defines a set of sparse matrix operations on an extended algebra of
Expand Down
2 changes: 1 addition & 1 deletion Source/GB_Global.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ bool GB_Global_GrB_init_called_get (void)
GB_PUBLIC
void GB_Global_cpu_features_query (void)
{
#if defined ( CPU_FEATURES_ARCH_X86 )
#if defined ( CPU_FEATURES_ARCH_X86_64 )
X86Features features = GetX86Info ( ).features ;
GB_Global.cpu_features_avx2 = (bool) (features.avx2) ;
GB_Global.cpu_features_avx512f = (bool) (features.avx512f) ;
Expand Down
13 changes: 10 additions & 3 deletions Source/GB_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@
//------------------------------------------------------------------------------

// gcc 7.5.0 cannot compile code with __attribute__ ((target ("avx512f"))), or
// avx2, but those targets fine with gcc 9.3.0 or later.
// avx2, but those targets are fine with gcc 9.3.0 or later. It might be OK
// on gcc 8.x but I haven't tested this.

#if defined ( CPU_FEATURES_ARCH_X86_64 )

Expand All @@ -259,10 +260,16 @@
#define GB_COMPILER_SUPPORTS_AVX512F 0
#define GB_COMPILER_SUPPORTS_AVX2 0
#endif
#else
// assume all other compilers can handle AVX512F and AVX2 on x86
#elif GB_COMPILER_ICX || GB_COMPILER_ICC || GB_COMPILER_CLANG || \
GB_COMPILER_GCC || GB_COMPILER_MSC
// all these compilers can handle AVX512F and AVX2 on x86
#define GB_COMPILER_SUPPORTS_AVX512F 1
#define GB_COMPILER_SUPPORTS_AVX2 1
#else
// unsure if xlc can handle AVX, but it is not likely to be used on
// the x86 anyway
#define GB_COMPILER_SUPPORTS_AVX512F 0
#define GB_COMPILER_SUPPORTS_AVX2 0
#endif

#else
Expand Down
26 changes: 18 additions & 8 deletions Source/Generated1/GB_AxB__any_pair_iso.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
// AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
// 16 floats or 8 doubles.

#define GB_V16 (16 * GB_CNBITS <= 512)
#define GB_V8 ( 8 * GB_CNBITS <= 512)
#define GB_V4 ( 4 * GB_CNBITS <= 512)
#define GB_V16_512 (16 * GB_CNBITS <= 512)
#define GB_V8_512 ( 8 * GB_CNBITS <= 512)
#define GB_V4_512 ( 4 * GB_CNBITS <= 512)

#define GB_V16 GB_V16_512
#define GB_V8 GB_V8_512
#define GB_V4 GB_V4_512

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
&& GB_V4
&& GB_V4_512

GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
(
Expand All @@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
// AVX2: vector registers are 256 bits, or 32 bytes, which can hold
// 8 floats or 4 doubles.

#define GB_V16_256 (16 * GB_CNBITS <= 256)
#define GB_V8_256 ( 8 * GB_CNBITS <= 256)
#define GB_V4_256 ( 4 * GB_CNBITS <= 256)

#undef GB_V16
#undef GB_V8
#undef GB_V4
#define GB_V16 (16 * GB_CNBITS <= 256)
#define GB_V8 ( 8 * GB_CNBITS <= 256)
#define GB_V4 ( 4 * GB_CNBITS <= 256)

#define GB_V16 GB_V16_256
#define GB_V8 GB_V8_256
#define GB_V4 GB_V4_256

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
&& GB_V4
&& GB_V4_256

GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
(
Expand All @@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_pair_iso)
#undef GB_V16
#undef GB_V8
#undef GB_V4

#define GB_V16 0
#define GB_V8 0
#define GB_V4 0
Expand Down
26 changes: 18 additions & 8 deletions Source/Generated2/GB_AxB__any_div_fc32.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
// AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
// 16 floats or 8 doubles.

#define GB_V16 (16 * GB_CNBITS <= 512)
#define GB_V8 ( 8 * GB_CNBITS <= 512)
#define GB_V4 ( 4 * GB_CNBITS <= 512)
#define GB_V16_512 (16 * GB_CNBITS <= 512)
#define GB_V8_512 ( 8 * GB_CNBITS <= 512)
#define GB_V4_512 ( 4 * GB_CNBITS <= 512)

#define GB_V16 GB_V16_512
#define GB_V8 GB_V8_512
#define GB_V4 GB_V4_512

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
&& GB_V4
&& GB_V4_512

GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
(
Expand All @@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
// AVX2: vector registers are 256 bits, or 32 bytes, which can hold
// 8 floats or 4 doubles.

#define GB_V16_256 (16 * GB_CNBITS <= 256)
#define GB_V8_256 ( 8 * GB_CNBITS <= 256)
#define GB_V4_256 ( 4 * GB_CNBITS <= 256)

#undef GB_V16
#undef GB_V8
#undef GB_V4
#define GB_V16 (16 * GB_CNBITS <= 256)
#define GB_V8 ( 8 * GB_CNBITS <= 256)
#define GB_V4 ( 4 * GB_CNBITS <= 256)

#define GB_V16 GB_V16_256
#define GB_V8 GB_V8_256
#define GB_V4 GB_V4_256

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
&& GB_V4
&& GB_V4_256

GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
(
Expand All @@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fc32)
#undef GB_V16
#undef GB_V8
#undef GB_V4

#define GB_V16 0
#define GB_V8 0
#define GB_V4 0
Expand Down
26 changes: 18 additions & 8 deletions Source/Generated2/GB_AxB__any_div_fc64.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
// AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
// 16 floats or 8 doubles.

#define GB_V16 (16 * GB_CNBITS <= 512)
#define GB_V8 ( 8 * GB_CNBITS <= 512)
#define GB_V4 ( 4 * GB_CNBITS <= 512)
#define GB_V16_512 (16 * GB_CNBITS <= 512)
#define GB_V8_512 ( 8 * GB_CNBITS <= 512)
#define GB_V4_512 ( 4 * GB_CNBITS <= 512)

#define GB_V16 GB_V16_512
#define GB_V8 GB_V8_512
#define GB_V4 GB_V4_512

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
&& GB_V4
&& GB_V4_512

GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
(
Expand All @@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
// AVX2: vector registers are 256 bits, or 32 bytes, which can hold
// 8 floats or 4 doubles.

#define GB_V16_256 (16 * GB_CNBITS <= 256)
#define GB_V8_256 ( 8 * GB_CNBITS <= 256)
#define GB_V4_256 ( 4 * GB_CNBITS <= 256)

#undef GB_V16
#undef GB_V8
#undef GB_V4
#define GB_V16 (16 * GB_CNBITS <= 256)
#define GB_V8 ( 8 * GB_CNBITS <= 256)
#define GB_V4 ( 4 * GB_CNBITS <= 256)

#define GB_V16 GB_V16_256
#define GB_V8 GB_V8_256
#define GB_V4 GB_V4_256

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
&& GB_V4
&& GB_V4_256

GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
(
Expand All @@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fc64)
#undef GB_V16
#undef GB_V8
#undef GB_V4

#define GB_V16 0
#define GB_V8 0
#define GB_V4 0
Expand Down
26 changes: 18 additions & 8 deletions Source/Generated2/GB_AxB__any_div_fp32.c
Original file line number Diff line number Diff line change
Expand Up @@ -433,12 +433,16 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
// AVX512F: vector registers are 512 bits, or 64 bytes, which can hold
// 16 floats or 8 doubles.

#define GB_V16 (16 * GB_CNBITS <= 512)
#define GB_V8 ( 8 * GB_CNBITS <= 512)
#define GB_V4 ( 4 * GB_CNBITS <= 512)
#define GB_V16_512 (16 * GB_CNBITS <= 512)
#define GB_V8_512 ( 8 * GB_CNBITS <= 512)
#define GB_V4_512 ( 4 * GB_CNBITS <= 512)

#define GB_V16 GB_V16_512
#define GB_V8 GB_V8_512
#define GB_V4 GB_V4_512

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX512F \
&& GB_V4
&& GB_V4_512

GB_TARGET_AVX512F static inline void GB_AxB_saxpy5_unrolled_avx512f
(
Expand All @@ -463,15 +467,20 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
// AVX2: vector registers are 256 bits, or 32 bytes, which can hold
// 8 floats or 4 doubles.

#define GB_V16_256 (16 * GB_CNBITS <= 256)
#define GB_V8_256 ( 8 * GB_CNBITS <= 256)
#define GB_V4_256 ( 4 * GB_CNBITS <= 256)

#undef GB_V16
#undef GB_V8
#undef GB_V4
#define GB_V16 (16 * GB_CNBITS <= 256)
#define GB_V8 ( 8 * GB_CNBITS <= 256)
#define GB_V4 ( 4 * GB_CNBITS <= 256)

#define GB_V16 GB_V16_256
#define GB_V8 GB_V8_256
#define GB_V4 GB_V4_256

#if GB_SEMIRING_HAS_AVX_IMPLEMENTATION && GB_COMPILER_SUPPORTS_AVX2 \
&& GB_V4
&& GB_V4_256

GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_avx2
(
Expand All @@ -496,6 +505,7 @@ GrB_Info GB (_AsaxbitB__any_div_fp32)
#undef GB_V16
#undef GB_V8
#undef GB_V4

#define GB_V16 0
#define GB_V8 0
#define GB_V4 0
Expand Down
Loading

0 comments on commit 7d54a26

Please sign in to comment.