From ef290bd25a2e28bdc42948a6e798cf1be44933a8 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Sat, 17 Aug 2019 14:35:58 -0500 Subject: [PATCH 01/16] Work around gcc 9 compiler bug. --- src/external/marray/include/varray_base.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/external/marray/include/varray_base.hpp b/src/external/marray/include/varray_base.hpp index dfd6c0214..d8389c1f1 100644 --- a/src/external/marray/include/varray_base.hpp +++ b/src/external/marray/include/varray_base.hpp @@ -332,7 +332,7 @@ class varray_base std::initializer_list stride) { return is_contiguous, - std::initializer_list>(len, stride); + std::initializer_list,void>(len, stride); } template Date: Wed, 1 Apr 2020 12:08:12 -0500 Subject: [PATCH 02/16] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c92212065..f0783b44a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -74,5 +74,5 @@ script: - if [ $RUN_TEST -eq 1 ]; then travis_wait 30 bin/test; fi - 'if [ $TEST_INSTALL -eq 1 ]; then make install; - $CXX -std=c++11 -o test_install -I./install/include test/test_install.cxx -L./install/lib -ltblis; + $CXX -std=c++14 -o test_install -I./install/include test/test_install.cxx -L./install/lib -ltblis; fi' From 519a9367c0a1c2c5465485e44decd18a408d38f9 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Tue, 5 May 2020 14:45:45 -0500 Subject: [PATCH 03/16] Add missing #include. --- src/nodes/matrify.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nodes/matrify.hpp b/src/nodes/matrify.hpp index ab981857e..45a0972b5 100644 --- a/src/nodes/matrify.hpp +++ b/src/nodes/matrify.hpp @@ -11,6 +11,8 @@ #include "configs/configs.hpp" +#include + namespace tblis { From cbd8a8e2d66cd7b38e5f4e216615de5ae0baba81 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Tue, 5 May 2020 15:27:39 -0500 Subject: [PATCH 04/16] Bump llvm version on Travis macOS. --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index f0783b44a..2405e852b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,9 +33,9 @@ matrix: before_install: - 'if [ "$TRAVIS_OS_NAME" = "osx" ]; then rm /usr/local/include/c++; - brew update && brew install gcc@6 tbb llvm@3.9; + brew update && brew install gcc@6 tbb llvm@9; rm /usr/local/include/c++; - brew link --force --overwrite llvm@3.9; + brew link --force --overwrite llvm@9; fi' addons: @@ -60,7 +60,7 @@ install: if [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="clang-3.9 -fopenmp=libiomp5" CXX="clang++-3.9 -fopenmp=libiomp5"; else - export CC="clang-3.9" CXX="clang++"; + export CC="clang-9" CXX="clang++"; fi; fi' From 0b1686640876fffb003ba2e217f8d0eaa09f120b Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 27 May 2020 15:32:52 -0500 Subject: [PATCH 05/16] Separate _check() functions and kernel definitions... ...and use generic flags for the former. Fixes #24. --- Makefile.am | 53 +- Makefile.in | 716 ++++++++++++++----------- src/configs/bulldozer/config.cxx | 2 - src/configs/bulldozer/config_ker.cxx | 9 + src/configs/core2/config.cxx | 2 - src/configs/core2/config_ker.cxx | 9 + src/configs/excavator/config.cxx | 2 - src/configs/excavator/config_ker.cxx | 9 + src/configs/haswell/config.cxx | 11 +- src/configs/haswell/config_ker.cxx | 12 + src/configs/knl/config.cxx | 118 +--- src/configs/knl/config_ker.cxx | 113 ++++ src/configs/piledriver/config.cxx | 12 +- src/configs/piledriver/config_ker.cxx | 9 + src/configs/sandybridge/config.cxx | 6 +- src/configs/sandybridge/config_ker.cxx | 9 + src/configs/skx1/config.cxx | 22 +- src/configs/skx1/config_ker.cxx | 10 + src/configs/skx2/config.cxx | 38 +- src/configs/skx2/config_ker.cxx | 26 + src/configs/zen/config.cxx | 12 +- src/configs/zen/config_ker.cxx | 10 + 22 files changed, 678 insertions(+), 532 deletions(-) create mode 100644 src/configs/bulldozer/config_ker.cxx create mode 100644 src/configs/core2/config_ker.cxx create mode 100644 src/configs/excavator/config_ker.cxx create mode 100644 src/configs/haswell/config_ker.cxx create mode 100644 src/configs/knl/config_ker.cxx create mode 100644 src/configs/piledriver/config_ker.cxx create mode 100644 src/configs/sandybridge/config_ker.cxx create mode 100644 src/configs/skx1/config_ker.cxx create mode 100644 src/configs/skx2/config_ker.cxx create mode 100644 src/configs/zen/config_ker.cxx diff --git a/Makefile.am b/Makefile.am index 1ed8183a9..8dc5e0af7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -222,8 +222,9 @@ endif if ENABLE_BULLDOZER noinst_LTLIBRARIES += lib/libbulldozer.la lib_libtblis_la_LIBADD += lib/libbulldozer.la +lib_libtblis_la_SOURCES += src/configs/bulldozer/config.cxx lib_libbulldozer_la_SOURCES = src/configs/bulldozer/bli_gemm_asm_d4x6_fma4.c \ - src/configs/bulldozer/config.cxx + src/configs/bulldozer/config_ker.cxx lib_libbulldozer_la_CFLAGS = -O3 -mavx -mfma4 -march=bdver1 -mfpmath=sse lib_libbulldozer_la_CXXFLAGS = -O3 -mavx -mfma4 -march=bdver1 -mfpmath=sse endif @@ -231,8 +232,9 @@ endif if ENABLE_PILEDRIVER noinst_LTLIBRARIES += lib/libpiledriver.la lib_libtblis_la_LIBADD += lib/libpiledriver.la +lib_libtblis_la_SOURCES += src/configs/piledriver/config.cxx lib_libpiledriver_la_SOURCES = src/configs/piledriver/bli_gemm_asm_d8x3.c \ - src/configs/piledriver/config.cxx + src/configs/piledriver/config_ker.cxx lib_libpiledriver_la_CFLAGS = -O3 -mavx -mfma -mfma4 -march=bdver2 -mfpmath=sse lib_libpiledriver_la_CXXFLAGS = -O3 -mavx -mfma -mfma4 -march=bdver2 -mfpmath=sse endif @@ -240,11 +242,10 @@ endif if ENABLE_EXCAVATOR noinst_LTLIBRARIES += lib/libexcavator.la lib_libtblis_la_LIBADD += lib/libexcavator.la +lib_libtblis_la_SOURCES += src/configs/excavator/config.cxx +lib_libexcavator_la_SOURCES = src/configs/excavator/config_ker.cxx if !ENABLE_PILEDRIVER -lib_libexcavator_la_SOURCES = src/configs/excavator/bli_gemm_asm_d8x3.c \ - src/configs/excavator/config.cxx -else -lib_libexcavator_la_SOURCES = src/configs/excavator/config.cxx +lib_libexcavator_la_SOURCES += src/configs/excavator/bli_gemm_asm_d8x3.c endif lib_libexcavator_la_CFLAGS = -O3 -mavx -mavx2 -mfma -march=bdver4 -mfpmath=sse lib_libexcavator_la_CXXFLAGS = -O3 -mavx -mavx2 -mfma -march=bdver4 -mfpmath=sse @@ -253,11 +254,10 @@ endif if ENABLE_ZEN noinst_LTLIBRARIES += lib/libzen.la lib_libtblis_la_LIBADD += lib/libzen.la +lib_libtblis_la_SOURCES += src/configs/zen/config.cxx +lib_libzen_la_SOURCES = src/configs/zen/config_ker.cxx if !ENABLE_HASWELL -lib_libzen_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/zen/config.cxx -else -lib_libzen_la_SOURCES = src/configs/zen/config.cxx +lib_libzen_la_SOURCES += src/configs/haswell/bli_gemm_asm_d6x8.c endif lib_libzen_la_CFLAGS = -O3 -mavx -mavx2 -mfma -march=znver1 -mfpmath=sse lib_libzen_la_CXXFLAGS = -O3 -mavx -mavx2 -mfma -march=znver1 -mfpmath=sse @@ -270,8 +270,9 @@ endif if ENABLE_CORE2 noinst_LTLIBRARIES += lib/libcore2.la lib_libtblis_la_LIBADD += lib/libcore2.la +lib_libtblis_la_SOURCES += src/configs/core2/config.cxx lib_libcore2_la_SOURCES = src/configs/core2/bli_gemm_asm_d4x4.c \ - src/configs/core2/config.cxx + src/configs/core2/config_ker.cxx if ENABLE_INTEL_COMPILER lib_libcore2_la_CFLAGS = -O3 -xSSSE3 lib_libcore2_la_CXXFLAGS = -O3 -xSSSE3 @@ -284,8 +285,9 @@ endif if ENABLE_SANDYBRIDGE noinst_LTLIBRARIES += lib/libsandybridge.la lib_libtblis_la_LIBADD += lib/libsandybridge.la +lib_libtblis_la_SOURCES += src/configs/sandybridge/config.cxx lib_libsandybridge_la_SOURCES = src/configs/sandybridge/bli_gemm_asm_d8x4.c \ - src/configs/sandybridge/config.cxx + src/configs/sandybridge/config_ker.cxx if ENABLE_INTEL_COMPILER lib_libsandybridge_la_CFLAGS = -O3 -xAVX lib_libsandybridge_la_CXXFLAGS = -O3 -xAVX @@ -298,8 +300,9 @@ endif if ENABLE_HASWELL noinst_LTLIBRARIES += lib/libhaswell.la lib_libtblis_la_LIBADD += lib/libhaswell.la +lib_libtblis_la_SOURCES += src/configs/haswell/config.cxx lib_libhaswell_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/haswell/config.cxx + src/configs/haswell/config_ker.cxx # src/configs/haswell/bli_gemm_asm_d12x4.c \ # src/configs/haswell/bli_gemm_asm_d8x6.c \ # src/configs/haswell/bli_gemm_asm_d4x12.c @@ -315,12 +318,13 @@ endif if ENABLE_KNL noinst_LTLIBRARIES += lib/libknl.la lib_libtblis_la_LIBADD += lib/libknl.la +lib_libtblis_la_SOURCES += src/configs/knl/config.cxx lib_libknl_la_SOURCES = src/configs/knl/bli_spackm_opt_24x16.c \ src/configs/knl/bli_dpackm_opt_24x8.c \ src/configs/knl/bli_dpackm_opt_30x8.c \ src/configs/knl/bli_sgemm_opt_24x16.c \ src/configs/knl/bli_dgemm_opt_24x8.c \ - src/configs/knl/config.cxx + src/configs/knl/config_ker.cxx # src/configs/knl/bli_dgemm_opt_12x16.c \ # src/configs/knl/bli_dgemm_opt_30x8.c \ # src/configs/knl/bli_dgemm_opt_8x24.c \ @@ -343,22 +347,13 @@ endif if ENABLE_SKX1 noinst_LTLIBRARIES += lib/libskx1.la lib_libtblis_la_LIBADD += lib/libskx1.la +lib_libtblis_la_SOURCES += src/configs/skx1/config.cxx +lib_libskx1_la_SOURCES = src/configs/skx1/config_ker.cxx if !ENABLE_SKX2 -if !ENABLE_HASWELL -lib_libskx1_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/skx2/vpu_count.cxx \ - src/configs/skx1/config.cxx -else -lib_libskx1_la_SOURCES = src/configs/skx2/vpu_count.cxx \ - src/configs/skx1/config.cxx +lib_libtblis_la_SOURCES += src/configs/skx2/vpu_count.cxx endif -else if !ENABLE_HASWELL -lib_libskx1_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/skx1/config.cxx -else -lib_libskx1_la_SOURCES = src/configs/skx1/config.cxx -endif +lib_libskx1_la_SOURCES += src/configs/haswell/bli_gemm_asm_d6x8.c endif if ENABLE_INTEL_COMPILER lib_libskx1_la_CFLAGS = -O3 -xCORE-AVX512 @@ -377,10 +372,12 @@ endif if ENABLE_SKX2 noinst_LTLIBRARIES += lib/libskx2.la lib_libtblis_la_LIBADD += lib/libskx2.la +lib_libtblis_la_SOURCES += src/configs/skx2/vpu_count.cxx \ + src/configs/skx2/config.cxx lib_libskx2_la_SOURCES = src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ src/configs/skx2/vpu_count.cxx \ - src/configs/skx2/config.cxx + src/configs/skx2/config_ker.cxx # src/configs/skx2/bli_dgemm_opt_12x16_l1.c \ # src/configs/skx2/bli_dgemm_opt_8x8_l1.c \ # src/configs/skx2/bli_dgemm_opt_8x8_l2.c \ diff --git a/Makefile.in b/Makefile.in index 20afbc75c..ffd34d85e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,33 +97,49 @@ host_triplet = @host@ # @ENABLE_BULLDOZER_TRUE@am__append_3 = lib/libbulldozer.la @ENABLE_BULLDOZER_TRUE@am__append_4 = lib/libbulldozer.la -@ENABLE_PILEDRIVER_TRUE@am__append_5 = lib/libpiledriver.la +@ENABLE_BULLDOZER_TRUE@am__append_5 = src/configs/bulldozer/config.cxx @ENABLE_PILEDRIVER_TRUE@am__append_6 = lib/libpiledriver.la -@ENABLE_EXCAVATOR_TRUE@am__append_7 = lib/libexcavator.la -@ENABLE_EXCAVATOR_TRUE@am__append_8 = lib/libexcavator.la -@ENABLE_ZEN_TRUE@am__append_9 = lib/libzen.la -@ENABLE_ZEN_TRUE@am__append_10 = lib/libzen.la +@ENABLE_PILEDRIVER_TRUE@am__append_7 = lib/libpiledriver.la +@ENABLE_PILEDRIVER_TRUE@am__append_8 = src/configs/piledriver/config.cxx +@ENABLE_EXCAVATOR_TRUE@am__append_9 = lib/libexcavator.la +@ENABLE_EXCAVATOR_TRUE@am__append_10 = lib/libexcavator.la +@ENABLE_EXCAVATOR_TRUE@am__append_11 = src/configs/excavator/config.cxx +@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@am__append_12 = src/configs/excavator/bli_gemm_asm_d8x3.c +@ENABLE_ZEN_TRUE@am__append_13 = lib/libzen.la +@ENABLE_ZEN_TRUE@am__append_14 = lib/libzen.la +@ENABLE_ZEN_TRUE@am__append_15 = src/configs/zen/config.cxx +@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@am__append_16 = src/configs/haswell/bli_gemm_asm_d6x8.c # # Intel architectures # -@ENABLE_CORE2_TRUE@am__append_11 = lib/libcore2.la -@ENABLE_CORE2_TRUE@am__append_12 = lib/libcore2.la -@ENABLE_SANDYBRIDGE_TRUE@am__append_13 = lib/libsandybridge.la -@ENABLE_SANDYBRIDGE_TRUE@am__append_14 = lib/libsandybridge.la -@ENABLE_HASWELL_TRUE@am__append_15 = lib/libhaswell.la -@ENABLE_HASWELL_TRUE@am__append_16 = lib/libhaswell.la -@ENABLE_KNL_TRUE@am__append_17 = lib/libknl.la -@ENABLE_KNL_TRUE@am__append_18 = lib/libknl.la -@ENABLE_SKX1_TRUE@am__append_19 = lib/libskx1.la -@ENABLE_SKX1_TRUE@am__append_20 = lib/libskx1.la -@ENABLE_SKX2_TRUE@am__append_21 = lib/libskx2.la -@ENABLE_SKX2_TRUE@am__append_22 = lib/libskx2.la +@ENABLE_CORE2_TRUE@am__append_17 = lib/libcore2.la +@ENABLE_CORE2_TRUE@am__append_18 = lib/libcore2.la +@ENABLE_CORE2_TRUE@am__append_19 = src/configs/core2/config.cxx +@ENABLE_SANDYBRIDGE_TRUE@am__append_20 = lib/libsandybridge.la +@ENABLE_SANDYBRIDGE_TRUE@am__append_21 = lib/libsandybridge.la +@ENABLE_SANDYBRIDGE_TRUE@am__append_22 = src/configs/sandybridge/config.cxx +@ENABLE_HASWELL_TRUE@am__append_23 = lib/libhaswell.la +@ENABLE_HASWELL_TRUE@am__append_24 = lib/libhaswell.la +@ENABLE_HASWELL_TRUE@am__append_25 = src/configs/haswell/config.cxx +@ENABLE_KNL_TRUE@am__append_26 = lib/libknl.la +@ENABLE_KNL_TRUE@am__append_27 = lib/libknl.la +@ENABLE_KNL_TRUE@am__append_28 = src/configs/knl/config.cxx +@ENABLE_SKX1_TRUE@am__append_29 = lib/libskx1.la +@ENABLE_SKX1_TRUE@am__append_30 = lib/libskx1.la +@ENABLE_SKX1_TRUE@am__append_31 = src/configs/skx1/config.cxx +@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@am__append_32 = src/configs/skx2/vpu_count.cxx +@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@am__append_33 = src/configs/haswell/bli_gemm_asm_d6x8.c +@ENABLE_SKX2_TRUE@am__append_34 = lib/libskx2.la +@ENABLE_SKX2_TRUE@am__append_35 = lib/libskx2.la +@ENABLE_SKX2_TRUE@am__append_36 = src/configs/skx2/vpu_count.cxx \ +@ENABLE_SKX2_TRUE@ src/configs/skx2/config.cxx + noinst_PROGRAMS = bin/test$(EXEEXT) $(am__EXEEXT_1) $(am__EXEEXT_2) \ $(am__EXEEXT_3) -@ENABLE_BLAS_TRUE@am__append_23 = bin/bench bin/batched_bench #bin/dpd_bench -@ENABLE_BLAS_TRUE@@ENABLE_SKX1_TRUE@am__append_24 = bin/skx_bench -@ENABLE_BLAS_TRUE@@ENABLE_SKX1_FALSE@@ENABLE_SKX2_TRUE@am__append_25 = bin/skx_bench +@ENABLE_BLAS_TRUE@am__append_37 = bin/bench bin/batched_bench #bin/dpd_bench +@ENABLE_BLAS_TRUE@@ENABLE_SKX1_TRUE@am__append_38 = bin/skx_bench +@ENABLE_BLAS_TRUE@@ENABLE_SKX1_FALSE@@ENABLE_SKX2_TRUE@am__append_39 = bin/skx_bench subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/as-gcc-inline-assembly.m4 \ @@ -201,10 +217,10 @@ LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) lib_libbulldozer_la_LIBADD = am__lib_libbulldozer_la_SOURCES_DIST = \ src/configs/bulldozer/bli_gemm_asm_d4x6_fma4.c \ - src/configs/bulldozer/config.cxx + src/configs/bulldozer/config_ker.cxx am__dirstamp = $(am__leading_dot)dirstamp @ENABLE_BULLDOZER_TRUE@am_lib_libbulldozer_la_OBJECTS = src/configs/bulldozer/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.lo \ -@ENABLE_BULLDOZER_TRUE@ src/configs/bulldozer/lib_libbulldozer_la-config.lo +@ENABLE_BULLDOZER_TRUE@ src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo lib_libbulldozer_la_OBJECTS = $(am_lib_libbulldozer_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -218,9 +234,9 @@ lib_libbulldozer_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ lib_libcore2_la_LIBADD = am__lib_libcore2_la_SOURCES_DIST = \ src/configs/core2/bli_gemm_asm_d4x4.c \ - src/configs/core2/config.cxx + src/configs/core2/config_ker.cxx @ENABLE_CORE2_TRUE@am_lib_libcore2_la_OBJECTS = src/configs/core2/lib_libcore2_la-bli_gemm_asm_d4x4.lo \ -@ENABLE_CORE2_TRUE@ src/configs/core2/lib_libcore2_la-config.lo +@ENABLE_CORE2_TRUE@ src/configs/core2/lib_libcore2_la-config_ker.lo lib_libcore2_la_OBJECTS = $(am_lib_libcore2_la_OBJECTS) lib_libcore2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -229,11 +245,11 @@ lib_libcore2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ @ENABLE_CORE2_TRUE@am_lib_libcore2_la_rpath = lib_libexcavator_la_LIBADD = am__lib_libexcavator_la_SOURCES_DIST = \ - src/configs/excavator/bli_gemm_asm_d8x3.c \ - src/configs/excavator/config.cxx -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@am_lib_libexcavator_la_OBJECTS = src/configs/excavator/lib_libexcavator_la-bli_gemm_asm_d8x3.lo \ -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@ src/configs/excavator/lib_libexcavator_la-config.lo -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_TRUE@am_lib_libexcavator_la_OBJECTS = src/configs/excavator/lib_libexcavator_la-config.lo + src/configs/excavator/config_ker.cxx \ + src/configs/excavator/bli_gemm_asm_d8x3.c +@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@am__objects_1 = src/configs/excavator/lib_libexcavator_la-bli_gemm_asm_d8x3.lo +@ENABLE_EXCAVATOR_TRUE@am_lib_libexcavator_la_OBJECTS = src/configs/excavator/lib_libexcavator_la-config_ker.lo \ +@ENABLE_EXCAVATOR_TRUE@ $(am__objects_1) lib_libexcavator_la_OBJECTS = $(am_lib_libexcavator_la_OBJECTS) lib_libexcavator_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -243,9 +259,9 @@ lib_libexcavator_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ lib_libhaswell_la_LIBADD = am__lib_libhaswell_la_SOURCES_DIST = \ src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/haswell/config.cxx + src/configs/haswell/config_ker.cxx @ENABLE_HASWELL_TRUE@am_lib_libhaswell_la_OBJECTS = src/configs/haswell/lib_libhaswell_la-bli_gemm_asm_d6x8.lo \ -@ENABLE_HASWELL_TRUE@ src/configs/haswell/lib_libhaswell_la-config.lo +@ENABLE_HASWELL_TRUE@ src/configs/haswell/lib_libhaswell_la-config_ker.lo lib_libhaswell_la_OBJECTS = $(am_lib_libhaswell_la_OBJECTS) lib_libhaswell_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -259,13 +275,13 @@ am__lib_libknl_la_SOURCES_DIST = \ src/configs/knl/bli_dpackm_opt_30x8.c \ src/configs/knl/bli_sgemm_opt_24x16.c \ src/configs/knl/bli_dgemm_opt_24x8.c \ - src/configs/knl/config.cxx + src/configs/knl/config_ker.cxx @ENABLE_KNL_TRUE@am_lib_libknl_la_OBJECTS = src/configs/knl/lib_libknl_la-bli_spackm_opt_24x16.lo \ @ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-bli_dpackm_opt_24x8.lo \ @ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-bli_dpackm_opt_30x8.lo \ @ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-bli_sgemm_opt_24x16.lo \ @ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-bli_dgemm_opt_24x8.lo \ -@ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-config.lo +@ENABLE_KNL_TRUE@ src/configs/knl/lib_libknl_la-config_ker.lo lib_libknl_la_OBJECTS = $(am_lib_libknl_la_OBJECTS) lib_libknl_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -275,9 +291,9 @@ lib_libknl_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ lib_libpiledriver_la_LIBADD = am__lib_libpiledriver_la_SOURCES_DIST = \ src/configs/piledriver/bli_gemm_asm_d8x3.c \ - src/configs/piledriver/config.cxx + src/configs/piledriver/config_ker.cxx @ENABLE_PILEDRIVER_TRUE@am_lib_libpiledriver_la_OBJECTS = src/configs/piledriver/lib_libpiledriver_la-bli_gemm_asm_d8x3.lo \ -@ENABLE_PILEDRIVER_TRUE@ src/configs/piledriver/lib_libpiledriver_la-config.lo +@ENABLE_PILEDRIVER_TRUE@ src/configs/piledriver/lib_libpiledriver_la-config_ker.lo lib_libpiledriver_la_OBJECTS = $(am_lib_libpiledriver_la_OBJECTS) lib_libpiledriver_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -297,9 +313,9 @@ lib_libreference_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ lib_libsandybridge_la_LIBADD = am__lib_libsandybridge_la_SOURCES_DIST = \ src/configs/sandybridge/bli_gemm_asm_d8x4.c \ - src/configs/sandybridge/config.cxx + src/configs/sandybridge/config_ker.cxx @ENABLE_SANDYBRIDGE_TRUE@am_lib_libsandybridge_la_OBJECTS = src/configs/sandybridge/lib_libsandybridge_la-bli_gemm_asm_d8x4.lo \ -@ENABLE_SANDYBRIDGE_TRUE@ src/configs/sandybridge/lib_libsandybridge_la-config.lo +@ENABLE_SANDYBRIDGE_TRUE@ src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo lib_libsandybridge_la_OBJECTS = $(am_lib_libsandybridge_la_OBJECTS) lib_libsandybridge_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -307,17 +323,11 @@ lib_libsandybridge_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(LDFLAGS) -o $@ @ENABLE_SANDYBRIDGE_TRUE@am_lib_libsandybridge_la_rpath = lib_libskx1_la_LIBADD = -am__lib_libskx1_la_SOURCES_DIST = \ - src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/skx2/vpu_count.cxx src/configs/skx1/config.cxx -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@am_lib_libskx1_la_OBJECTS = src/configs/haswell/lib_libskx1_la-bli_gemm_asm_d6x8.lo \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx2/lib_libskx1_la-vpu_count.lo \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx1/lib_libskx1_la-config.lo -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@am_lib_libskx1_la_OBJECTS = src/configs/haswell/lib_libskx1_la-bli_gemm_asm_d6x8.lo \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@ src/configs/skx1/lib_libskx1_la-config.lo -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@am_lib_libskx1_la_OBJECTS = src/configs/skx2/lib_libskx1_la-vpu_count.lo \ -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx1/lib_libskx1_la-config.lo -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@am_lib_libskx1_la_OBJECTS = src/configs/skx1/lib_libskx1_la-config.lo +am__lib_libskx1_la_SOURCES_DIST = src/configs/skx1/config_ker.cxx \ + src/configs/haswell/bli_gemm_asm_d6x8.c +@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@am__objects_2 = src/configs/haswell/lib_libskx1_la-bli_gemm_asm_d6x8.lo +@ENABLE_SKX1_TRUE@am_lib_libskx1_la_OBJECTS = src/configs/skx1/lib_libskx1_la-config_ker.lo \ +@ENABLE_SKX1_TRUE@ $(am__objects_2) lib_libskx1_la_OBJECTS = $(am_lib_libskx1_la_OBJECTS) lib_libskx1_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -328,11 +338,11 @@ lib_libskx2_la_LIBADD = am__lib_libskx2_la_SOURCES_DIST = \ src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ - src/configs/skx2/vpu_count.cxx src/configs/skx2/config.cxx + src/configs/skx2/vpu_count.cxx src/configs/skx2/config_ker.cxx @ENABLE_SKX2_TRUE@am_lib_libskx2_la_OBJECTS = src/configs/skx2/lib_libskx2_la-bli_sgemm_opt_12x32_l2.lo \ @ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-bli_dgemm_opt_12x16_l2.lo \ @ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-vpu_count.lo \ -@ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-config.lo +@ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-config_ker.lo lib_libskx2_la_OBJECTS = $(am_lib_libskx2_la_OBJECTS) lib_libskx2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -340,10 +350,75 @@ lib_libskx2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(LDFLAGS) -o $@ @ENABLE_SKX2_TRUE@am_lib_libskx2_la_rpath = lib_libtblis_la_DEPENDENCIES = src/external/tci/lib/libtci.la \ - $(am__append_2) $(am__append_4) $(am__append_6) \ - $(am__append_8) $(am__append_10) $(am__append_12) \ - $(am__append_14) $(am__append_16) $(am__append_18) \ - $(am__append_20) $(am__append_22) + $(am__append_2) $(am__append_4) $(am__append_7) \ + $(am__append_10) $(am__append_14) $(am__append_18) \ + $(am__append_21) $(am__append_24) $(am__append_27) \ + $(am__append_30) $(am__append_35) +am__lib_libtblis_la_SOURCES_DIST = src/iface/1v/add.cxx \ + src/iface/1v/dot.cxx src/iface/1v/mult.cxx \ + src/iface/1v/reduce.cxx src/iface/1v/scale.cxx \ + src/iface/1v/set.cxx src/iface/1v/shift.cxx \ + src/iface/1m/add.cxx src/iface/1m/dot.cxx \ + src/iface/1m/reduce.cxx src/iface/1m/scale.cxx \ + src/iface/1m/set.cxx src/iface/1m/shift.cxx \ + src/iface/1t/add.cxx src/iface/1t/dot.cxx \ + src/iface/1t/reduce.cxx src/iface/1t/scale.cxx \ + src/iface/1t/set.cxx src/iface/1t/shift.cxx \ + src/iface/2m/mult.cxx src/iface/3m/mult.cxx \ + src/iface/3t/mult.cxx src/internal/1v/add.cxx \ + src/internal/1v/dot.cxx src/internal/1v/mult.cxx \ + src/internal/1v/reduce.cxx src/internal/1v/scale.cxx \ + src/internal/1v/set.cxx src/internal/1v/shift.cxx \ + src/internal/1m/add.cxx src/internal/1m/dot.cxx \ + src/internal/1m/reduce.cxx src/internal/1m/scale.cxx \ + src/internal/1m/set.cxx src/internal/1m/shift.cxx \ + src/internal/1t/dense/add.cxx src/internal/1t/dense/dot.cxx \ + src/internal/1t/dense/reduce.cxx \ + src/internal/1t/dense/scale.cxx src/internal/1t/dense/set.cxx \ + src/internal/1t/dense/shift.cxx src/internal/1t/dpd/add.cxx \ + src/internal/1t/dpd/dot.cxx src/internal/1t/dpd/reduce.cxx \ + src/internal/1t/dpd/scale.cxx src/internal/1t/dpd/set.cxx \ + src/internal/1t/dpd/shift.cxx src/internal/1t/indexed/add.cxx \ + src/internal/1t/indexed/dot.cxx \ + src/internal/1t/indexed/reduce.cxx \ + src/internal/1t/indexed/scale.cxx \ + src/internal/1t/indexed/set.cxx \ + src/internal/1t/indexed/shift.cxx \ + src/internal/1t/indexed_dpd/add.cxx \ + src/internal/1t/indexed_dpd/dot.cxx \ + src/internal/1t/indexed_dpd/reduce.cxx \ + src/internal/1t/indexed_dpd/scale.cxx \ + src/internal/1t/indexed_dpd/set.cxx \ + src/internal/1t/indexed_dpd/shift.cxx src/internal/2m/mult.cxx \ + src/internal/3m/mult.cxx src/internal/3t/dense/mult.cxx \ + src/internal/3t/dpd/mult.cxx src/internal/3t/indexed/mult.cxx \ + src/internal/3t/indexed_dpd/mult.cxx src/configs/configs.cxx \ + src/util/basic_types.cxx src/util/configs.cxx \ + src/util/cpuid.cxx src/util/env.cxx src/util/random.cxx \ + src/util/thread.cxx src/configs/bulldozer/config.cxx \ + src/configs/piledriver/config.cxx \ + src/configs/excavator/config.cxx src/configs/zen/config.cxx \ + src/configs/core2/config.cxx \ + src/configs/sandybridge/config.cxx \ + src/configs/haswell/config.cxx src/configs/knl/config.cxx \ + src/configs/skx1/config.cxx src/configs/skx2/vpu_count.cxx \ + src/configs/skx2/config.cxx +@ENABLE_BULLDOZER_TRUE@am__objects_3 = \ +@ENABLE_BULLDOZER_TRUE@ src/configs/bulldozer/config.lo +@ENABLE_PILEDRIVER_TRUE@am__objects_4 = \ +@ENABLE_PILEDRIVER_TRUE@ src/configs/piledriver/config.lo +@ENABLE_EXCAVATOR_TRUE@am__objects_5 = \ +@ENABLE_EXCAVATOR_TRUE@ src/configs/excavator/config.lo +@ENABLE_ZEN_TRUE@am__objects_6 = src/configs/zen/config.lo +@ENABLE_CORE2_TRUE@am__objects_7 = src/configs/core2/config.lo +@ENABLE_SANDYBRIDGE_TRUE@am__objects_8 = \ +@ENABLE_SANDYBRIDGE_TRUE@ src/configs/sandybridge/config.lo +@ENABLE_HASWELL_TRUE@am__objects_9 = src/configs/haswell/config.lo +@ENABLE_KNL_TRUE@am__objects_10 = src/configs/knl/config.lo +@ENABLE_SKX1_TRUE@am__objects_11 = src/configs/skx1/config.lo +@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@am__objects_12 = src/configs/skx2/vpu_count.lo +@ENABLE_SKX2_TRUE@am__objects_13 = src/configs/skx2/vpu_count.lo \ +@ENABLE_SKX2_TRUE@ src/configs/skx2/config.lo am_lib_libtblis_la_OBJECTS = src/iface/1v/add.lo src/iface/1v/dot.lo \ src/iface/1v/mult.lo src/iface/1v/reduce.lo \ src/iface/1v/scale.lo src/iface/1v/set.lo \ @@ -382,15 +457,19 @@ am_lib_libtblis_la_OBJECTS = src/iface/1v/add.lo src/iface/1v/dot.lo \ src/internal/3t/dpd/mult.lo src/internal/3t/indexed/mult.lo \ src/internal/3t/indexed_dpd/mult.lo src/configs/configs.lo \ src/util/basic_types.lo src/util/configs.lo src/util/cpuid.lo \ - src/util/env.lo src/util/random.lo src/util/thread.lo + src/util/env.lo src/util/random.lo src/util/thread.lo \ + $(am__objects_3) $(am__objects_4) $(am__objects_5) \ + $(am__objects_6) $(am__objects_7) $(am__objects_8) \ + $(am__objects_9) $(am__objects_10) $(am__objects_11) \ + $(am__objects_12) $(am__objects_13) lib_libtblis_la_OBJECTS = $(am_lib_libtblis_la_OBJECTS) lib_libzen_la_LIBADD = -am__lib_libzen_la_SOURCES_DIST = \ - src/configs/haswell/bli_gemm_asm_d6x8.c \ - src/configs/zen/config.cxx -@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@am_lib_libzen_la_OBJECTS = src/configs/haswell/lib_libzen_la-bli_gemm_asm_d6x8.lo \ -@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@ src/configs/zen/lib_libzen_la-config.lo -@ENABLE_HASWELL_TRUE@@ENABLE_ZEN_TRUE@am_lib_libzen_la_OBJECTS = src/configs/zen/lib_libzen_la-config.lo +am__lib_libzen_la_SOURCES_DIST = src/configs/zen/config_ker.cxx \ + src/configs/haswell/bli_gemm_asm_d6x8.c +@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@am__objects_14 = src/configs/haswell/lib_libzen_la-bli_gemm_asm_d6x8.lo +@ENABLE_ZEN_TRUE@am_lib_libzen_la_OBJECTS = \ +@ENABLE_ZEN_TRUE@ src/configs/zen/lib_libzen_la-config_ker.lo \ +@ENABLE_ZEN_TRUE@ $(am__objects_14) lib_libzen_la_OBJECTS = $(am_lib_libzen_la_OBJECTS) lib_libzen_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ @@ -433,34 +512,44 @@ DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__maybe_remake_depfiles = depfiles am__depfiles_remade = src/configs/$(DEPDIR)/configs.Plo \ + src/configs/bulldozer/$(DEPDIR)/config.Plo \ src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.Plo \ - src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Plo \ + src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Plo \ + src/configs/core2/$(DEPDIR)/config.Plo \ src/configs/core2/$(DEPDIR)/lib_libcore2_la-bli_gemm_asm_d4x4.Plo \ - src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Plo \ + src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Plo \ + src/configs/excavator/$(DEPDIR)/config.Plo \ src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-bli_gemm_asm_d8x3.Plo \ - src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Plo \ + src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Plo \ + src/configs/haswell/$(DEPDIR)/config.Plo \ src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-bli_gemm_asm_d6x8.Plo \ - src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Plo \ + src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Plo \ src/configs/haswell/$(DEPDIR)/lib_libskx1_la-bli_gemm_asm_d6x8.Plo \ src/configs/haswell/$(DEPDIR)/lib_libzen_la-bli_gemm_asm_d6x8.Plo \ + src/configs/knl/$(DEPDIR)/config.Plo \ src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dgemm_opt_24x8.Plo \ src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_24x8.Plo \ src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_30x8.Plo \ src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_sgemm_opt_24x16.Plo \ src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_spackm_opt_24x16.Plo \ - src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Plo \ + src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Plo \ + src/configs/piledriver/$(DEPDIR)/config.Plo \ src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-bli_gemm_asm_d8x3.Plo \ - src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Plo \ + src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Plo \ src/configs/reference/$(DEPDIR)/lib_libreference_la-config.Plo \ + src/configs/sandybridge/$(DEPDIR)/config.Plo \ src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-bli_gemm_asm_d8x4.Plo \ - src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Plo \ - src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Plo \ - src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Plo \ + src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Plo \ + src/configs/skx1/$(DEPDIR)/config.Plo \ + src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Plo \ + src/configs/skx2/$(DEPDIR)/config.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo \ - src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Plo \ + src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo \ - src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Plo \ + src/configs/skx2/$(DEPDIR)/vpu_count.Plo \ + src/configs/zen/$(DEPDIR)/config.Plo \ + src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo \ src/iface/1m/$(DEPDIR)/add.Plo src/iface/1m/$(DEPDIR)/dot.Plo \ src/iface/1m/$(DEPDIR)/reduce.Plo \ src/iface/1m/$(DEPDIR)/scale.Plo \ @@ -591,7 +680,8 @@ DIST_SOURCES = $(am__lib_libbulldozer_la_SOURCES_DIST) \ $(am__lib_libreference_la_SOURCES_DIST) \ $(am__lib_libsandybridge_la_SOURCES_DIST) \ $(am__lib_libskx1_la_SOURCES_DIST) \ - $(am__lib_libskx2_la_SOURCES_DIST) $(lib_libtblis_la_SOURCES) \ + $(am__lib_libskx2_la_SOURCES_DIST) \ + $(am__lib_libtblis_la_SOURCES_DIST) \ $(am__lib_libzen_la_SOURCES_DIST) $(bin_batched_bench_SOURCES) \ $(bin_bench_SOURCES) $(bin_skx_bench_SOURCES) \ $(bin_test_SOURCES) @@ -820,100 +910,50 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ lib_LTLIBRARIES = lib/libtblis.la -lib_libtblis_la_SOURCES = \ - \ - src/iface/1v/add.cxx \ - src/iface/1v/dot.cxx \ - src/iface/1v/mult.cxx \ - src/iface/1v/reduce.cxx \ - src/iface/1v/scale.cxx \ - src/iface/1v/set.cxx \ - src/iface/1v/shift.cxx \ - \ - src/iface/1m/add.cxx \ - src/iface/1m/dot.cxx \ - src/iface/1m/reduce.cxx \ - src/iface/1m/scale.cxx \ - src/iface/1m/set.cxx \ - src/iface/1m/shift.cxx \ - \ - src/iface/1t/add.cxx \ - src/iface/1t/dot.cxx \ - src/iface/1t/reduce.cxx \ - src/iface/1t/scale.cxx \ - src/iface/1t/set.cxx \ - src/iface/1t/shift.cxx \ - \ - src/iface/2m/mult.cxx \ - \ - src/iface/3m/mult.cxx \ - \ - src/iface/3t/mult.cxx \ - \ - src/internal/1v/add.cxx \ - src/internal/1v/dot.cxx \ - src/internal/1v/mult.cxx \ - src/internal/1v/reduce.cxx \ - src/internal/1v/scale.cxx \ - src/internal/1v/set.cxx \ - src/internal/1v/shift.cxx \ - \ - src/internal/1m/add.cxx \ - src/internal/1m/dot.cxx \ - src/internal/1m/reduce.cxx \ - src/internal/1m/scale.cxx \ - src/internal/1m/set.cxx \ - src/internal/1m/shift.cxx \ - \ - src/internal/1t/dense/add.cxx \ - src/internal/1t/dense/dot.cxx \ - src/internal/1t/dense/reduce.cxx \ - src/internal/1t/dense/scale.cxx \ - src/internal/1t/dense/set.cxx \ - src/internal/1t/dense/shift.cxx \ - \ - src/internal/1t/dpd/add.cxx \ - src/internal/1t/dpd/dot.cxx \ - src/internal/1t/dpd/reduce.cxx \ - src/internal/1t/dpd/scale.cxx \ - src/internal/1t/dpd/set.cxx \ - src/internal/1t/dpd/shift.cxx \ - \ - src/internal/1t/indexed/add.cxx \ - src/internal/1t/indexed/dot.cxx \ - src/internal/1t/indexed/reduce.cxx \ - src/internal/1t/indexed/scale.cxx \ - src/internal/1t/indexed/set.cxx \ - src/internal/1t/indexed/shift.cxx \ - \ - src/internal/1t/indexed_dpd/add.cxx \ - src/internal/1t/indexed_dpd/dot.cxx \ - src/internal/1t/indexed_dpd/reduce.cxx \ - src/internal/1t/indexed_dpd/scale.cxx \ - src/internal/1t/indexed_dpd/set.cxx \ - src/internal/1t/indexed_dpd/shift.cxx \ - \ - src/internal/2m/mult.cxx \ - \ - src/internal/3m/mult.cxx \ - \ - src/internal/3t/dense/mult.cxx \ - \ - src/internal/3t/dpd/mult.cxx \ - \ - src/internal/3t/indexed/mult.cxx \ - \ - src/internal/3t/indexed_dpd/mult.cxx \ - \ - src/configs/configs.cxx \ - \ - src/util/basic_types.cxx \ - src/util/configs.cxx \ - src/util/cpuid.cxx \ - src/util/env.cxx \ - src/util/random.cxx \ - src/util/thread.cxx - +lib_libtblis_la_SOURCES = src/iface/1v/add.cxx src/iface/1v/dot.cxx \ + src/iface/1v/mult.cxx src/iface/1v/reduce.cxx \ + src/iface/1v/scale.cxx src/iface/1v/set.cxx \ + src/iface/1v/shift.cxx src/iface/1m/add.cxx \ + src/iface/1m/dot.cxx src/iface/1m/reduce.cxx \ + src/iface/1m/scale.cxx src/iface/1m/set.cxx \ + src/iface/1m/shift.cxx src/iface/1t/add.cxx \ + src/iface/1t/dot.cxx src/iface/1t/reduce.cxx \ + src/iface/1t/scale.cxx src/iface/1t/set.cxx \ + src/iface/1t/shift.cxx src/iface/2m/mult.cxx \ + src/iface/3m/mult.cxx src/iface/3t/mult.cxx \ + src/internal/1v/add.cxx src/internal/1v/dot.cxx \ + src/internal/1v/mult.cxx src/internal/1v/reduce.cxx \ + src/internal/1v/scale.cxx src/internal/1v/set.cxx \ + src/internal/1v/shift.cxx src/internal/1m/add.cxx \ + src/internal/1m/dot.cxx src/internal/1m/reduce.cxx \ + src/internal/1m/scale.cxx src/internal/1m/set.cxx \ + src/internal/1m/shift.cxx src/internal/1t/dense/add.cxx \ + src/internal/1t/dense/dot.cxx src/internal/1t/dense/reduce.cxx \ + src/internal/1t/dense/scale.cxx src/internal/1t/dense/set.cxx \ + src/internal/1t/dense/shift.cxx src/internal/1t/dpd/add.cxx \ + src/internal/1t/dpd/dot.cxx src/internal/1t/dpd/reduce.cxx \ + src/internal/1t/dpd/scale.cxx src/internal/1t/dpd/set.cxx \ + src/internal/1t/dpd/shift.cxx src/internal/1t/indexed/add.cxx \ + src/internal/1t/indexed/dot.cxx \ + src/internal/1t/indexed/reduce.cxx \ + src/internal/1t/indexed/scale.cxx \ + src/internal/1t/indexed/set.cxx \ + src/internal/1t/indexed/shift.cxx \ + src/internal/1t/indexed_dpd/add.cxx \ + src/internal/1t/indexed_dpd/dot.cxx \ + src/internal/1t/indexed_dpd/reduce.cxx \ + src/internal/1t/indexed_dpd/scale.cxx \ + src/internal/1t/indexed_dpd/set.cxx \ + src/internal/1t/indexed_dpd/shift.cxx src/internal/2m/mult.cxx \ + src/internal/3m/mult.cxx src/internal/3t/dense/mult.cxx \ + src/internal/3t/dpd/mult.cxx src/internal/3t/indexed/mult.cxx \ + src/internal/3t/indexed_dpd/mult.cxx src/configs/configs.cxx \ + src/util/basic_types.cxx src/util/configs.cxx \ + src/util/cpuid.cxx src/util/env.cxx src/util/random.cxx \ + src/util/thread.cxx $(am__append_5) $(am__append_8) \ + $(am__append_11) $(am__append_15) $(am__append_19) \ + $(am__append_22) $(am__append_25) $(am__append_28) \ + $(am__append_31) $(am__append_32) $(am__append_36) pkginclude_HEADERS = src/tblis.h src/tblis_config.h utilincludedir = $(pkgincludedir)/util utilinclude_HEADERS = \ @@ -1024,56 +1064,54 @@ stl_extinclude_HEADERS = \ src/external/stl_ext/include/vector.hpp \ src/external/stl_ext/include/zip.hpp -noinst_LTLIBRARIES = $(am__append_1) $(am__append_3) $(am__append_5) \ - $(am__append_7) $(am__append_9) $(am__append_11) \ - $(am__append_13) $(am__append_15) $(am__append_17) \ - $(am__append_19) $(am__append_21) +noinst_LTLIBRARIES = $(am__append_1) $(am__append_3) $(am__append_6) \ + $(am__append_9) $(am__append_13) $(am__append_17) \ + $(am__append_20) $(am__append_23) $(am__append_26) \ + $(am__append_29) $(am__append_34) lib_libtblis_la_LIBADD = src/external/tci/lib/libtci.la \ - $(am__append_2) $(am__append_4) $(am__append_6) \ - $(am__append_8) $(am__append_10) $(am__append_12) \ - $(am__append_14) $(am__append_16) $(am__append_18) \ - $(am__append_20) $(am__append_22) + $(am__append_2) $(am__append_4) $(am__append_7) \ + $(am__append_10) $(am__append_14) $(am__append_18) \ + $(am__append_21) $(am__append_24) $(am__append_27) \ + $(am__append_30) $(am__append_35) @ENABLE_REFERENCE_TRUE@lib_libreference_la_SOURCES = src/configs/reference/config.cxx @ENABLE_REFERENCE_TRUE@lib_libreference_la_CFLAGS = -O3 @ENABLE_REFERENCE_TRUE@lib_libreference_la_CXXFLAGS = -O3 @ENABLE_BULLDOZER_TRUE@lib_libbulldozer_la_SOURCES = src/configs/bulldozer/bli_gemm_asm_d4x6_fma4.c \ -@ENABLE_BULLDOZER_TRUE@ src/configs/bulldozer/config.cxx +@ENABLE_BULLDOZER_TRUE@ src/configs/bulldozer/config_ker.cxx @ENABLE_BULLDOZER_TRUE@lib_libbulldozer_la_CFLAGS = -O3 -mavx -mfma4 -march=bdver1 -mfpmath=sse @ENABLE_BULLDOZER_TRUE@lib_libbulldozer_la_CXXFLAGS = -O3 -mavx -mfma4 -march=bdver1 -mfpmath=sse @ENABLE_PILEDRIVER_TRUE@lib_libpiledriver_la_SOURCES = src/configs/piledriver/bli_gemm_asm_d8x3.c \ -@ENABLE_PILEDRIVER_TRUE@ src/configs/piledriver/config.cxx +@ENABLE_PILEDRIVER_TRUE@ src/configs/piledriver/config_ker.cxx @ENABLE_PILEDRIVER_TRUE@lib_libpiledriver_la_CFLAGS = -O3 -mavx -mfma -mfma4 -march=bdver2 -mfpmath=sse @ENABLE_PILEDRIVER_TRUE@lib_libpiledriver_la_CXXFLAGS = -O3 -mavx -mfma -mfma4 -march=bdver2 -mfpmath=sse -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@lib_libexcavator_la_SOURCES = src/configs/excavator/bli_gemm_asm_d8x3.c \ -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_FALSE@ src/configs/excavator/config.cxx - -@ENABLE_EXCAVATOR_TRUE@@ENABLE_PILEDRIVER_TRUE@lib_libexcavator_la_SOURCES = src/configs/excavator/config.cxx +@ENABLE_EXCAVATOR_TRUE@lib_libexcavator_la_SOURCES = \ +@ENABLE_EXCAVATOR_TRUE@ src/configs/excavator/config_ker.cxx \ +@ENABLE_EXCAVATOR_TRUE@ $(am__append_12) @ENABLE_EXCAVATOR_TRUE@lib_libexcavator_la_CFLAGS = -O3 -mavx -mavx2 -mfma -march=bdver4 -mfpmath=sse @ENABLE_EXCAVATOR_TRUE@lib_libexcavator_la_CXXFLAGS = -O3 -mavx -mavx2 -mfma -march=bdver4 -mfpmath=sse -@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@lib_libzen_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ -@ENABLE_HASWELL_FALSE@@ENABLE_ZEN_TRUE@ src/configs/zen/config.cxx - -@ENABLE_HASWELL_TRUE@@ENABLE_ZEN_TRUE@lib_libzen_la_SOURCES = src/configs/zen/config.cxx +@ENABLE_ZEN_TRUE@lib_libzen_la_SOURCES = \ +@ENABLE_ZEN_TRUE@ src/configs/zen/config_ker.cxx \ +@ENABLE_ZEN_TRUE@ $(am__append_16) @ENABLE_ZEN_TRUE@lib_libzen_la_CFLAGS = -O3 -mavx -mavx2 -mfma -march=znver1 -mfpmath=sse @ENABLE_ZEN_TRUE@lib_libzen_la_CXXFLAGS = -O3 -mavx -mavx2 -mfma -march=znver1 -mfpmath=sse @ENABLE_CORE2_TRUE@lib_libcore2_la_SOURCES = src/configs/core2/bli_gemm_asm_d4x4.c \ -@ENABLE_CORE2_TRUE@ src/configs/core2/config.cxx +@ENABLE_CORE2_TRUE@ src/configs/core2/config_ker.cxx @ENABLE_CORE2_TRUE@@ENABLE_INTEL_COMPILER_FALSE@lib_libcore2_la_CFLAGS = -O3 -msse3 -mssse3 -march=core2 -mfpmath=sse @ENABLE_CORE2_TRUE@@ENABLE_INTEL_COMPILER_TRUE@lib_libcore2_la_CFLAGS = -O3 -xSSSE3 @ENABLE_CORE2_TRUE@@ENABLE_INTEL_COMPILER_FALSE@lib_libcore2_la_CXXFLAGS = -O3 -msse3 -mssse3 -march=core2 -mfpmath=sse @ENABLE_CORE2_TRUE@@ENABLE_INTEL_COMPILER_TRUE@lib_libcore2_la_CXXFLAGS = -O3 -xSSSE3 @ENABLE_SANDYBRIDGE_TRUE@lib_libsandybridge_la_SOURCES = src/configs/sandybridge/bli_gemm_asm_d8x4.c \ -@ENABLE_SANDYBRIDGE_TRUE@ src/configs/sandybridge/config.cxx +@ENABLE_SANDYBRIDGE_TRUE@ src/configs/sandybridge/config_ker.cxx @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SANDYBRIDGE_TRUE@lib_libsandybridge_la_CFLAGS = -O3 -mavx -march=corei7-avx -mfpmath=sse @ENABLE_INTEL_COMPILER_TRUE@@ENABLE_SANDYBRIDGE_TRUE@lib_libsandybridge_la_CFLAGS = -O3 -xAVX @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SANDYBRIDGE_TRUE@lib_libsandybridge_la_CXXFLAGS = -O3 -mavx -march=corei7-avx -mfpmath=sse @ENABLE_INTEL_COMPILER_TRUE@@ENABLE_SANDYBRIDGE_TRUE@lib_libsandybridge_la_CXXFLAGS = -O3 -xAVX @ENABLE_HASWELL_TRUE@lib_libhaswell_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ -@ENABLE_HASWELL_TRUE@ src/configs/haswell/config.cxx +@ENABLE_HASWELL_TRUE@ src/configs/haswell/config_ker.cxx @ENABLE_HASWELL_TRUE@@ENABLE_INTEL_COMPILER_FALSE@lib_libhaswell_la_CFLAGS = -O3 -mavx -mavx2 -mfma -march=core-avx2 -mfpmath=sse # src/configs/haswell/bli_gemm_asm_d12x4.c \ @@ -1087,7 +1125,7 @@ lib_libtblis_la_LIBADD = src/external/tci/lib/libtci.la \ @ENABLE_KNL_TRUE@ src/configs/knl/bli_dpackm_opt_30x8.c \ @ENABLE_KNL_TRUE@ src/configs/knl/bli_sgemm_opt_24x16.c \ @ENABLE_KNL_TRUE@ src/configs/knl/bli_dgemm_opt_24x8.c \ -@ENABLE_KNL_TRUE@ src/configs/knl/config.cxx +@ENABLE_KNL_TRUE@ src/configs/knl/config_ker.cxx @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_KNL_TRUE@@IS_OSX_FALSE@lib_libknl_la_CFLAGS = -O3 -mavx512f -mavx512pf -march=knl -mfpmath=sse @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_KNL_TRUE@@IS_OSX_TRUE@lib_libknl_la_CFLAGS = -O3 -mavx512f -mavx512pf -march=knl -mfpmath=sse -Wa,-march=knl @@ -1100,17 +1138,9 @@ lib_libtblis_la_LIBADD = src/external/tci/lib/libtci.la \ @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_KNL_TRUE@@IS_OSX_FALSE@lib_libknl_la_CXXFLAGS = -O3 -mavx512f -mavx512pf -march=knl -mfpmath=sse @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_KNL_TRUE@@IS_OSX_TRUE@lib_libknl_la_CXXFLAGS = -O3 -mavx512f -mavx512pf -march=knl -mfpmath=sse -Wa,-march=knl @ENABLE_INTEL_COMPILER_TRUE@@ENABLE_KNL_TRUE@lib_libknl_la_CXXFLAGS = -O3 -xMIC-AVX512 -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@lib_libskx1_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx2/vpu_count.cxx \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx1/config.cxx - -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@lib_libskx1_la_SOURCES = src/configs/haswell/bli_gemm_asm_d6x8.c \ -@ENABLE_HASWELL_FALSE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@ src/configs/skx1/config.cxx - -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@lib_libskx1_la_SOURCES = src/configs/skx2/vpu_count.cxx \ -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_FALSE@ src/configs/skx1/config.cxx - -@ENABLE_HASWELL_TRUE@@ENABLE_SKX1_TRUE@@ENABLE_SKX2_TRUE@lib_libskx1_la_SOURCES = src/configs/skx1/config.cxx +@ENABLE_SKX1_TRUE@lib_libskx1_la_SOURCES = \ +@ENABLE_SKX1_TRUE@ src/configs/skx1/config_ker.cxx \ +@ENABLE_SKX1_TRUE@ $(am__append_33) @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SKX1_TRUE@@IS_OSX_FALSE@lib_libskx1_la_CFLAGS = -O3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -march=skylake-avx512 -mfpmath=sse @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SKX1_TRUE@@IS_OSX_TRUE@lib_libskx1_la_CFLAGS = -O3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -march=skylake-avx512 -mfpmath=sse -Wa,-march=skylake-avx512 @ENABLE_INTEL_COMPILER_TRUE@@ENABLE_SKX1_TRUE@lib_libskx1_la_CFLAGS = -O3 -xCORE-AVX512 @@ -1120,7 +1150,7 @@ lib_libtblis_la_LIBADD = src/external/tci/lib/libtci.la \ @ENABLE_SKX2_TRUE@lib_libskx2_la_SOURCES = src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ @ENABLE_SKX2_TRUE@ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ @ENABLE_SKX2_TRUE@ src/configs/skx2/vpu_count.cxx \ -@ENABLE_SKX2_TRUE@ src/configs/skx2/config.cxx +@ENABLE_SKX2_TRUE@ src/configs/skx2/config_ker.cxx @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SKX2_TRUE@@IS_OSX_FALSE@lib_libskx2_la_CFLAGS = -O3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -march=skylake-avx512 -mfpmath=sse @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SKX2_TRUE@@IS_OSX_TRUE@lib_libskx2_la_CFLAGS = -O3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -march=skylake-avx512 -mfpmath=sse -Wa,-march=skylake-avx512 @@ -1294,7 +1324,7 @@ src/configs/bulldozer/$(DEPDIR)/$(am__dirstamp): src/configs/bulldozer/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.lo: \ src/configs/bulldozer/$(am__dirstamp) \ src/configs/bulldozer/$(DEPDIR)/$(am__dirstamp) -src/configs/bulldozer/lib_libbulldozer_la-config.lo: \ +src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo: \ src/configs/bulldozer/$(am__dirstamp) \ src/configs/bulldozer/$(DEPDIR)/$(am__dirstamp) lib/$(am__dirstamp): @@ -1312,7 +1342,7 @@ src/configs/core2/$(DEPDIR)/$(am__dirstamp): src/configs/core2/lib_libcore2_la-bli_gemm_asm_d4x4.lo: \ src/configs/core2/$(am__dirstamp) \ src/configs/core2/$(DEPDIR)/$(am__dirstamp) -src/configs/core2/lib_libcore2_la-config.lo: \ +src/configs/core2/lib_libcore2_la-config_ker.lo: \ src/configs/core2/$(am__dirstamp) \ src/configs/core2/$(DEPDIR)/$(am__dirstamp) @@ -1324,10 +1354,10 @@ src/configs/excavator/$(am__dirstamp): src/configs/excavator/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) src/configs/excavator/$(DEPDIR) @: > src/configs/excavator/$(DEPDIR)/$(am__dirstamp) -src/configs/excavator/lib_libexcavator_la-bli_gemm_asm_d8x3.lo: \ +src/configs/excavator/lib_libexcavator_la-config_ker.lo: \ src/configs/excavator/$(am__dirstamp) \ src/configs/excavator/$(DEPDIR)/$(am__dirstamp) -src/configs/excavator/lib_libexcavator_la-config.lo: \ +src/configs/excavator/lib_libexcavator_la-bli_gemm_asm_d8x3.lo: \ src/configs/excavator/$(am__dirstamp) \ src/configs/excavator/$(DEPDIR)/$(am__dirstamp) @@ -1342,7 +1372,7 @@ src/configs/haswell/$(DEPDIR)/$(am__dirstamp): src/configs/haswell/lib_libhaswell_la-bli_gemm_asm_d6x8.lo: \ src/configs/haswell/$(am__dirstamp) \ src/configs/haswell/$(DEPDIR)/$(am__dirstamp) -src/configs/haswell/lib_libhaswell_la-config.lo: \ +src/configs/haswell/lib_libhaswell_la-config_ker.lo: \ src/configs/haswell/$(am__dirstamp) \ src/configs/haswell/$(DEPDIR)/$(am__dirstamp) @@ -1369,7 +1399,7 @@ src/configs/knl/lib_libknl_la-bli_sgemm_opt_24x16.lo: \ src/configs/knl/lib_libknl_la-bli_dgemm_opt_24x8.lo: \ src/configs/knl/$(am__dirstamp) \ src/configs/knl/$(DEPDIR)/$(am__dirstamp) -src/configs/knl/lib_libknl_la-config.lo: \ +src/configs/knl/lib_libknl_la-config_ker.lo: \ src/configs/knl/$(am__dirstamp) \ src/configs/knl/$(DEPDIR)/$(am__dirstamp) @@ -1384,7 +1414,7 @@ src/configs/piledriver/$(DEPDIR)/$(am__dirstamp): src/configs/piledriver/lib_libpiledriver_la-bli_gemm_asm_d8x3.lo: \ src/configs/piledriver/$(am__dirstamp) \ src/configs/piledriver/$(DEPDIR)/$(am__dirstamp) -src/configs/piledriver/lib_libpiledriver_la-config.lo: \ +src/configs/piledriver/lib_libpiledriver_la-config_ker.lo: \ src/configs/piledriver/$(am__dirstamp) \ src/configs/piledriver/$(DEPDIR)/$(am__dirstamp) @@ -1411,36 +1441,33 @@ src/configs/sandybridge/$(DEPDIR)/$(am__dirstamp): src/configs/sandybridge/lib_libsandybridge_la-bli_gemm_asm_d8x4.lo: \ src/configs/sandybridge/$(am__dirstamp) \ src/configs/sandybridge/$(DEPDIR)/$(am__dirstamp) -src/configs/sandybridge/lib_libsandybridge_la-config.lo: \ +src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo: \ src/configs/sandybridge/$(am__dirstamp) \ src/configs/sandybridge/$(DEPDIR)/$(am__dirstamp) lib/libsandybridge.la: $(lib_libsandybridge_la_OBJECTS) $(lib_libsandybridge_la_DEPENDENCIES) $(EXTRA_lib_libsandybridge_la_DEPENDENCIES) lib/$(am__dirstamp) $(AM_V_CXXLD)$(lib_libsandybridge_la_LINK) $(am_lib_libsandybridge_la_rpath) $(lib_libsandybridge_la_OBJECTS) $(lib_libsandybridge_la_LIBADD) $(LIBS) -src/configs/haswell/lib_libskx1_la-bli_gemm_asm_d6x8.lo: \ - src/configs/haswell/$(am__dirstamp) \ - src/configs/haswell/$(DEPDIR)/$(am__dirstamp) -src/configs/skx2/$(am__dirstamp): - @$(MKDIR_P) src/configs/skx2 - @: > src/configs/skx2/$(am__dirstamp) -src/configs/skx2/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) src/configs/skx2/$(DEPDIR) - @: > src/configs/skx2/$(DEPDIR)/$(am__dirstamp) -src/configs/skx2/lib_libskx1_la-vpu_count.lo: \ - src/configs/skx2/$(am__dirstamp) \ - src/configs/skx2/$(DEPDIR)/$(am__dirstamp) src/configs/skx1/$(am__dirstamp): @$(MKDIR_P) src/configs/skx1 @: > src/configs/skx1/$(am__dirstamp) src/configs/skx1/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) src/configs/skx1/$(DEPDIR) @: > src/configs/skx1/$(DEPDIR)/$(am__dirstamp) -src/configs/skx1/lib_libskx1_la-config.lo: \ +src/configs/skx1/lib_libskx1_la-config_ker.lo: \ src/configs/skx1/$(am__dirstamp) \ src/configs/skx1/$(DEPDIR)/$(am__dirstamp) +src/configs/haswell/lib_libskx1_la-bli_gemm_asm_d6x8.lo: \ + src/configs/haswell/$(am__dirstamp) \ + src/configs/haswell/$(DEPDIR)/$(am__dirstamp) lib/libskx1.la: $(lib_libskx1_la_OBJECTS) $(lib_libskx1_la_DEPENDENCIES) $(EXTRA_lib_libskx1_la_DEPENDENCIES) lib/$(am__dirstamp) $(AM_V_CXXLD)$(lib_libskx1_la_LINK) $(am_lib_libskx1_la_rpath) $(lib_libskx1_la_OBJECTS) $(lib_libskx1_la_LIBADD) $(LIBS) +src/configs/skx2/$(am__dirstamp): + @$(MKDIR_P) src/configs/skx2 + @: > src/configs/skx2/$(am__dirstamp) +src/configs/skx2/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/configs/skx2/$(DEPDIR) + @: > src/configs/skx2/$(DEPDIR)/$(am__dirstamp) src/configs/skx2/lib_libskx2_la-bli_sgemm_opt_12x32_l2.lo: \ src/configs/skx2/$(am__dirstamp) \ src/configs/skx2/$(DEPDIR)/$(am__dirstamp) @@ -1450,7 +1477,7 @@ src/configs/skx2/lib_libskx2_la-bli_dgemm_opt_12x16_l2.lo: \ src/configs/skx2/lib_libskx2_la-vpu_count.lo: \ src/configs/skx2/$(am__dirstamp) \ src/configs/skx2/$(DEPDIR)/$(am__dirstamp) -src/configs/skx2/lib_libskx2_la-config.lo: \ +src/configs/skx2/lib_libskx2_la-config_ker.lo: \ src/configs/skx2/$(am__dirstamp) \ src/configs/skx2/$(DEPDIR)/$(am__dirstamp) @@ -1735,21 +1762,47 @@ src/util/random.lo: src/util/$(am__dirstamp) \ src/util/$(DEPDIR)/$(am__dirstamp) src/util/thread.lo: src/util/$(am__dirstamp) \ src/util/$(DEPDIR)/$(am__dirstamp) - -lib/libtblis.la: $(lib_libtblis_la_OBJECTS) $(lib_libtblis_la_DEPENDENCIES) $(EXTRA_lib_libtblis_la_DEPENDENCIES) lib/$(am__dirstamp) - $(AM_V_CXXLD)$(CXXLINK) -rpath $(libdir) $(lib_libtblis_la_OBJECTS) $(lib_libtblis_la_LIBADD) $(LIBS) -src/configs/haswell/lib_libzen_la-bli_gemm_asm_d6x8.lo: \ - src/configs/haswell/$(am__dirstamp) \ - src/configs/haswell/$(DEPDIR)/$(am__dirstamp) +src/configs/bulldozer/config.lo: \ + src/configs/bulldozer/$(am__dirstamp) \ + src/configs/bulldozer/$(DEPDIR)/$(am__dirstamp) +src/configs/piledriver/config.lo: \ + src/configs/piledriver/$(am__dirstamp) \ + src/configs/piledriver/$(DEPDIR)/$(am__dirstamp) +src/configs/excavator/config.lo: \ + src/configs/excavator/$(am__dirstamp) \ + src/configs/excavator/$(DEPDIR)/$(am__dirstamp) src/configs/zen/$(am__dirstamp): @$(MKDIR_P) src/configs/zen @: > src/configs/zen/$(am__dirstamp) src/configs/zen/$(DEPDIR)/$(am__dirstamp): @$(MKDIR_P) src/configs/zen/$(DEPDIR) @: > src/configs/zen/$(DEPDIR)/$(am__dirstamp) -src/configs/zen/lib_libzen_la-config.lo: \ +src/configs/zen/config.lo: src/configs/zen/$(am__dirstamp) \ + src/configs/zen/$(DEPDIR)/$(am__dirstamp) +src/configs/core2/config.lo: src/configs/core2/$(am__dirstamp) \ + src/configs/core2/$(DEPDIR)/$(am__dirstamp) +src/configs/sandybridge/config.lo: \ + src/configs/sandybridge/$(am__dirstamp) \ + src/configs/sandybridge/$(DEPDIR)/$(am__dirstamp) +src/configs/haswell/config.lo: src/configs/haswell/$(am__dirstamp) \ + src/configs/haswell/$(DEPDIR)/$(am__dirstamp) +src/configs/knl/config.lo: src/configs/knl/$(am__dirstamp) \ + src/configs/knl/$(DEPDIR)/$(am__dirstamp) +src/configs/skx1/config.lo: src/configs/skx1/$(am__dirstamp) \ + src/configs/skx1/$(DEPDIR)/$(am__dirstamp) +src/configs/skx2/vpu_count.lo: src/configs/skx2/$(am__dirstamp) \ + src/configs/skx2/$(DEPDIR)/$(am__dirstamp) +src/configs/skx2/config.lo: src/configs/skx2/$(am__dirstamp) \ + src/configs/skx2/$(DEPDIR)/$(am__dirstamp) + +lib/libtblis.la: $(lib_libtblis_la_OBJECTS) $(lib_libtblis_la_DEPENDENCIES) $(EXTRA_lib_libtblis_la_DEPENDENCIES) lib/$(am__dirstamp) + $(AM_V_CXXLD)$(CXXLINK) -rpath $(libdir) $(lib_libtblis_la_OBJECTS) $(lib_libtblis_la_LIBADD) $(LIBS) +src/configs/zen/lib_libzen_la-config_ker.lo: \ src/configs/zen/$(am__dirstamp) \ src/configs/zen/$(DEPDIR)/$(am__dirstamp) +src/configs/haswell/lib_libzen_la-bli_gemm_asm_d6x8.lo: \ + src/configs/haswell/$(am__dirstamp) \ + src/configs/haswell/$(DEPDIR)/$(am__dirstamp) lib/libzen.la: $(lib_libzen_la_OBJECTS) $(lib_libzen_la_DEPENDENCIES) $(EXTRA_lib_libzen_la_DEPENDENCIES) lib/$(am__dirstamp) $(AM_V_CXXLD)$(lib_libzen_la_LINK) $(am_lib_libzen_la_rpath) $(lib_libzen_la_OBJECTS) $(lib_libzen_la_LIBADD) $(LIBS) @@ -1906,34 +1959,44 @@ distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@src/configs/$(DEPDIR)/configs.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/bulldozer/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/core2/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/core2/$(DEPDIR)/lib_libcore2_la-bli_gemm_asm_d4x4.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/excavator/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-bli_gemm_asm_d8x3.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-bli_gemm_asm_d6x8.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/lib_libskx1_la-bli_gemm_asm_d6x8.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/haswell/$(DEPDIR)/lib_libzen_la-bli_gemm_asm_d6x8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dgemm_opt_24x8.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_24x8.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_30x8.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_sgemm_opt_24x16.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_spackm_opt_24x16.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/piledriver/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-bli_gemm_asm_d8x3.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/reference/$(DEPDIR)/lib_libreference_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/sandybridge/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-bli_gemm_asm_d8x4.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx1/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/vpu_count.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/zen/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/iface/1m/$(DEPDIR)/add.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/iface/1m/$(DEPDIR)/dot.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/iface/1m/$(DEPDIR)/reduce.Plo@am__quote@ # am--include-marker @@ -2183,47 +2246,47 @@ src/configs/haswell/lib_libzen_la-bli_gemm_asm_d6x8.lo: src/configs/haswell/bli_ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< -src/configs/bulldozer/lib_libbulldozer_la-config.lo: src/configs/bulldozer/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libbulldozer_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/bulldozer/lib_libbulldozer_la-config.lo -MD -MP -MF src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Tpo -c -o src/configs/bulldozer/lib_libbulldozer_la-config.lo `test -f 'src/configs/bulldozer/config.cxx' || echo '$(srcdir)/'`src/configs/bulldozer/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Tpo src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/bulldozer/config.cxx' object='src/configs/bulldozer/lib_libbulldozer_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo: src/configs/bulldozer/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libbulldozer_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo -MD -MP -MF src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Tpo -c -o src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo `test -f 'src/configs/bulldozer/config_ker.cxx' || echo '$(srcdir)/'`src/configs/bulldozer/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Tpo src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/bulldozer/config_ker.cxx' object='src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libbulldozer_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/bulldozer/lib_libbulldozer_la-config.lo `test -f 'src/configs/bulldozer/config.cxx' || echo '$(srcdir)/'`src/configs/bulldozer/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libbulldozer_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/bulldozer/lib_libbulldozer_la-config_ker.lo `test -f 'src/configs/bulldozer/config_ker.cxx' || echo '$(srcdir)/'`src/configs/bulldozer/config_ker.cxx -src/configs/core2/lib_libcore2_la-config.lo: src/configs/core2/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libcore2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/core2/lib_libcore2_la-config.lo -MD -MP -MF src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Tpo -c -o src/configs/core2/lib_libcore2_la-config.lo `test -f 'src/configs/core2/config.cxx' || echo '$(srcdir)/'`src/configs/core2/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Tpo src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/core2/config.cxx' object='src/configs/core2/lib_libcore2_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/core2/lib_libcore2_la-config_ker.lo: src/configs/core2/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libcore2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/core2/lib_libcore2_la-config_ker.lo -MD -MP -MF src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Tpo -c -o src/configs/core2/lib_libcore2_la-config_ker.lo `test -f 'src/configs/core2/config_ker.cxx' || echo '$(srcdir)/'`src/configs/core2/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Tpo src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/core2/config_ker.cxx' object='src/configs/core2/lib_libcore2_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libcore2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/core2/lib_libcore2_la-config.lo `test -f 'src/configs/core2/config.cxx' || echo '$(srcdir)/'`src/configs/core2/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libcore2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/core2/lib_libcore2_la-config_ker.lo `test -f 'src/configs/core2/config_ker.cxx' || echo '$(srcdir)/'`src/configs/core2/config_ker.cxx -src/configs/excavator/lib_libexcavator_la-config.lo: src/configs/excavator/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libexcavator_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/excavator/lib_libexcavator_la-config.lo -MD -MP -MF src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Tpo -c -o src/configs/excavator/lib_libexcavator_la-config.lo `test -f 'src/configs/excavator/config.cxx' || echo '$(srcdir)/'`src/configs/excavator/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Tpo src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/excavator/config.cxx' object='src/configs/excavator/lib_libexcavator_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/excavator/lib_libexcavator_la-config_ker.lo: src/configs/excavator/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libexcavator_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/excavator/lib_libexcavator_la-config_ker.lo -MD -MP -MF src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Tpo -c -o src/configs/excavator/lib_libexcavator_la-config_ker.lo `test -f 'src/configs/excavator/config_ker.cxx' || echo '$(srcdir)/'`src/configs/excavator/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Tpo src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/excavator/config_ker.cxx' object='src/configs/excavator/lib_libexcavator_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libexcavator_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/excavator/lib_libexcavator_la-config.lo `test -f 'src/configs/excavator/config.cxx' || echo '$(srcdir)/'`src/configs/excavator/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libexcavator_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/excavator/lib_libexcavator_la-config_ker.lo `test -f 'src/configs/excavator/config_ker.cxx' || echo '$(srcdir)/'`src/configs/excavator/config_ker.cxx -src/configs/haswell/lib_libhaswell_la-config.lo: src/configs/haswell/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libhaswell_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/haswell/lib_libhaswell_la-config.lo -MD -MP -MF src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Tpo -c -o src/configs/haswell/lib_libhaswell_la-config.lo `test -f 'src/configs/haswell/config.cxx' || echo '$(srcdir)/'`src/configs/haswell/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Tpo src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/haswell/config.cxx' object='src/configs/haswell/lib_libhaswell_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/haswell/lib_libhaswell_la-config_ker.lo: src/configs/haswell/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libhaswell_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/haswell/lib_libhaswell_la-config_ker.lo -MD -MP -MF src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Tpo -c -o src/configs/haswell/lib_libhaswell_la-config_ker.lo `test -f 'src/configs/haswell/config_ker.cxx' || echo '$(srcdir)/'`src/configs/haswell/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Tpo src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/haswell/config_ker.cxx' object='src/configs/haswell/lib_libhaswell_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libhaswell_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/haswell/lib_libhaswell_la-config.lo `test -f 'src/configs/haswell/config.cxx' || echo '$(srcdir)/'`src/configs/haswell/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libhaswell_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/haswell/lib_libhaswell_la-config_ker.lo `test -f 'src/configs/haswell/config_ker.cxx' || echo '$(srcdir)/'`src/configs/haswell/config_ker.cxx -src/configs/knl/lib_libknl_la-config.lo: src/configs/knl/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libknl_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/knl/lib_libknl_la-config.lo -MD -MP -MF src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Tpo -c -o src/configs/knl/lib_libknl_la-config.lo `test -f 'src/configs/knl/config.cxx' || echo '$(srcdir)/'`src/configs/knl/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Tpo src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/knl/config.cxx' object='src/configs/knl/lib_libknl_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/knl/lib_libknl_la-config_ker.lo: src/configs/knl/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libknl_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/knl/lib_libknl_la-config_ker.lo -MD -MP -MF src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Tpo -c -o src/configs/knl/lib_libknl_la-config_ker.lo `test -f 'src/configs/knl/config_ker.cxx' || echo '$(srcdir)/'`src/configs/knl/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Tpo src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/knl/config_ker.cxx' object='src/configs/knl/lib_libknl_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libknl_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/knl/lib_libknl_la-config.lo `test -f 'src/configs/knl/config.cxx' || echo '$(srcdir)/'`src/configs/knl/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libknl_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/knl/lib_libknl_la-config_ker.lo `test -f 'src/configs/knl/config_ker.cxx' || echo '$(srcdir)/'`src/configs/knl/config_ker.cxx -src/configs/piledriver/lib_libpiledriver_la-config.lo: src/configs/piledriver/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libpiledriver_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/piledriver/lib_libpiledriver_la-config.lo -MD -MP -MF src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Tpo -c -o src/configs/piledriver/lib_libpiledriver_la-config.lo `test -f 'src/configs/piledriver/config.cxx' || echo '$(srcdir)/'`src/configs/piledriver/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Tpo src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/piledriver/config.cxx' object='src/configs/piledriver/lib_libpiledriver_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/piledriver/lib_libpiledriver_la-config_ker.lo: src/configs/piledriver/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libpiledriver_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/piledriver/lib_libpiledriver_la-config_ker.lo -MD -MP -MF src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Tpo -c -o src/configs/piledriver/lib_libpiledriver_la-config_ker.lo `test -f 'src/configs/piledriver/config_ker.cxx' || echo '$(srcdir)/'`src/configs/piledriver/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Tpo src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/piledriver/config_ker.cxx' object='src/configs/piledriver/lib_libpiledriver_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libpiledriver_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/piledriver/lib_libpiledriver_la-config.lo `test -f 'src/configs/piledriver/config.cxx' || echo '$(srcdir)/'`src/configs/piledriver/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libpiledriver_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/piledriver/lib_libpiledriver_la-config_ker.lo `test -f 'src/configs/piledriver/config_ker.cxx' || echo '$(srcdir)/'`src/configs/piledriver/config_ker.cxx src/configs/reference/lib_libreference_la-config.lo: src/configs/reference/config.cxx @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libreference_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/reference/lib_libreference_la-config.lo -MD -MP -MF src/configs/reference/$(DEPDIR)/lib_libreference_la-config.Tpo -c -o src/configs/reference/lib_libreference_la-config.lo `test -f 'src/configs/reference/config.cxx' || echo '$(srcdir)/'`src/configs/reference/config.cxx @@ -2232,26 +2295,19 @@ src/configs/reference/lib_libreference_la-config.lo: src/configs/reference/confi @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libreference_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/reference/lib_libreference_la-config.lo `test -f 'src/configs/reference/config.cxx' || echo '$(srcdir)/'`src/configs/reference/config.cxx -src/configs/sandybridge/lib_libsandybridge_la-config.lo: src/configs/sandybridge/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libsandybridge_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/sandybridge/lib_libsandybridge_la-config.lo -MD -MP -MF src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Tpo -c -o src/configs/sandybridge/lib_libsandybridge_la-config.lo `test -f 'src/configs/sandybridge/config.cxx' || echo '$(srcdir)/'`src/configs/sandybridge/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Tpo src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/sandybridge/config.cxx' object='src/configs/sandybridge/lib_libsandybridge_la-config.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libsandybridge_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/sandybridge/lib_libsandybridge_la-config.lo `test -f 'src/configs/sandybridge/config.cxx' || echo '$(srcdir)/'`src/configs/sandybridge/config.cxx - -src/configs/skx2/lib_libskx1_la-vpu_count.lo: src/configs/skx2/vpu_count.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx1_la-vpu_count.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Tpo -c -o src/configs/skx2/lib_libskx1_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Tpo src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx2/vpu_count.cxx' object='src/configs/skx2/lib_libskx1_la-vpu_count.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo: src/configs/sandybridge/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libsandybridge_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo -MD -MP -MF src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Tpo -c -o src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo `test -f 'src/configs/sandybridge/config_ker.cxx' || echo '$(srcdir)/'`src/configs/sandybridge/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Tpo src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/sandybridge/config_ker.cxx' object='src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx2/lib_libskx1_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libsandybridge_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/sandybridge/lib_libsandybridge_la-config_ker.lo `test -f 'src/configs/sandybridge/config_ker.cxx' || echo '$(srcdir)/'`src/configs/sandybridge/config_ker.cxx -src/configs/skx1/lib_libskx1_la-config.lo: src/configs/skx1/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx1/lib_libskx1_la-config.lo -MD -MP -MF src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Tpo -c -o src/configs/skx1/lib_libskx1_la-config.lo `test -f 'src/configs/skx1/config.cxx' || echo '$(srcdir)/'`src/configs/skx1/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Tpo src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx1/config.cxx' object='src/configs/skx1/lib_libskx1_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/skx1/lib_libskx1_la-config_ker.lo: src/configs/skx1/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx1/lib_libskx1_la-config_ker.lo -MD -MP -MF src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Tpo -c -o src/configs/skx1/lib_libskx1_la-config_ker.lo `test -f 'src/configs/skx1/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx1/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Tpo src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx1/config_ker.cxx' object='src/configs/skx1/lib_libskx1_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx1/lib_libskx1_la-config.lo `test -f 'src/configs/skx1/config.cxx' || echo '$(srcdir)/'`src/configs/skx1/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx1/lib_libskx1_la-config_ker.lo `test -f 'src/configs/skx1/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx1/config_ker.cxx src/configs/skx2/lib_libskx2_la-vpu_count.lo: src/configs/skx2/vpu_count.cxx @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx2_la-vpu_count.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Tpo -c -o src/configs/skx2/lib_libskx2_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx @@ -2260,19 +2316,19 @@ src/configs/skx2/lib_libskx2_la-vpu_count.lo: src/configs/skx2/vpu_count.cxx @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx2/lib_libskx2_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx -src/configs/skx2/lib_libskx2_la-config.lo: src/configs/skx2/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx2_la-config.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Tpo -c -o src/configs/skx2/lib_libskx2_la-config.lo `test -f 'src/configs/skx2/config.cxx' || echo '$(srcdir)/'`src/configs/skx2/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Tpo src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx2/config.cxx' object='src/configs/skx2/lib_libskx2_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/skx2/lib_libskx2_la-config_ker.lo: src/configs/skx2/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx2_la-config_ker.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Tpo -c -o src/configs/skx2/lib_libskx2_la-config_ker.lo `test -f 'src/configs/skx2/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx2/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Tpo src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx2/config_ker.cxx' object='src/configs/skx2/lib_libskx2_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx2/lib_libskx2_la-config.lo `test -f 'src/configs/skx2/config.cxx' || echo '$(srcdir)/'`src/configs/skx2/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx2/lib_libskx2_la-config_ker.lo `test -f 'src/configs/skx2/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx2/config_ker.cxx -src/configs/zen/lib_libzen_la-config.lo: src/configs/zen/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libzen_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/zen/lib_libzen_la-config.lo -MD -MP -MF src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Tpo -c -o src/configs/zen/lib_libzen_la-config.lo `test -f 'src/configs/zen/config.cxx' || echo '$(srcdir)/'`src/configs/zen/config.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Tpo src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/zen/config.cxx' object='src/configs/zen/lib_libzen_la-config.lo' libtool=yes @AMDEPBACKSLASH@ +src/configs/zen/lib_libzen_la-config_ker.lo: src/configs/zen/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libzen_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/zen/lib_libzen_la-config_ker.lo -MD -MP -MF src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Tpo -c -o src/configs/zen/lib_libzen_la-config_ker.lo `test -f 'src/configs/zen/config_ker.cxx' || echo '$(srcdir)/'`src/configs/zen/config_ker.cxx +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Tpo src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/zen/config_ker.cxx' object='src/configs/zen/lib_libzen_la-config_ker.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libzen_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/zen/lib_libzen_la-config.lo `test -f 'src/configs/zen/config.cxx' || echo '$(srcdir)/'`src/configs/zen/config.cxx +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libzen_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/zen/lib_libzen_la-config_ker.lo `test -f 'src/configs/zen/config_ker.cxx' || echo '$(srcdir)/'`src/configs/zen/config_ker.cxx mostlyclean-libtool: -rm -f *.lo @@ -2942,34 +2998,44 @@ clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f src/configs/$(DEPDIR)/configs.Plo + -rm -f src/configs/bulldozer/$(DEPDIR)/config.Plo -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.Plo - -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Plo + -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Plo + -rm -f src/configs/core2/$(DEPDIR)/config.Plo -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-bli_gemm_asm_d4x4.Plo - -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Plo + -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Plo + -rm -f src/configs/excavator/$(DEPDIR)/config.Plo -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-bli_gemm_asm_d8x3.Plo - -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Plo + -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Plo + -rm -f src/configs/haswell/$(DEPDIR)/config.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-bli_gemm_asm_d6x8.Plo - -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Plo + -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libskx1_la-bli_gemm_asm_d6x8.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libzen_la-bli_gemm_asm_d6x8.Plo + -rm -f src/configs/knl/$(DEPDIR)/config.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dgemm_opt_24x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_24x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_30x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_sgemm_opt_24x16.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_spackm_opt_24x16.Plo - -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Plo + -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Plo + -rm -f src/configs/piledriver/$(DEPDIR)/config.Plo -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-bli_gemm_asm_d8x3.Plo - -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Plo + -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Plo -rm -f src/configs/reference/$(DEPDIR)/lib_libreference_la-config.Plo + -rm -f src/configs/sandybridge/$(DEPDIR)/config.Plo -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-bli_gemm_asm_d8x4.Plo - -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Plo - -rm -f src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Plo + -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Plo + -rm -f src/configs/skx1/$(DEPDIR)/config.Plo + -rm -f src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Plo + -rm -f src/configs/skx2/$(DEPDIR)/config.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Plo + -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo - -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Plo + -rm -f src/configs/skx2/$(DEPDIR)/vpu_count.Plo + -rm -f src/configs/zen/$(DEPDIR)/config.Plo + -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo -rm -f src/iface/1m/$(DEPDIR)/add.Plo -rm -f src/iface/1m/$(DEPDIR)/dot.Plo -rm -f src/iface/1m/$(DEPDIR)/reduce.Plo @@ -3112,34 +3178,44 @@ maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -f src/configs/$(DEPDIR)/configs.Plo + -rm -f src/configs/bulldozer/$(DEPDIR)/config.Plo -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-bli_gemm_asm_d4x6_fma4.Plo - -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config.Plo + -rm -f src/configs/bulldozer/$(DEPDIR)/lib_libbulldozer_la-config_ker.Plo + -rm -f src/configs/core2/$(DEPDIR)/config.Plo -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-bli_gemm_asm_d4x4.Plo - -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-config.Plo + -rm -f src/configs/core2/$(DEPDIR)/lib_libcore2_la-config_ker.Plo + -rm -f src/configs/excavator/$(DEPDIR)/config.Plo -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-bli_gemm_asm_d8x3.Plo - -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config.Plo + -rm -f src/configs/excavator/$(DEPDIR)/lib_libexcavator_la-config_ker.Plo + -rm -f src/configs/haswell/$(DEPDIR)/config.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-bli_gemm_asm_d6x8.Plo - -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config.Plo + -rm -f src/configs/haswell/$(DEPDIR)/lib_libhaswell_la-config_ker.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libskx1_la-bli_gemm_asm_d6x8.Plo -rm -f src/configs/haswell/$(DEPDIR)/lib_libzen_la-bli_gemm_asm_d6x8.Plo + -rm -f src/configs/knl/$(DEPDIR)/config.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dgemm_opt_24x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_24x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_dpackm_opt_30x8.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_sgemm_opt_24x16.Plo -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-bli_spackm_opt_24x16.Plo - -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-config.Plo + -rm -f src/configs/knl/$(DEPDIR)/lib_libknl_la-config_ker.Plo + -rm -f src/configs/piledriver/$(DEPDIR)/config.Plo -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-bli_gemm_asm_d8x3.Plo - -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config.Plo + -rm -f src/configs/piledriver/$(DEPDIR)/lib_libpiledriver_la-config_ker.Plo -rm -f src/configs/reference/$(DEPDIR)/lib_libreference_la-config.Plo + -rm -f src/configs/sandybridge/$(DEPDIR)/config.Plo -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-bli_gemm_asm_d8x4.Plo - -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config.Plo - -rm -f src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx1_la-vpu_count.Plo + -rm -f src/configs/sandybridge/$(DEPDIR)/lib_libsandybridge_la-config_ker.Plo + -rm -f src/configs/skx1/$(DEPDIR)/config.Plo + -rm -f src/configs/skx1/$(DEPDIR)/lib_libskx1_la-config_ker.Plo + -rm -f src/configs/skx2/$(DEPDIR)/config.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config.Plo + -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo - -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config.Plo + -rm -f src/configs/skx2/$(DEPDIR)/vpu_count.Plo + -rm -f src/configs/zen/$(DEPDIR)/config.Plo + -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo -rm -f src/iface/1m/$(DEPDIR)/add.Plo -rm -f src/iface/1m/$(DEPDIR)/dot.Plo -rm -f src/iface/1m/$(DEPDIR)/reduce.Plo diff --git a/src/configs/bulldozer/config.cxx b/src/configs/bulldozer/config.cxx index ad35728e3..0b9b56ebe 100644 --- a/src/configs/bulldozer/config.cxx +++ b/src/configs/bulldozer/config.cxx @@ -42,6 +42,4 @@ int bulldozer_check() return 1; } -TBLIS_CONFIG_INSTANTIATE(bulldozer); - } diff --git a/src/configs/bulldozer/config_ker.cxx b/src/configs/bulldozer/config_ker.cxx new file mode 100644 index 000000000..0f538ea7c --- /dev/null +++ b/src/configs/bulldozer/config_ker.cxx @@ -0,0 +1,9 @@ +#include "config.hpp" +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(bulldozer); + +} diff --git a/src/configs/core2/config.cxx b/src/configs/core2/config.cxx index cb48c25fc..2e985e6fc 100644 --- a/src/configs/core2/config.cxx +++ b/src/configs/core2/config.cxx @@ -30,6 +30,4 @@ int core2_check() return 1; } -TBLIS_CONFIG_INSTANTIATE(core2); - } diff --git a/src/configs/core2/config_ker.cxx b/src/configs/core2/config_ker.cxx new file mode 100644 index 000000000..6da83de59 --- /dev/null +++ b/src/configs/core2/config_ker.cxx @@ -0,0 +1,9 @@ +#include "config.hpp" +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(core2); + +} diff --git a/src/configs/excavator/config.cxx b/src/configs/excavator/config.cxx index b80315fa3..0fed25ca4 100644 --- a/src/configs/excavator/config.cxx +++ b/src/configs/excavator/config.cxx @@ -48,6 +48,4 @@ int excavator_check() return 4; } -TBLIS_CONFIG_INSTANTIATE(excavator); - } diff --git a/src/configs/excavator/config_ker.cxx b/src/configs/excavator/config_ker.cxx new file mode 100644 index 000000000..e93a5ece5 --- /dev/null +++ b/src/configs/excavator/config_ker.cxx @@ -0,0 +1,9 @@ +#include "config.hpp" +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(excavator); + +} diff --git a/src/configs/haswell/config.cxx b/src/configs/haswell/config.cxx index f0e08358c..1bd597a5d 100644 --- a/src/configs/haswell/config.cxx +++ b/src/configs/haswell/config.cxx @@ -10,13 +10,13 @@ int haswell_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_INTEL) - { + { if (get_verbose() >= 1) printf("tblis: haswell: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: haswell: Doesn't support AVX.\n"); return -1; } @@ -32,13 +32,8 @@ int haswell_check() if (get_verbose() >= 1) printf("tblis: haswell: Doesn't support AVX2.\n"); return -1; } - + return 3; } -//TBLIS_CONFIG_INSTANTIATE(haswell_d12x4); -//TBLIS_CONFIG_INSTANTIATE(haswell_d4x12); -//TBLIS_CONFIG_INSTANTIATE(haswell_d8x6); -TBLIS_CONFIG_INSTANTIATE(haswell_d6x8); - } diff --git a/src/configs/haswell/config_ker.cxx b/src/configs/haswell/config_ker.cxx new file mode 100644 index 000000000..bc86c214b --- /dev/null +++ b/src/configs/haswell/config_ker.cxx @@ -0,0 +1,12 @@ +#include "util/cpuid.hpp" +#include "config.hpp" + +namespace tblis +{ + +//TBLIS_CONFIG_INSTANTIATE(haswell_d12x4); +//TBLIS_CONFIG_INSTANTIATE(haswell_d4x12); +//TBLIS_CONFIG_INSTANTIATE(haswell_d8x6); +TBLIS_CONFIG_INSTANTIATE(haswell_d6x8); + +} diff --git a/src/configs/knl/config.cxx b/src/configs/knl/config.cxx index f043bca62..97cdcdfbc 100644 --- a/src/configs/knl/config.cxx +++ b/src/configs/knl/config.cxx @@ -1,129 +1,28 @@ #include "util/cpuid.hpp" #include "config.hpp" -#include "blis.h" - -template -using bli_packm_t = void(*)(conj_t conja, len_type n, const T* kappa, - const T* a, stride_type rs_a, stride_type cs_a, - T* p, stride_type cs_p); - -template -using bli_packm_func = typename std::remove_pointer>::type; - -extern "C" bli_packm_func bli_dpackm_30xk_opt; -extern "C" bli_packm_func bli_dpackm_24xk_opt; -extern "C" bli_packm_func bli_dpackm_8xk_opt; -extern "C" bli_packm_func bli_spackm_24xk_opt; -extern "C" bli_packm_func bli_spackm_16xk_opt; - namespace tblis { -void knl_dpackm_30xk(len_type m, len_type k, - const double* p_a, stride_type rs_a, stride_type cs_a, - double* p_ap) -{ - constexpr double one = 1.0; - - if (m == 30) - { - bli_dpackm_30xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 32); - } - else - { - pack_nn_ukr_def - (m, k, p_a, rs_a, cs_a, p_ap); - } -} - -void knl_dpackm_24xk(len_type m, len_type k, - const double* p_a, stride_type rs_a, stride_type cs_a, - double* p_ap) -{ - constexpr double one = 1.0; - - if (m == 24) - { - bli_dpackm_24xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 24); - } - else - { - pack_nn_ukr_def - (m, k, p_a, rs_a, cs_a, p_ap); - } -} - -void knl_dpackm_8xk(len_type m, len_type k, - const double* p_a, stride_type rs_a, stride_type cs_a, - double* p_ap) -{ - constexpr double one = 1.0; - - if (m == 8) - { - bli_dpackm_8xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 8); - } - else - { - pack_nn_ukr_def - (m, k, p_a, rs_a, cs_a, p_ap); - } -} - -void knl_spackm_24xk(len_type m, len_type k, - const float* p_a, stride_type rs_a, stride_type cs_a, - float* p_ap) -{ - constexpr float one = 1.0; - - if (m == 24) - { - bli_spackm_24xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 24); - } - else - { - pack_nn_ukr_def - (m, k, p_a, rs_a, cs_a, p_ap); - } -} - -void knl_spackm_16xk(len_type m, len_type k, - const float* p_a, stride_type rs_a, stride_type cs_a, - float* p_ap) -{ - constexpr float one = 1.0; - - if (m == 16) - { - bli_spackm_16xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 16); - } - else - { - pack_nn_ukr_def - (m, k, p_a, rs_a, cs_a, p_ap); - } -} - int knl_check() { int family, model, features; int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_INTEL) - { + { if (get_verbose() >= 1) printf("tblis: knl: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: knl: Doesn't support AVX.\n"); return -1; } if (!check_features(features, FEATURE_FMA3)) - { + { if (get_verbose() >= 1) printf("tblis: knl: Doesn't support FMA3.\n"); return -1; } @@ -133,25 +32,20 @@ int knl_check() if (get_verbose() >= 1) printf("tblis: knl: Doesn't support AVX2.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512F)) { if (get_verbose() >= 1) printf("tblis: knl: Doesn't support AVX512F.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512PF)) { if (get_verbose() >= 1) printf("tblis: knl: Doesn't support AVX512PF.\n"); return -1; } - + return 4; } -//TBLIS_CONFIG_INSTANTIATE(knl_d30x8_knc); -//TBLIS_CONFIG_INSTANTIATE(knl_d30x8); -TBLIS_CONFIG_INSTANTIATE(knl_d24x8); -//TBLIS_CONFIG_INSTANTIATE(knl_d8x24); - } diff --git a/src/configs/knl/config_ker.cxx b/src/configs/knl/config_ker.cxx new file mode 100644 index 000000000..20e605ac5 --- /dev/null +++ b/src/configs/knl/config_ker.cxx @@ -0,0 +1,113 @@ +#include "util/cpuid.hpp" +#include "config.hpp" + +#include "blis.h" + +template +using bli_packm_t = void(*)(conj_t conja, len_type n, const T* kappa, + const T* a, stride_type rs_a, stride_type cs_a, + T* p, stride_type cs_p); + +template +using bli_packm_func = typename std::remove_pointer>::type; + +extern "C" bli_packm_func bli_dpackm_30xk_opt; +extern "C" bli_packm_func bli_dpackm_24xk_opt; +extern "C" bli_packm_func bli_dpackm_8xk_opt; +extern "C" bli_packm_func bli_spackm_24xk_opt; +extern "C" bli_packm_func bli_spackm_16xk_opt; + +namespace tblis +{ + +void knl_dpackm_30xk(len_type m, len_type k, + const double* p_a, stride_type rs_a, stride_type cs_a, + double* p_ap) +{ + constexpr double one = 1.0; + + if (m == 30) + { + bli_dpackm_30xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 32); + } + else + { + pack_nn_ukr_def + (m, k, p_a, rs_a, cs_a, p_ap); + } +} + +void knl_dpackm_24xk(len_type m, len_type k, + const double* p_a, stride_type rs_a, stride_type cs_a, + double* p_ap) +{ + constexpr double one = 1.0; + + if (m == 24) + { + bli_dpackm_24xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 24); + } + else + { + pack_nn_ukr_def + (m, k, p_a, rs_a, cs_a, p_ap); + } +} + +void knl_dpackm_8xk(len_type m, len_type k, + const double* p_a, stride_type rs_a, stride_type cs_a, + double* p_ap) +{ + constexpr double one = 1.0; + + if (m == 8) + { + bli_dpackm_8xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 8); + } + else + { + pack_nn_ukr_def + (m, k, p_a, rs_a, cs_a, p_ap); + } +} + +void knl_spackm_24xk(len_type m, len_type k, + const float* p_a, stride_type rs_a, stride_type cs_a, + float* p_ap) +{ + constexpr float one = 1.0; + + if (m == 24) + { + bli_spackm_24xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 24); + } + else + { + pack_nn_ukr_def + (m, k, p_a, rs_a, cs_a, p_ap); + } +} + +void knl_spackm_16xk(len_type m, len_type k, + const float* p_a, stride_type rs_a, stride_type cs_a, + float* p_ap) +{ + constexpr float one = 1.0; + + if (m == 16) + { + bli_spackm_16xk_opt(BLIS_NO_CONJUGATE, k, &one, p_a, rs_a, cs_a, p_ap, 16); + } + else + { + pack_nn_ukr_def + (m, k, p_a, rs_a, cs_a, p_ap); + } +} + +//TBLIS_CONFIG_INSTANTIATE(knl_d30x8_knc); +//TBLIS_CONFIG_INSTANTIATE(knl_d30x8); +TBLIS_CONFIG_INSTANTIATE(knl_d24x8); +//TBLIS_CONFIG_INSTANTIATE(knl_d8x24); + +} diff --git a/src/configs/piledriver/config.cxx b/src/configs/piledriver/config.cxx index 22cae3e31..4605f3845 100644 --- a/src/configs/piledriver/config.cxx +++ b/src/configs/piledriver/config.cxx @@ -10,19 +10,19 @@ int piledriver_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_AMD) - { + { if (get_verbose() >= 1) printf("tblis: piledriver: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: piledriver: Doesn't support AVX.\n"); return -1; } if (!check_features(features, FEATURE_FMA3)) - { + { if (get_verbose() >= 1) printf("tblis: piledriver: Doesn't support FMA3.\n"); return -1; } @@ -32,9 +32,9 @@ int piledriver_check() if (get_verbose() >= 1) printf("tblis: piledriver: Doesn't support FMA4.\n"); return -1; } - + if (family != 0x15) - { + { if (get_verbose() >= 1) printf("tblis: piledriver: Wrong family (%xh).\n", family); return -1; } @@ -48,6 +48,4 @@ int piledriver_check() return 2; } -TBLIS_CONFIG_INSTANTIATE(piledriver); - } diff --git a/src/configs/piledriver/config_ker.cxx b/src/configs/piledriver/config_ker.cxx new file mode 100644 index 000000000..ffa6e8104 --- /dev/null +++ b/src/configs/piledriver/config_ker.cxx @@ -0,0 +1,9 @@ +#include "config.hpp" +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(piledriver); + +} diff --git a/src/configs/sandybridge/config.cxx b/src/configs/sandybridge/config.cxx index e377a5e8a..1bd39e05c 100644 --- a/src/configs/sandybridge/config.cxx +++ b/src/configs/sandybridge/config.cxx @@ -10,13 +10,13 @@ int sandybridge_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_INTEL) - { + { if (get_verbose() >= 1) printf("tblis: sandybridge: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: sandybridge: Doesn't support AVX.\n"); return -1; } @@ -24,6 +24,4 @@ int sandybridge_check() return 2; } -TBLIS_CONFIG_INSTANTIATE(sandybridge); - } diff --git a/src/configs/sandybridge/config_ker.cxx b/src/configs/sandybridge/config_ker.cxx new file mode 100644 index 000000000..a57f782f9 --- /dev/null +++ b/src/configs/sandybridge/config_ker.cxx @@ -0,0 +1,9 @@ +#include "util/cpuid.hpp" +#include "config.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(sandybridge); + +} diff --git a/src/configs/skx1/config.cxx b/src/configs/skx1/config.cxx index 35da26fee..fa5946217 100644 --- a/src/configs/skx1/config.cxx +++ b/src/configs/skx1/config.cxx @@ -13,19 +13,19 @@ int skx1_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_INTEL) - { + { if (get_verbose() >= 1) printf("tblis: skx1: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX.\n"); return -1; } if (!check_features(features, FEATURE_FMA3)) - { + { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support FMA3.\n"); return -1; } @@ -35,34 +35,34 @@ int skx1_check() if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX2.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512F)) { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX512F.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512DQ)) { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX512DQ.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512BW)) { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX512BW.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512VL)) { if (get_verbose() >= 1) printf("tblis: skx1: Doesn't support AVX512VL.\n"); return -1; } - - int nvpu = vpu_count(); + + int nvpu = vpu_count(); if (nvpu != 1) - { + { if (get_verbose() >= 1) printf("tblis: skx1: Wrong number of VPUs (%d).\n", nvpu); return -1; } @@ -70,6 +70,4 @@ int skx1_check() return 4; } -TBLIS_CONFIG_INSTANTIATE(skx1); - } diff --git a/src/configs/skx1/config_ker.cxx b/src/configs/skx1/config_ker.cxx new file mode 100644 index 000000000..4412183df --- /dev/null +++ b/src/configs/skx1/config_ker.cxx @@ -0,0 +1,10 @@ +#include "config.hpp" + +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(skx1); + +} diff --git a/src/configs/skx2/config.cxx b/src/configs/skx2/config.cxx index e26536cf6..3ee8d9bbe 100644 --- a/src/configs/skx2/config.cxx +++ b/src/configs/skx2/config.cxx @@ -13,19 +13,19 @@ int skx2_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_INTEL) - { + { if (get_verbose() >= 1) printf("tblis: skx2: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX.\n"); return -1; } if (!check_features(features, FEATURE_FMA3)) - { + { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support FMA3.\n"); return -1; } @@ -35,34 +35,34 @@ int skx2_check() if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX2.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512F)) { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX512F.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512DQ)) { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX512DQ.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512BW)) { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX512BW.\n"); return -1; } - + if (!check_features(features, FEATURE_AVX512VL)) { if (get_verbose() >= 1) printf("tblis: skx2: Doesn't support AVX512VL.\n"); return -1; } - - int nvpu = vpu_count(); + + int nvpu = vpu_count(); if (nvpu != 2) - { + { if (get_verbose() >= 1) printf("tblis: skx2: Wrong number of VPUs (%d).\n", nvpu); return -1; } @@ -70,22 +70,4 @@ int skx2_check() return 4; } -//TBLIS_CONFIG_INSTANTIATE(skx_32x6_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_32x6_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_24x8_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_24x8_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_16x12_l1); -TBLIS_CONFIG_INSTANTIATE(skx_16x12_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_12x16_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_12x16_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_8x24_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_8x24_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_6x32_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_6x32_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l1); -//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l2); -//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l1_flip); -//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l2_flip); -//TBLIS_CONFIG_INSTANTIATE(skx_knl); - } diff --git a/src/configs/skx2/config_ker.cxx b/src/configs/skx2/config_ker.cxx new file mode 100644 index 000000000..9485d1577 --- /dev/null +++ b/src/configs/skx2/config_ker.cxx @@ -0,0 +1,26 @@ +#include "config.hpp" + +#include "util/cpuid.hpp" + +namespace tblis +{ + +//TBLIS_CONFIG_INSTANTIATE(skx_32x6_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_32x6_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_24x8_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_24x8_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_16x12_l1); +TBLIS_CONFIG_INSTANTIATE(skx_16x12_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_12x16_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_12x16_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_8x24_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_8x24_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_6x32_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_6x32_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l1); +//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l2); +//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l1_flip); +//TBLIS_CONFIG_INSTANTIATE(skx_8x8_l2_flip); +//TBLIS_CONFIG_INSTANTIATE(skx_knl); + +} diff --git a/src/configs/zen/config.cxx b/src/configs/zen/config.cxx index e4ae7a387..b4e9911fb 100644 --- a/src/configs/zen/config.cxx +++ b/src/configs/zen/config.cxx @@ -11,19 +11,19 @@ int zen_check() int vendor = get_cpu_type(family, model, features); if (vendor != VENDOR_AMD) - { + { if (get_verbose() >= 1) printf("tblis: zen: Wrong vendor.\n"); return -1; } if (!check_features(features, FEATURE_AVX)) - { + { if (get_verbose() >= 1) printf("tblis: zen: Doesn't support AVX.\n"); return -1; } if (!check_features(features, FEATURE_FMA3)) - { + { if (get_verbose() >= 1) printf("tblis: zen: Doesn't support FMA3.\n"); return -1; } @@ -33,9 +33,9 @@ int zen_check() if (get_verbose() >= 1) printf("tblis: zen: Doesn't support AVX2.\n"); return -1; } - + if (family != 0x17) - { + { if (get_verbose() >= 1) printf("tblis: zen: Wrong family (%xh).\n", family); return -1; } @@ -43,6 +43,4 @@ int zen_check() return 1; } -TBLIS_CONFIG_INSTANTIATE(zen); - } diff --git a/src/configs/zen/config_ker.cxx b/src/configs/zen/config_ker.cxx new file mode 100644 index 000000000..12c551168 --- /dev/null +++ b/src/configs/zen/config_ker.cxx @@ -0,0 +1,10 @@ +#include "config.hpp" + +#include "util/cpuid.hpp" + +namespace tblis +{ + +TBLIS_CONFIG_INSTANTIATE(zen); + +} From 8833756172fa251f0d19274532ec22d20f5947d2 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 27 May 2020 16:36:41 -0500 Subject: [PATCH 06/16] Make config instances local static variables so that initialization happens only when the configuration is selected. --- src/configs/config_builder.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/configs/config_builder.hpp b/src/configs/config_builder.hpp index d439b6eaa..933329835 100644 --- a/src/configs/config_builder.hpp +++ b/src/configs/config_builder.hpp @@ -18,22 +18,22 @@ TBLIS_GET_VALUE_OR_DEFAULT_CASE(value,default,TBLIS_IS_EMPTY(value)) #define TBLIS_BEGIN_CONFIG(cfg) \ -extern config cfg##_config_instance; \ struct cfg##_config : config_template \ { \ typedef cfg##_config this_config; \ \ static constexpr const char* name = #cfg; \ \ - static const config& instance() \ - { \ - return cfg##_config_instance; \ - } \ + static const config& instance(); #define TBLIS_END_CONFIG }; #define TBLIS_CONFIG_INSTANTIATE(cfg) \ -config cfg##_config_instance = config(cfg##_config()); +const config& cfg##_config::instance() \ +{ \ + static config _instance(cfg##_config{}); \ + return _instance; \ +} #define TBLIS_CONFIG_REGISTER_BLOCKSIZE(name, S,D,C,Z, SE,DE,CE,ZE, SD,DD,CD,ZD) \ template struct name : register_blocksize Date: Wed, 27 May 2020 16:40:28 -0500 Subject: [PATCH 07/16] Fix duplicate object. --- Makefile.am | 1 - Makefile.in | 18 +----------------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/Makefile.am b/Makefile.am index 8dc5e0af7..9baa1ca36 100644 --- a/Makefile.am +++ b/Makefile.am @@ -376,7 +376,6 @@ lib_libtblis_la_SOURCES += src/configs/skx2/vpu_count.cxx \ src/configs/skx2/config.cxx lib_libskx2_la_SOURCES = src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ - src/configs/skx2/vpu_count.cxx \ src/configs/skx2/config_ker.cxx # src/configs/skx2/bli_dgemm_opt_12x16_l1.c \ # src/configs/skx2/bli_dgemm_opt_8x8_l1.c \ diff --git a/Makefile.in b/Makefile.in index ffd34d85e..25bacb960 100644 --- a/Makefile.in +++ b/Makefile.in @@ -338,10 +338,9 @@ lib_libskx2_la_LIBADD = am__lib_libskx2_la_SOURCES_DIST = \ src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ - src/configs/skx2/vpu_count.cxx src/configs/skx2/config_ker.cxx + src/configs/skx2/config_ker.cxx @ENABLE_SKX2_TRUE@am_lib_libskx2_la_OBJECTS = src/configs/skx2/lib_libskx2_la-bli_sgemm_opt_12x32_l2.lo \ @ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-bli_dgemm_opt_12x16_l2.lo \ -@ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-vpu_count.lo \ @ENABLE_SKX2_TRUE@ src/configs/skx2/lib_libskx2_la-config_ker.lo lib_libskx2_la_OBJECTS = $(am_lib_libskx2_la_OBJECTS) lib_libskx2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ @@ -546,7 +545,6 @@ am__depfiles_remade = src/configs/$(DEPDIR)/configs.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo \ src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo \ - src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo \ src/configs/skx2/$(DEPDIR)/vpu_count.Plo \ src/configs/zen/$(DEPDIR)/config.Plo \ src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo \ @@ -1149,7 +1147,6 @@ lib_libtblis_la_LIBADD = src/external/tci/lib/libtci.la \ @ENABLE_INTEL_COMPILER_TRUE@@ENABLE_SKX1_TRUE@lib_libskx1_la_CXXFLAGS = -O3 -xCORE-AVX512 @ENABLE_SKX2_TRUE@lib_libskx2_la_SOURCES = src/configs/skx2/bli_sgemm_opt_12x32_l2.c \ @ENABLE_SKX2_TRUE@ src/configs/skx2/bli_dgemm_opt_12x16_l2.c \ -@ENABLE_SKX2_TRUE@ src/configs/skx2/vpu_count.cxx \ @ENABLE_SKX2_TRUE@ src/configs/skx2/config_ker.cxx @ENABLE_INTEL_COMPILER_FALSE@@ENABLE_SKX2_TRUE@@IS_OSX_FALSE@lib_libskx2_la_CFLAGS = -O3 -mavx512f -mavx512dq -mavx512bw -mavx512vl -march=skylake-avx512 -mfpmath=sse @@ -1474,9 +1471,6 @@ src/configs/skx2/lib_libskx2_la-bli_sgemm_opt_12x32_l2.lo: \ src/configs/skx2/lib_libskx2_la-bli_dgemm_opt_12x16_l2.lo: \ src/configs/skx2/$(am__dirstamp) \ src/configs/skx2/$(DEPDIR)/$(am__dirstamp) -src/configs/skx2/lib_libskx2_la-vpu_count.lo: \ - src/configs/skx2/$(am__dirstamp) \ - src/configs/skx2/$(DEPDIR)/$(am__dirstamp) src/configs/skx2/lib_libskx2_la-config_ker.lo: \ src/configs/skx2/$(am__dirstamp) \ src/configs/skx2/$(DEPDIR)/$(am__dirstamp) @@ -1993,7 +1987,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo@am__quote@ # am--include-marker -@AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/skx2/$(DEPDIR)/vpu_count.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/zen/$(DEPDIR)/config.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo@am__quote@ # am--include-marker @@ -2309,13 +2302,6 @@ src/configs/skx1/lib_libskx1_la-config_ker.lo: src/configs/skx1/config_ker.cxx @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx1_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx1/lib_libskx1_la-config_ker.lo `test -f 'src/configs/skx1/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx1/config_ker.cxx -src/configs/skx2/lib_libskx2_la-vpu_count.lo: src/configs/skx2/vpu_count.cxx -@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx2_la-vpu_count.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Tpo -c -o src/configs/skx2/lib_libskx2_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx -@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Tpo src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='src/configs/skx2/vpu_count.cxx' object='src/configs/skx2/lib_libskx2_la-vpu_count.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -c -o src/configs/skx2/lib_libskx2_la-vpu_count.lo `test -f 'src/configs/skx2/vpu_count.cxx' || echo '$(srcdir)/'`src/configs/skx2/vpu_count.cxx - src/configs/skx2/lib_libskx2_la-config_ker.lo: src/configs/skx2/config_ker.cxx @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_libskx2_la_CXXFLAGS) $(CXXFLAGS) -MT src/configs/skx2/lib_libskx2_la-config_ker.lo -MD -MP -MF src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Tpo -c -o src/configs/skx2/lib_libskx2_la-config_ker.lo `test -f 'src/configs/skx2/config_ker.cxx' || echo '$(srcdir)/'`src/configs/skx2/config_ker.cxx @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Tpo src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo @@ -3032,7 +3018,6 @@ distclean: distclean-recursive -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo -rm -f src/configs/skx2/$(DEPDIR)/vpu_count.Plo -rm -f src/configs/zen/$(DEPDIR)/config.Plo -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo @@ -3212,7 +3197,6 @@ maintainer-clean: maintainer-clean-recursive -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_dgemm_opt_12x16_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-bli_sgemm_opt_12x32_l2.Plo -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-config_ker.Plo - -rm -f src/configs/skx2/$(DEPDIR)/lib_libskx2_la-vpu_count.Plo -rm -f src/configs/skx2/$(DEPDIR)/vpu_count.Plo -rm -f src/configs/zen/$(DEPDIR)/config.Plo -rm -f src/configs/zen/$(DEPDIR)/lib_libzen_la-config_ker.Plo From d6afd7e3e0db9f201467112e24667287a64ad05f Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 27 May 2020 17:25:45 -0500 Subject: [PATCH 08/16] Force delayed initialization of configs. --- src/configs/config_builder.hpp | 6 +++--- src/configs/configs.hpp | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/configs/config_builder.hpp b/src/configs/config_builder.hpp index 933329835..b4679bc79 100644 --- a/src/configs/config_builder.hpp +++ b/src/configs/config_builder.hpp @@ -24,14 +24,14 @@ struct cfg##_config : config_template \ \ static constexpr const char* name = #cfg; \ \ - static const config& instance(); + static const config& instance(int hack = 0); #define TBLIS_END_CONFIG }; #define TBLIS_CONFIG_INSTANTIATE(cfg) \ -const config& cfg##_config::instance() \ +const config& cfg##_config::instance(int hack) \ { \ - static config _instance(cfg##_config{}); \ + static config _instance(cfg##_config{}, hack); \ return _instance; \ } diff --git a/src/configs/configs.hpp b/src/configs/configs.hpp index 7dbb2ed94..450110ffa 100644 --- a/src/configs/configs.hpp +++ b/src/configs/configs.hpp @@ -166,8 +166,9 @@ struct config check_fn_t check; const char* name; + int _hack; - template config(const Traits&) + template config(const Traits&, int hack) : add_ukr(typename Traits::template add_ukr()), dot_ukr(typename Traits::template dot_ukr()), mult_ukr(typename Traits::template mult_ukr()), @@ -222,7 +223,7 @@ struct config mr_max_thread(typename Traits::template mr_max_thread()), nr_max_thread(typename Traits::template nr_max_thread()), - check(Traits::check), name(Traits::name) {} + check(Traits::check), name(Traits::name), _hack(hack) {} operator const tblis_config*() const { From 0bef960688dfff5def1d46531d51cafbd846cb0c Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 27 May 2020 18:41:22 -0500 Subject: [PATCH 09/16] Better fix for initialization issue. --- src/configs/config_builder.hpp | 6 ++--- src/configs/configs.cxx | 41 +++++++++++++++++++++++++--------- src/configs/configs.hpp | 5 ++--- test/3m/gemm_ukr.cxx | 24 +++++++++++++++----- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/src/configs/config_builder.hpp b/src/configs/config_builder.hpp index b4679bc79..933329835 100644 --- a/src/configs/config_builder.hpp +++ b/src/configs/config_builder.hpp @@ -24,14 +24,14 @@ struct cfg##_config : config_template \ \ static constexpr const char* name = #cfg; \ \ - static const config& instance(int hack = 0); + static const config& instance(); #define TBLIS_END_CONFIG }; #define TBLIS_CONFIG_INSTANTIATE(cfg) \ -const config& cfg##_config::instance(int hack) \ +const config& cfg##_config::instance() \ { \ - static config _instance(cfg##_config{}, hack); \ + static config _instance(cfg##_config{}); \ return _instance; \ } diff --git a/src/configs/configs.cxx b/src/configs/configs.cxx index 8f6274284..b8c14799f 100644 --- a/src/configs/configs.cxx +++ b/src/configs/configs.cxx @@ -14,14 +14,29 @@ enum config_t num_configs }; -const config* const configs[num_configs] = +using instance_fn_t = const config& (*)(void); + +const char* names[num_configs] = +{ +#define FOREACH_CONFIG(config) config::name, +#include "configs/foreach_config.h" +}; + +const check_fn_t check[num_configs] = +{ +#define FOREACH_CONFIG(config) config::check, +#include "configs/foreach_config.h" +}; + +const instance_fn_t instance[num_configs] = { -#define FOREACH_CONFIG(config) &config::instance(), +#define FOREACH_CONFIG(config) &config::instance, #include "configs/foreach_config.h" }; struct default_config { + instance_fn_t value_fn = nullptr; const config* value = nullptr; default_config() @@ -30,25 +45,31 @@ struct default_config for (int cfg = 0;cfg < num_configs;cfg++) { - TBLIS_ASSERT(configs[cfg]->check); - int cur_prio = configs[cfg]->check(); + TBLIS_ASSERT(check[cfg]); + int cur_prio = check[cfg](); if (cur_prio > priority) { priority = cur_prio; - value = configs[cfg]; + value_fn = instance[cfg]; } if (get_verbose() >= 1) { printf("tblis: Configuration %s assigned priority %d.\n", - configs[cfg]->name, cur_prio); + names[cfg], cur_prio); } } - if (!value) + if (!value_fn) tblis_abort_with_message(nullptr, "tblis: No usable configuration enabled, aborting!"); + value = &value_fn(); + + if (!value) + tblis_abort_with_message(nullptr, + "tblis: Could not get config instance, aborting!"); + if (get_verbose() >= 1) { printf("tblis: Using configuration %s.\n", value->name); @@ -73,12 +94,12 @@ const config& get_config(const std::string& name) { for (int cfg = 0;cfg < num_configs;cfg++) { - if (configs[cfg]->name == name) + if (names[cfg] == name) { - if (configs[cfg]->check() == -1) + if (check[cfg]() == -1) tblis_abort_with_message(nullptr, "tblis: Configuration %s cannot be used!", name.c_str()); - return *configs[cfg]; + return instance[cfg](); } } diff --git a/src/configs/configs.hpp b/src/configs/configs.hpp index 450110ffa..7dbb2ed94 100644 --- a/src/configs/configs.hpp +++ b/src/configs/configs.hpp @@ -166,9 +166,8 @@ struct config check_fn_t check; const char* name; - int _hack; - template config(const Traits&, int hack) + template config(const Traits&) : add_ukr(typename Traits::template add_ukr()), dot_ukr(typename Traits::template dot_ukr()), mult_ukr(typename Traits::template mult_ukr()), @@ -223,7 +222,7 @@ struct config mr_max_thread(typename Traits::template mr_max_thread()), nr_max_thread(typename Traits::template nr_max_thread()), - check(Traits::check), name(Traits::name), _hack(hack) {} + check(Traits::check), name(Traits::name) {} operator const tblis_config*() const { diff --git a/test/3m/gemm_ukr.cxx b/test/3m/gemm_ukr.cxx index da40b4c48..43100bee7 100644 --- a/test/3m/gemm_ukr.cxx +++ b/test/3m/gemm_ukr.cxx @@ -3,12 +3,26 @@ #include "configs/include_configs.hpp" #include "nodes/gemm_ukr.hpp" -const config* const configs[] = +using instance_fn_t = const config& (*)(void); + +enum config_t +{ +#define FOREACH_CONFIG(config) config##_value, +#include "configs/foreach_config.h" + num_configs +}; + +const check_fn_t checks[] = +{ +#define FOREACH_CONFIG(config) config::check, +#include "configs/foreach_config.h" +}; + +const instance_fn_t instance[] = { -#define FOREACH_CONFIG(config) &config::instance(), +#define FOREACH_CONFIG(config) &config::instance, #include "configs/foreach_config.h" }; -constexpr auto num_configs = sizeof(configs)/sizeof(configs[0]); /* * Assume: @@ -27,9 +41,9 @@ TEMPLATED_TEST_CASE(gemm_ukr, T, all_types) { for (unsigned i = 0;i < num_configs;i++) { - auto& cfg = *configs[i]; + if (checks[i]() == -1) continue; - if (cfg.check() == -1) continue; + auto& cfg = instance[i](); len_type MR = cfg.gemm_mr.def(); len_type NR = cfg.gemm_nr.def(); From 4ebb3014a850ca26403b3e2b21f2854707c9cd47 Mon Sep 17 00:00:00 2001 From: MattDavis Date: Thu, 4 Jun 2020 14:12:12 -0400 Subject: [PATCH 10/16] SYSCTL -> SYSCTLBYNAME --- src/util/thread.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/thread.cxx b/src/util/thread.cxx index b11d8107f..51910aedb 100644 --- a/src/util/thread.cxx +++ b/src/util/thread.cxx @@ -66,7 +66,7 @@ struct thread_configuration num_threads = strtol(s.c_str(), NULL, 10); - #elif TBLIS_HAVE_SYSCTL + #elif TBLIS_HAVE_SYSCTLBYNAME size_t len = sizeof(num_threads); sysctlbyname("hw.physicalcpu", &num_threads, &len, NULL, 0); From 3e4c4b82943726c443b6f408c9c9791dcad7a847 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 8 Jun 2020 18:30:29 -0500 Subject: [PATCH 11/16] Account for possible rounding error in beta-scaling with complex arithmetic. Fixes #30. --- test/3t/contract.cxx | 8 ++++---- test/3t/mult.cxx | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/test/3t/contract.cxx b/test/3t/contract.cxx index c9a060782..7ba56d865 100644 --- a/test/3t/contract.cxx +++ b/test/3t/contract.cxx @@ -50,7 +50,7 @@ REPLICATED_TEMPLATED_TEST_CASE(contract, R, T, all_types) TENSOR_INFO(C); auto idx_AB = intersection(idx_A, idx_B); - auto neps = prod(select_from(A.lengths(), idx_A, idx_AB))*prod(C.lengths()); + auto neps = (prod(select_from(A.lengths(), idx_A, idx_AB))+1)*prod(C.lengths()); impl = BLAS_BASED; D.reset(C); @@ -103,7 +103,7 @@ REPLICATED_TEMPLATED_TEST_CASE(dpd_contract, R, T, all_types) unsigned irrep_AC = A.irrep()^irrep_AB; unsigned irrep_BC = B.irrep()^irrep_AB; - neps += size_AB[irrep_AB]* + neps += (size_AB[irrep_AB]+1)* size_AC[irrep_AC]* size_BC[irrep_BC]; } @@ -145,7 +145,7 @@ REPLICATED_TEMPLATED_TEST_CASE(indexed_contract, R, T, all_types) INDEXED_TENSOR_INFO(C); auto idx_AB = intersection(idx_A, idx_B); - auto neps = prod(select_from(A.lengths(), idx_A, idx_AB))*prod(C.lengths()); + auto neps = (prod(select_from(A.lengths(), idx_A, idx_AB))+1)*prod(C.lengths()); dpd_impl = dpd_impl_t::BLOCKED; D.reset(C); @@ -189,7 +189,7 @@ REPLICATED_TEMPLATED_TEST_CASE(indexed_dpd_contract, R, T, all_types) unsigned irrep_AC = A.irrep()^irrep_AB; unsigned irrep_BC = B.irrep()^irrep_AB; - neps += size_AB[irrep_AB]* + neps += (size_AB[irrep_AB]+1)* size_AC[irrep_AC]* size_BC[irrep_BC]; } diff --git a/test/3t/mult.cxx b/test/3t/mult.cxx index 6d4b2d452..1f184ddd0 100644 --- a/test/3t/mult.cxx +++ b/test/3t/mult.cxx @@ -55,7 +55,7 @@ REPLICATED_TEMPLATED_TEST_CASE(mult, R, T, all_types) TENSOR_INFO(C); auto idx_AB = exclusion(intersection(idx_A, idx_B), idx_C); - auto neps = prod(select_from(A.lengths(), idx_A, idx_AB))*prod(C.lengths()); + auto neps = (prod(select_from(A.lengths(), idx_A, idx_AB))+1)*prod(C.lengths()); impl = REFERENCE; D.reset(C); @@ -116,6 +116,15 @@ REPLICATED_TEMPLATED_TEST_CASE(dpd_mult, R, T, all_types) size_AC[irrep_AC]* size_BC[irrep_BC]; } + for (unsigned irrep_AC = 0;irrep_AC < nirrep;irrep_AC++) + for (unsigned irrep_BC = 0;irrep_BC < nirrep;irrep_BC++) + { + unsigned irrep_ABC = irrep_AC^irrep_BC^C.irrep(); + + neps += size_ABC[irrep_ABC]* + size_AC[irrep_AC]* + size_BC[irrep_BC]; + } dpd_impl = dpd_impl_t::BLOCKED; D.reset(C); @@ -148,7 +157,7 @@ REPLICATED_TEMPLATED_TEST_CASE(indexed_mult, R, T, all_types) INDEXED_TENSOR_INFO(C); auto idx_AB = exclusion(intersection(idx_A, idx_B), idx_C); - auto neps = prod(select_from(A.lengths(), idx_A, idx_AB))*prod(C.lengths()); + auto neps = (prod(select_from(A.lengths(), idx_A, idx_AB))+1)*prod(C.lengths()); dpd_impl = dpd_impl_t::BLOCKED; D.reset(C); @@ -202,6 +211,15 @@ REPLICATED_TEMPLATED_TEST_CASE(indexed_dpd_mult, R, T, all_types) size_AC[irrep_AC]* size_BC[irrep_BC]; } + for (unsigned irrep_AC = 0;irrep_AC < nirrep;irrep_AC++) + for (unsigned irrep_BC = 0;irrep_BC < nirrep;irrep_BC++) + { + unsigned irrep_ABC = irrep_AC^irrep_BC^C.irrep(); + + neps += size_ABC[irrep_ABC]* + size_AC[irrep_AC]* + size_BC[irrep_BC]; + } dpd_impl = dpd_impl_t::BLOCKED; D.reset(C); From 8b3ad8bf7934cb0d7da9c5ef20b1ee3f537c4f67 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 11 Dec 2020 16:22:37 -0600 Subject: [PATCH 12/16] Fix infinite loop in varray_base::operator<<. --- src/external/marray/include/varray_base.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/external/marray/include/varray_base.hpp b/src/external/marray/include/varray_base.hpp index d8389c1f1..17d4c9c5d 100644 --- a/src/external/marray/include/varray_base.hpp +++ b/src/external/marray/include/varray_base.hpp @@ -935,6 +935,8 @@ class varray_base break; } } + + if (ndim == 1) break; } return os; From b9c401aacdd81482a6524a1f238206d83388ea7d Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Fri, 11 Dec 2020 16:30:06 -0600 Subject: [PATCH 13/16] Add operator<< to marray_slice. --- src/external/marray/include/marray_slice.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/external/marray/include/marray_slice.hpp b/src/external/marray/include/marray_slice.hpp index 815063bdc..f7ccdcf5d 100644 --- a/src/external/marray/include/marray_slice.hpp +++ b/src/external/marray/include/marray_slice.hpp @@ -228,6 +228,11 @@ class marray_slice return (*this)[std::forward(arg)](std::forward(args)...); } + friend std::ostream& operator<<(std::ostream& os, const marray_slice& x) + { + return os << x.view(); + } + const_pointer cdata() const { return data_; From 44afa2c7deb39cdaf2d1f07a30ac9a4bace5311a Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 14 Dec 2020 12:48:55 -0600 Subject: [PATCH 14/16] iterator -> marray_iterator fix. --- src/external/marray/include/marray_base.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/external/marray/include/marray_base.hpp b/src/external/marray/include/marray_base.hpp index e393821c4..f337fe184 100644 --- a/src/external/marray/include/marray_base.hpp +++ b/src/external/marray/include/marray_base.hpp @@ -159,7 +159,7 @@ class marray_iterator marray_iterator operator-(difference_type n) const { - return iterator(*array_, i_-n); + return marray_iterator(*array_, i_-n); } difference_type operator-(const marray_iterator& other) const @@ -678,7 +678,7 @@ class marray_base strides(const detail::array_1d& len_, layout layout = DEFAULT) { //TODO: add alignment option - + MARRAY_ASSERT(len_.size() == NDim); std::array len; @@ -704,7 +704,7 @@ class marray_base static stride_type size(const detail::array_1d& len_) { //TODO: add alignment option - + len_vector len; len_.slurp(len); From f5c060957fca96caae6cc5de20d1cae33431ddff Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 14 Dec 2020 18:52:46 -0600 Subject: [PATCH 15/16] Update .travis.yml Attempt to upgrade to Xcode 12.2 and Ubuntu 18.04. --- .travis.yml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2405e852b..9f61485b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: cpp -dist: trusty +dist: bionic sudo: required -osx_image: xcode10.1 +osx_image: xcode12.2 os: - linux @@ -33,7 +33,7 @@ matrix: before_install: - 'if [ "$TRAVIS_OS_NAME" = "osx" ]; then rm /usr/local/include/c++; - brew update && brew install gcc@6 tbb llvm@9; + brew update && brew install gcc@9 tbb llvm@9; rm /usr/local/include/c++; brew link --force --overwrite llvm@9; fi' @@ -42,23 +42,23 @@ addons: apt: sources: - ubuntu-toolchain-r-test - - llvm-toolchain-precise-3.9 + - llvm-toolchain-6 packages: - - gcc-6 - - g++-6 - - clang-3.9 + - gcc-9 + - g++-9 + - clang-6.0 - libiomp-dev - libtbb-dev - - binutils-2.26 + #- binutils-2.26 install: - - 'if [ "$TRAVIS_OS_NAME" = "linux" ]; then - export PATH=/usr/lib/binutils-2.26/bin:${PATH}; - fi' - - if [ "$CC" = "gcc" ] ; then export CC="gcc-6" CXX="g++-6"; fi + #- 'if [ "$TRAVIS_OS_NAME" = "linux" ]; then + # export PATH=/usr/lib/binutils-2.26/bin:${PATH}; + # fi' + - if [ "$CC" = "gcc" ] ; then export CC="gcc-9" CXX="g++-9"; fi - 'if [ "$CC" = "clang" ] ; then if [ "$TRAVIS_OS_NAME" = "linux" ]; then - export CC="clang-3.9 -fopenmp=libiomp5" CXX="clang++-3.9 -fopenmp=libiomp5"; + export CC="clang-6.0 -fopenmp=libiomp5" CXX="clang++-6.0 -fopenmp=libiomp5"; else export CC="clang-9" CXX="clang++"; fi; From 3dd4acbc9b320bde2223e24778162f5af68f38ac Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 14 Dec 2020 19:14:24 -0600 Subject: [PATCH 16/16] Update .travis.yml Cut back builds as in `develop`. --- .travis.yml | 48 ++++++++++++++---------------------------------- 1 file changed, 14 insertions(+), 34 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9f61485b2..e0876f2da 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,40 +3,20 @@ dist: bionic sudo: required osx_image: xcode12.2 -os: - - linux - - osx - -compiler: - - gcc - - clang - -env: - - TEST_INSTALL=0 RUN_TEST=1 THREADING="none" BUILD_CONFIG="auto" - - TEST_INSTALL=0 RUN_TEST=1 THREADING="none" BUILD_CONFIG="reference" - - TEST_INSTALL=1 RUN_TEST=0 THREADING="none" BUILD_CONFIG="auto" - - TEST_INSTALL=0 RUN_TEST=0 THREADING="openmp" BUILD_CONFIG="auto" - - TEST_INSTALL=0 RUN_TEST=0 THREADING="pthreads" BUILD_CONFIG="auto" - - TEST_INSTALL=0 RUN_TEST=0 THREADING="tbb" BUILD_CONFIG="auto" - -matrix: - exclude: - - env: TEST_INSTALL=0 RUN_TEST=1 THREADING="none" BUILD_CONFIG="auto" - os: osx - - env: TEST_INSTALL=0 RUN_TEST=1 THREADING="none" BUILD_CONFIG="reference" +jobs: + include: + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="pthread" BUILD_CONFIG="auto" CC=clang os: osx - - env: TEST_INSTALL=1 RUN_TEST=0 THREADING="none" BUILD_CONFIG="auto" - os: osx - - env: TEST_INSTALL=0 RUN_TEST=0 THREADING="tbb" BUILD_CONFIG="auto" - os: osx - -before_install: - - 'if [ "$TRAVIS_OS_NAME" = "osx" ]; then - rm /usr/local/include/c++; - brew update && brew install gcc@9 tbb llvm@9; - rm /usr/local/include/c++; - brew link --force --overwrite llvm@9; - fi' + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="openmp" BUILD_CONFIG="auto" CC=gcc + os: linux + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="pthread" BUILD_CONFIG="auto" CC=gcc + os: linux + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="tbb" BUILD_CONFIG="auto" CC=gcc + os: linux + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="none" BUILD_CONFIG="auto" CC=gcc + os: linux + - env: TEST_INSTALL=1 RUN_TEST=1 THREADING="none" BUILD_CONFIG="auto" CC=clang + os: linux addons: apt: @@ -60,7 +40,7 @@ install: if [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="clang-6.0 -fopenmp=libiomp5" CXX="clang++-6.0 -fopenmp=libiomp5"; else - export CC="clang-9" CXX="clang++"; + export CC="clang" CXX="clang++"; fi; fi'