Skip to content

Commit

Permalink
AOCL-BLAS 5.0 Release
Browse files Browse the repository at this point in the history
  • Loading branch information
sireeshasanga committed Oct 10, 2024
2 parents 7c564c7 + f3c166b commit 34d4bba
Show file tree
Hide file tree
Showing 1,264 changed files with 234,393 additions and 50,230 deletions.
2 changes: 2 additions & 0 deletions .appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
skip_branch_with_pr: true

environment:
matrix:
- LIB_TYPE: shared
Expand Down
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,10 @@ GPATH
GRTAGS
GTAGS

# Windows Build
build/*
# cmake builds
build_*/*

# Windows build
bin/*
*.dll
*.lib
Expand Down
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ matrix:
CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \
PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \
TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/"
# Apple M1 (firestorm) build and fast testsuite (qemu)
- os: linux
compiler: aarch64-linux-gnu-gcc
env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="firestorm" \
CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ \
PACKAGES="gcc-aarch64-linux-gnu g++-aarch64-linux-gnu libc6-dev-arm64-cross qemu-system-arm qemu-user" \
TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/"
# armsve build and fast testsuite (qemu)
- os: linux
compiler: aarch64-linux-gnu-gcc-10
Expand Down
398 changes: 313 additions & 85 deletions CMakeLists.txt

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"version": 6,
"cmakeMinimumRequired": {
"major": 3,
"minor": 25,
"patch": 0
},
"include": [
"build/cmake/presets/linux-make-clang.json",
"build/cmake/presets/linux-make-gcc.json",
"build/cmake/presets/linux-make.json",
"build/cmake/presets/linux-ninja.json",
"build/cmake/presets/win-msvc.json",
"build/cmake/presets/win-ninja.json"
]
}
1 change: 1 addition & 0 deletions CREDITS
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ but many others have contributed code and feedback, including
Nathaniel Smith @njsmith
Shaden Smith @ShadenSmith
Tyler Smith @tlrmchlsmth (The University of Texas at Austin)
Snehith @ArcadioN09
Paul Springer @springer13 (RWTH Aachen University)
Adam J. Stewart @adamjstewart (University of Illinois at Urbana-Champaign)
Vladimir Sukarev
Expand Down
170 changes: 128 additions & 42 deletions LICENSE

Large diffs are not rendered by default.

77 changes: 63 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved.
# Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
Expand Down Expand Up @@ -191,6 +191,13 @@ gen-obj-paths-from-src = $(foreach ch, $(1), \
# directories.
MK_CONFIG_OBJS := $(call gen-obj-paths-from-src,$(CONFIG_SRC_SUFS),$(MK_CONFIG_SRC),$(CONFIG_PATH),$(BASE_OBJ_CONFIG_PATH))

# Separate the lpgemm kernel sources from the regular kernel sources.
# Collect the .c files under ./kernels/zen/lpgemm and ./kernels/zen4/lpgemm
# into MK_KERNELS_LPGEMM_SRC ...
MK_KERNELS_LPGEMM_SRC := $(filter ./kernels/zen/lpgemm/%.c, $(MK_KERNELS_SRC))
MK_KERNELS_LPGEMM_SRC += $(filter ./kernels/zen4/lpgemm/%.c, $(MK_KERNELS_SRC))
# ... and remove them from MK_KERNELS_SRC, so that anything derived from
# MK_KERNELS_SRC after this point no longer sees the lpgemm files.
MK_KERNELS_SRC := $(filter-out $(MK_KERNELS_LPGEMM_SRC),$(MK_KERNELS_SRC))
# Object paths for the lpgemm kernels are generated only when the aocl_gemm
# addon appears in ADDON_LIST; otherwise MK_KERNELS_LPGEMM_OBJS is not set here.
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
MK_KERNELS_LPGEMM_OBJS := $(call gen-obj-paths-from-src,$(KERNELS_SRC_SUFS),$(MK_KERNELS_LPGEMM_SRC),$(KERNELS_PATH),$(BASE_OBJ_KERNELS_PATH))
endif

# Generate object file paths for architecture-specific kernel source code.
# We target only .c, .s, and .S files. Note that MK_KERNELS_SRC is already
# limited to the kernel source corresponding to the kernel sets in
Expand Down Expand Up @@ -220,10 +227,29 @@ MK_ADDON_KERS_SRC := $(foreach addon, $(ADDON_LIST), \
$(filter $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
$(MK_ADDON_SRC)) \
)

# Generate non-kernel list for all addons except aocl_gemm
# We process the aocl_gemm addon separately.
MK_ADDON_OTHER_SRC := $(foreach addon, $(ADDON_LIST), \
$(filter-out $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
$(MK_ADDON_SRC)) \
$(if $(filter-out aocl_gemm,$(addon)), \
$(filter-out $(ADDON_PATH)/$(addon)/$(KERNELS_DIR)/%, \
$(MK_ADDON_SRC))) \
)

# Select the non-kernel addon sources when the aocl_gemm addon is enabled.
# Everything in MK_ADDON_SRC except the files under the aocl_gemm kernels
# directory is considered. The .cpp files present in the JIT folder are picked
# only when both of the following conditions hold:
# 1. the gcc version is older than 11.2 (GCC_OT_11_2_0 is anything but "no");
# 2. the aocl_gemm addon is enabled.
# Otherwise only the .c files are kept.
# The addon directory name is spelled literally in the filter-out pattern:
# aocl_gemm is an addon name, not a make variable, and a variable reference
# that is not set expands to an empty string, which would leave the pattern
# unable to match the kernels directory.
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
ifeq ($(GCC_OT_11_2_0),no)
MK_AOCL_GEMM_OTHER_SRC := $(filter-out $(ADDON_PATH)/aocl_gemm/$(KERNELS_DIR)/%, \
$(MK_ADDON_SRC))
MK_ADDON_OTHER_SRC := $(filter %.c,$(MK_AOCL_GEMM_OTHER_SRC))
else
MK_ADDON_OTHER_SRC := $(filter-out $(ADDON_PATH)/aocl_gemm/$(KERNELS_DIR)/%, \
$(MK_ADDON_SRC))
endif
endif

MK_ADDON_KERS_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_KERS_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH))
MK_ADDON_OTHER_OBJS := $(call gen-obj-paths-from-src,$(ADDON_SRC_SUFS),$(MK_ADDON_OTHER_SRC),$(ADDON_PATH),$(BASE_OBJ_ADDON_PATH))
MK_ADDON_OBJS := $(MK_ADDON_KERS_OBJS) $(MK_ADDON_OTHER_OBJS)
Expand Down Expand Up @@ -264,6 +290,10 @@ MK_BLIS_OBJS := $(MK_CONFIG_OBJS) \
$(MK_ADDON_OBJS) \
$(MK_SANDBOX_OBJS)

# When the aocl_gemm addon appears in ADDON_LIST, append the lpgemm kernel
# objects to MK_BLIS_OBJS as well.
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
MK_BLIS_OBJS += $(MK_KERNELS_LPGEMM_OBJS)
endif

# Optionally filter out the BLAS and CBLAS compatibility layer object files.
# This is not actually necessary, since each affected file is guarded by C
# preprocessor macros, but it prevents "empty" object files from being
Expand Down Expand Up @@ -606,6 +636,19 @@ else
endif
endef

# Pattern rule template for compiling lpgemm kernel source files. It maps
# $(KERNELS_PATH)/<arg1>/%.<arg3> to $(BASE_OBJ_KERNELS_PATH)/<arg1>/%.o and
# compiles with the flags returned by get-kernel-lpgemm-cflags-for.
# first argument: the kernel sub-directory being targeted, relative to
#   $(KERNELS_PATH). This Makefile instantiates the rule with
#   <kernel set>/lpgemm (e.g. zen4/lpgemm).
# second argument: the configuration whose lpgemm CFLAGS we should use in
#   compilation.
# third argument: the kernel file suffix being considered.
# Note: $$< and $$@ are written with a doubled '$' because this template is
# expanded once by $(call ...) before being handed to $(eval ...).
define make-kernels-lpgemm-rule
$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
ifeq ($(ENABLE_VERBOSE),yes)
$(CC) $(call get-kernel-lpgemm-cflags-for,$(2)) -c $$< -o $$@
else
@echo "Compiling $$@" $(call get-kernel-lpgemm-text-for,$(2))
@$(CC) $(call get-kernel-lpgemm-cflags-for,$(2)) -c $$< -o $$@
endif
endef

# first argument: a configuration name from the union of config_list and
# config_name, used to look up the CFLAGS to use during compilation.
# second argument: the C99 addon file suffix being considered.
Expand Down Expand Up @@ -710,6 +753,10 @@ $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refkern-rule,$(conf))))
$(foreach suf, $(KERNELS_SRC_SUFS), \
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),$(suf)))))

# Instantiate the build rule for lpgemm kernel files, but only when the
# aocl_gemm addon appears in ADDON_LIST. One rule is generated per kernel
# source suffix and per kernel set in KERNEL_LIST, targeting that kernel set's
# lpgemm sub-directory and using the configuration that
# get-config-for-kset returns for the kernel set.
ifeq ($(filter aocl_gemm, $(ADDON_LIST)), aocl_gemm)
$(foreach suf, $(KERNELS_SRC_SUFS), \
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-lpgemm-rule,$(kset)/lpgemm,$(call get-config-for-kset,$(kset)),$(suf)))))
endif
# Instantiate the build rule for C addon files. Use the CFLAGS for the
# configuration family.
$(foreach suf, $(ADDON_C99_SUFS), \
Expand Down Expand Up @@ -850,20 +897,14 @@ else
@$(RANLIB) $@
endif

# first argument: the base name of the BLAS test driver.
define make-blat-rule
$(BASE_EXE_BLASTEST_PATH)/$(1).x: $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK)
$(BASE_EXE_BLASTEST_PATH)/%.x: $(BASE_OBJ_BLASTEST_PATH)/%.o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK)
@mkdir -p $(BASE_EXE_BLASTEST_PATH)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
$(LINKER) $< $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
else
@echo "Linking $$(@F) against '$(notdir $(BLASTEST_F2C_LIB)) $(LIBBLIS_LINK) $(LDFLAGS)'"
@$(LINKER) $(BASE_OBJ_BLASTEST_PATH)/$(1).o $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
@echo "Linking $@ against '$(notdir $(BLASTEST_F2C_LIB)) $(LIBBLIS_LINK) "$(LDFLAGS)"'"
@$(LINKER) $< $(BLASTEST_F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
endif
endef

# Instantiate the rule above for each driver file.
$(foreach name, $(BLASTEST_DRV_BASES), $(eval $(call make-blat-rule,$(name))))

# A rule to run ?blat1.x driver files.
define make-run-blat1-rule
Expand Down Expand Up @@ -933,7 +974,7 @@ $(TESTSUITE_BIN): $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
else
@echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'"
@echo "Linking $@ against '$(LIBBLIS_LINK) "$(LDFLAGS)"'"
@$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
endif

Expand Down Expand Up @@ -1081,13 +1122,21 @@ else
$(@)/$(CONFIG_DIR)/$(CONFIG_NAME)/
endif

# Name of the BLIS library that is substituted for the @AOCLLIB@ placeholder
# when the pkg-config file is generated from the blis.pc.in template:
# "blis" when THREADING_MODEL is "off", "blis-mt" for any other value.
ifeq ($(THREADING_MODEL),off)
AOCLLIB := blis
else
AOCLLIB := blis-mt
endif

$(PC_SHARE_DIR_INST): $(PC_IN_FILE)
$(MKDIR) $(@)
ifeq ($(ENABLE_VERBOSE),no)
@echo "Installing $(PC_OUT_FILE) into $(@)/"
endif
$(shell cat "$(PC_IN_FILE)" \
| sed -e "s#@PACKAGE_VERSION@#$(VERSION)#g" \
| sed -e "s#@AOCLLIB@#$(AOCLLIB)#g" \
| sed -e "s#@prefix@#$(prefix)#g" \
| sed -e "s#@exec_prefix@#$(exec_prefix)#g" \
| sed -e "s#@libdir@#$(libdir)#g" \
Expand Down
59 changes: 44 additions & 15 deletions addon/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,36 @@
##Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. ##
#[=[
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
]=]

# Writing a function that will be used to generate the required object
# libraries for the required addons.
Expand Down Expand Up @@ -59,17 +91,16 @@ function(generate_addon_targets addon_target)
# in get-addon-c99flags-for
${CADDONINCFLAGS}
)

if(THREADING_MODEL STREQUAL "openmp")
# Equivalent to CTHREADFLAGS in get-noopt-cflags-for
target_link_libraries(${addon_target}_C99_ADDON PRIVATE OpenMP::OpenMP_C)
elseif(THREADING_MODEL STREQUAL "pthreads")
# in get-noopt-cflags-for
target_compile_options(${addon_target}_C99_ADDON PRIVATE ${CTHREADFLAGS})
endif()
if(BUILD_SHARED_LIBS)
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_C99_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_C99_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_dependencies(${addon_target}_C99_ADDON flat-header)
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
set_target_properties(${addon_target}_C99_ADDON PROPERTIES FOLDER object-libs-targets)
Expand Down Expand Up @@ -128,17 +159,17 @@ function(generate_addon_targets addon_target)
# in get-noopt-cflags-for
target_compile_options(${addon_target}_C99_KERNEL_ADDON PRIVATE ${CTHREADFLAGS})
endif()
if(BUILD_SHARED_LIBS)
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_dependencies(${addon_target}_C99_KERNEL_ADDON flat-header)
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
set_target_properties(${addon_target}_C99_KERNEL_ADDON PROPERTIES FOLDER object-libs-targets)
endif()

# Collect all subdirectory paths that have at least one file with suffix in ADDON_CXX_SUFS list.
get_filepaths_with_suffixes(LOCAL_SOURCE_CXX_FILES "${CMAKE_CURRENT_SOURCE_DIR}/${addon_target}" "${ADDON_CXX_SUFS}")
if(("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") AND (CMAKE_C_COMPILER_VERSION VERSION_LESS 11.2.0))
# Collect all subdirectory paths that have at least one file with suffix in ADDON_CXX_SUFS list.
get_filepaths_with_suffixes(LOCAL_SOURCE_CXX_FILES "${CMAKE_CURRENT_SOURCE_DIR}/${addon_target}" "${ADDON_CXX_SUFS}")
endif()

# Only generate the object library if there is at least one source file.
list(LENGTH LOCAL_SOURCE_CXX_FILES size)
Expand Down Expand Up @@ -190,10 +221,8 @@ function(generate_addon_targets addon_target)
# in get-noopt-cflags-for
target_compile_options(${addon_target}_CXX_ADDON PRIVATE ${CTHREADFLAGS})
endif()
if(BUILD_SHARED_LIBS)
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
endif()
# Equivalent to CPICFLAGS in get-noopt-cflags-for
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_dependencies(${addon_target}_CXX_ADDON flat-header)
# Put all those targets under object-libs-targets folder name so that they appear all together in IDE.
set_target_properties(${addon_target}_CXX_ADDON PROPERTIES FOLDER object-libs-targets)
Expand Down
Loading

0 comments on commit 34d4bba

Please sign in to comment.