Skip to content

Commit

Permalink
Merge pull request #1335 from zaliu/master
Browse files Browse the repository at this point in the history
ROCm 4.3 merge staging into master
  • Loading branch information
zaliu authored Apr 22, 2021
2 parents 9ff7d7f + 99ddf62 commit 9cbabb0
Show file tree
Hide file tree
Showing 167 changed files with 67,326 additions and 11,831 deletions.
2 changes: 1 addition & 1 deletion .jenkins/extended.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ ci: {
def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900','gfx906','gfx908']]),
"rocm-docker":([ubuntu18:['gfx908']])]

// jobNameList = auxiliary.appendJobNameList(jobNameList)
jobNameList = auxiliary.appendJobNameList(jobNameList)

propertyList.each
{
Expand Down
11 changes: 10 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# Change Log for Tensile
## [(Unreleased) Tensile 4.28.0 for ROCm 4.3.0]

## [Tensile 4.28.0 for ROCm 4.3.0]
### Added
- TensileRetuneLibrary for updating existing library logic files
- Support GFX1030
- Support NHWC

### Fixed
- TensileCreateLibrary crash with relative output and --merge-files

### Changed
- Change cmake_minimum_required to VERSION 3.13

## [Tensile 4.27.0 for ROCm 4.2.0]
### Added
- Benchmarking and library support for CU efficiency vs. overall speed
Expand Down
37 changes: 18 additions & 19 deletions HostLibraryTests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
################################################################################

cmake_minimum_required(VERSION 3.5)
cmake_minimum_required(VERSION 3.13)

project(TensileHostLibraryTest)

Expand Down Expand Up @@ -52,7 +52,7 @@ if(TENSILE_STATIC_ONLY)
endif()

if(NOT Tensile_FOUND)
find_package(Tensile 4.27.0 EXACT REQUIRED ${TENSILE_COMPONENTS} PATHS "${CMAKE_CURRENT_SOURCE_DIR}/../Tensile")
find_package(Tensile 4.28.0 EXACT REQUIRED ${TENSILE_COMPONENTS} PATHS "${CMAKE_CURRENT_SOURCE_DIR}/../Tensile")
endif()

if(NOT TENSILE_DISABLE_CTEST)
Expand All @@ -79,22 +79,19 @@ if(TENSILE_USE_HIP)
find_package(HIP REQUIRED CONFIG PATHS $ENV{ROCM_PATH} /opt/rocm)
endif()

if(TENSILE_USE_OPENMP)
#set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_OPENMP")
find_package(OpenMP QUIET)
if (OPENMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
else()
if(EXISTS /etc/redhat-release)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp=libgomp")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(OPENMP_LIBRARY /usr/lib/x86_64-linux-gnu/libomp.so)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENMP_LIBRARY}")
endif()
endif()
if(TENSILE_USE_OPENMP AND NOT TARGET custom_openmp_cxx)
    # Interface target carrying the OpenMP usage requirements; a consumer gets
    # them by linking against custom_openmp_cxx. The NOT TARGET check in the
    # enclosing condition keeps the target from being defined a second time.
    #
    # Workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/21787
    # ensures we link to HIP's libomp and get an rpath to it.
    add_library(custom_openmp_cxx INTERFACE)

    if(NOT TENSILE_USE_HIP)
        # Non-HIP build: use CMake's OpenMP package and its imported target.
        find_package(OpenMP REQUIRED)
        target_link_libraries(custom_openmp_cxx INTERFACE OpenMP::OpenMP_CXX)
    else()
        # HIP build: hand -fopenmp directly to both the compile and the link
        # step rather than going through find_package(OpenMP).
        target_compile_options(custom_openmp_cxx INTERFACE -fopenmp)
        target_link_options(custom_openmp_cxx INTERFACE -fopenmp)
    endif()
endif()

add_subdirectory(configs)
Expand All @@ -119,6 +116,7 @@ set(test_sources ${test_sources}
ContractionProblem_test.cpp
ContractionSelectionLibrary_test.cpp
ContractionFitness_test.cpp
MultipleSolutionsPerSize_test.cpp
DataTypes_test.cpp
EmbeddedData_test.cpp
KernelArguments_test.cpp
Expand All @@ -140,6 +138,7 @@ if(TENSILE_USE_LLVM)
set(test_sources ${test_sources}
ContractionLibraryLoading_test.cpp
ContractionFitness_test.cpp
MultipleSolutionsPerSize_test.cpp
llvm/ArithmeticUnitPredicate_test.cpp
llvm/CUEfficiencyPredicate_test.cpp
llvm/DeterministicModePredicate_test.cpp
Expand Down Expand Up @@ -199,5 +198,5 @@ if(TENSILE_USE_HIP)
endif()

if(TENSILE_USE_OPENMP)
target_link_libraries(TensileTests PRIVATE "${OpenMP_EXE_LINKER_FLAGS}")
target_link_libraries(TensileTests PRIVATE custom_openmp_cxx)
endif()
2 changes: 2 additions & 0 deletions HostLibraryTests/CachingLibrary_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ TEST(Hashing, Tuple2)
TwoInts tup;

size_t h = std::hash<TwoInts>()(tup);
if(h) // Use the code to quiet the compiler.
return;
}

TEST(CachingLibrary, Simple)
Expand Down
131 changes: 131 additions & 0 deletions HostLibraryTests/MultipleSolutionsPerSize_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*******************************************************************************
*
* MIT License
*
* Copyright 2019-2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#include <gtest/gtest.h>

#include <Tensile/AMDGPUPredicates.hpp>
#include <Tensile/CachingLibrary.hpp>
#include <Tensile/ContractionLibrary.hpp>
#include <Tensile/ContractionProblemPredicates.hpp>
#include <Tensile/ContractionProblemProperties.hpp>
#include <Tensile/ContractionSolution.hpp>
#include <Tensile/Distance.hpp>
#include <Tensile/MatchingLibrary.hpp>

#include <memory>
#include <random>

#ifdef _OPENMP
#include <omp.h>
#endif

// Builds a distance-matching table in which each key is registered twice
// (once per single-solution library, with a different third entry value) and
// checks which solution findBestSolution() returns for four cubic GEMM
// problems of extent 3, 5, 7 and 9.
//
// NOTE(review): the expectations below match the library registered with the
// larger third entry value (2.0) at keys {4,4,1,4} and {8,8,1,8}; presumably
// that value is a speed/score and the key is chosen by Euclidean distance --
// confirm against MatchingTableEntry / DistanceMatchingTable.
TEST(MultipleSolutionsPerSize, ArithmeticUnit)
{
    using namespace Tensile;

    using Key        = std::array<int64_t, 4>;
    using LibraryPtr = std::shared_ptr<SolutionLibrary<ContractionProblem>>;
    using Table      = Matching::DistanceMatchingTable<Key,
                                                  ContractionProblem,
                                                  LibraryPtr,
                                                  std::shared_ptr<ContractionSolution>,
                                                  Matching::EuclideanDistance<Key>>;
    using Entry      = Matching::MatchingTableEntry<Key, LibraryPtr>;
    using Properties = std::vector<std::shared_ptr<Property<ContractionProblem>>>;

    // Same GEMM argument list as every problem in this test; only the common
    // extent varies.
    auto makeCubicGemm = [](int extent) {
        return ContractionProblem::GEMM(
            false, false, extent, extent, extent, extent, extent, extent, 1.2, false, 1);
    };

    // The two candidate solutions differ in index and in the arithmetic unit
    // their problem predicate is constructed with. Names that appear inside
    // EXPECT_EQ are left unchanged because gtest echoes the expression text
    // when an expectation fails.
    auto SolutionMFMA   = std::make_shared<ContractionSolution>();
    SolutionMFMA->index = 0;
    SolutionMFMA->problemPredicate
        = std::make_shared<Predicates::Contraction::ArithmeticUnitCompatible>(ArithmeticUnit::MFMA);

    auto SolutionVALU   = std::make_shared<ContractionSolution>();
    SolutionVALU->index = 1;
    SolutionVALU->problemPredicate
        = std::make_shared<Predicates::Contraction::ArithmeticUnitCompatible>(ArithmeticUnit::VALU);

    // Constructed but not referenced by anything below.
    SolutionMap<ContractionSolution> solutionsByIndex({{0, SolutionMFMA}, {1, SolutionVALU}});

    // One single-solution library per candidate.
    auto mfmaLibrary = std::make_shared<SingleContractionLibrary>(SolutionMFMA);
    auto valuLibrary = std::make_shared<SingleContractionLibrary>(SolutionVALU);

    AMDGPU gpu;

    auto problemSize3 = makeCubicGemm(3);
    auto problemSize5 = makeCubicGemm(5);
    auto problemSize7 = makeCubicGemm(7);
    auto problemSize9 = makeCubicGemm(9);

    // The four properties handed to the table, each with index 0, in the
    // same order as the four components of Key.
    auto freeSizeA = std::make_shared<Contraction::FreeSizeA>();
    auto freeSizeB = std::make_shared<Contraction::FreeSizeB>();
    auto batchSize = std::make_shared<Contraction::BatchSize>();
    auto boundSize = std::make_shared<Contraction::BoundSize>();

    freeSizeA->index = 0;
    freeSizeB->index = 0;
    batchSize->index = 0;
    boundSize->index = 0;

    Properties properties;
    properties.push_back(freeSizeA);
    properties.push_back(freeSizeB);
    properties.push_back(batchSize);
    properties.push_back(boundSize);

    auto matchingTable = std::make_shared<Table>(properties);

    // Two keys, each present twice with the libraries' third values swapped.
    std::vector<Entry> entries;
    entries.push_back(Entry{{4, 4, 1, 4}, mfmaLibrary, 2.0});
    entries.push_back(Entry{{4, 4, 1, 4}, valuLibrary, 1.0});
    entries.push_back(Entry{{8, 8, 1, 8}, valuLibrary, 2.0});
    entries.push_back(Entry{{8, 8, 1, 8}, mfmaLibrary, 1.0});

    matchingTable->table = entries;

    ProblemMatchingLibrary<ContractionProblem> lib;
    lib.table = matchingTable;

    // Extents 3 and 5 are expected to resolve to the MFMA solution.
    auto theSolution0 = lib.findBestSolution(problemSize3, gpu);
    EXPECT_EQ(theSolution0, SolutionMFMA);
    auto theSolution1 = lib.findBestSolution(problemSize5, gpu);
    EXPECT_EQ(theSolution1, SolutionMFMA);

    // Extents 7 and 9 are expected to resolve to the VALU solution.
    auto theSolution2 = lib.findBestSolution(problemSize7, gpu);
    EXPECT_EQ(theSolution2, SolutionVALU);
    auto theSolution3 = lib.findBestSolution(problemSize9, gpu);
    EXPECT_EQ(theSolution3, SolutionVALU);
}
2 changes: 2 additions & 0 deletions HostLibraryTests/TestData_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ TEST(TestData, Simple)

EXPECT_TRUE(static_cast<bool>(data));

#if defined(TENSILE_MSGPACK) || defined(TENSILE_LLVM)
auto is_regular_file
= static_cast<bool (*)(boost::filesystem::path const&)>(boost::filesystem::is_regular_file);

Expand All @@ -55,6 +56,7 @@ TEST(TestData, Simple)
std::cout << file << std::endl;
EXPECT_PRED1(is_regular_file, file);
}
#endif

if(TestData::Env("TENSILE_NEVER_SET_THIS_AKDJFLKDSJ"))
FAIL() << "TestData object constructed with unset environment variable "
Expand Down
Binary file modified HostLibraryTests/configs/SolutionLibraries/KernelsLite.dat.gz
Binary file not shown.
Binary file modified HostLibraryTests/configs/SolutionLibraries/KernelsLite.yaml.gz
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified HostLibraryTests/configs/SolutionLibraries/rocBLAS_Full.dat.gz
Binary file not shown.
Binary file modified HostLibraryTests/configs/SolutionLibraries/rocBLAS_Full.yaml.gz
Binary file not shown.
Loading

0 comments on commit 9cbabb0

Please sign in to comment.