From 02e41c7295df5edae07b40e3242ab40d11f03496 Mon Sep 17 00:00:00 2001
From: Tony Butler
Date: Wed, 27 Mar 2019 15:11:05 -0600
Subject: [PATCH] Begin to wire asm stubs into detection (extend asm_type); Add
 more CPU detection so asm_type can be matched up

getModel() now decodes the display family/model, reports SSE3, SSSE3 and
AVX2, reads the processor brand string and classifies the known Intel and
AMD microarchitecture families. getAsmName() selects asm_type from these
flags and patchAsmVariants() accepts the new per-architecture names.

Notes:
- The extended model field is only added for base family 0x6 or 0xF, the
  extended family field only for base family 0xF.
- The brand string spans CPUID leaves 0x80000002..0x80000004 (3 x 16
  bytes), so its buffer holds 48 characters plus the terminating NUL.
- Model::type_name is still filled with the CPUID vendor string.
---
 xmrstak/backend/cpu/cpuType.cpp               | 73 ++++++++++++++++---
 xmrstak/backend/cpu/cpuType.hpp               | 13 +++-
 .../backend/cpu/crypto/cryptonight_aesni.h    | 20 +++--
 xmrstak/backend/cpu/minethd.cpp               |  4 +-
 4 files changed, 91 insertions(+), 19 deletions(-)

diff --git a/xmrstak/backend/cpu/cpuType.cpp b/xmrstak/backend/cpu/cpuType.cpp
index 5e2519c3b..ed0e7f401 100644
--- a/xmrstak/backend/cpu/cpuType.cpp
+++ b/xmrstak/backend/cpu/cpuType.cpp
@@ -27,10 +27,10 @@ void cpuid(uint32_t eax, int32_t ecx, int32_t val[4])
 #endif
 }
 
-int32_t get_masked(int32_t val, int32_t h, int32_t l)
+uint32_t get_masked(int32_t val, int32_t h, int32_t l)
 {
 	val &= (0x7FFFFFFF >> (31 - (h - l))) << l;
-	return val >> l;
+	return static_cast<uint32_t>(val >> l);
 }
 
 bool has_feature(int32_t val, int32_t bit)
@@ -41,34 +41,87 @@ bool has_feature(int32_t val, int32_t bit)
 
 Model getModel()
 {
+	Model result;
+
+	int32_t cpu_HFP = 0; // Highest Function Parameter
+	uint32_t cpu_HEFP = 0; // Highest Extended Function Parameter (>= 0x80000000, needs unsigned)
 	int32_t cpu_info[4];
 	char cpustr[13] = {0};
+	char brandstr[49] = {0}; // 3 CPUID leaves x 16 bytes + terminating NUL
 
 	cpuid(0, 0, cpu_info);
+	cpu_HFP = cpu_info[0];
 	std::memcpy(cpustr, &cpu_info[1], 4);
 	std::memcpy(cpustr + 4, &cpu_info[3], 4);
 	std::memcpy(cpustr + 8, &cpu_info[2], 4);
 
-	Model result;
-
 	cpuid(1, 0, cpu_info);
-
+	result.model = get_masked(cpu_info[0], 8, 4);
 	result.family = get_masked(cpu_info[0], 12, 8);
-	result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4;
 	result.type_name = cpustr;
+	if(result.family == 0x6 || result.family == 0xF) // extended model only applies to base family 0x6/0xF
+	{
+		result.model += get_masked(cpu_info[0], 20, 16) << 4;
+	}
+	if(result.family == 0xF) // extended family is only added when the base family is 0xF
+	{
+		result.family += get_masked(cpu_info[0], 28, 20);
+	}
 
-	// feature bits https://en.wikipedia.org/wiki/CPUID
-	// sse2
+	// feature bits https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
+	// sse2/sse3/ssse3
 	result.sse2 = has_feature(cpu_info[3], 26);
+	result.sse3 = has_feature(cpu_info[2], 0);
+	result.ssse3 = has_feature(cpu_info[2], 9);
 	// aes-ni
 	result.aes = has_feature(cpu_info[2], 25);
 	// avx - 27 is the check if the OS overwrote cpu features
 	result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27);
 
+	// extended feature bits https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
+	if(cpu_HFP >= 7)
+	{
+		cpuid(7, 0, cpu_info);
+		result.avx2 = has_feature(cpu_info[1], 5);
+	}
+	// extended function support https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented
+	cpuid(0x80000000, 0, cpu_info);
+	cpu_HEFP = static_cast<uint32_t>(cpu_info[0]);
+
+	// processor brand string https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
+	if(cpu_HEFP >= 0x80000004)
+	{
+		for(uint32_t efp = 0x80000002; efp <= 0x80000004; efp++){ // each of the three leaves yields 16 characters
+			cpuid(efp, 0, cpu_info);
+			std::memcpy(brandstr + (16 * (efp - 0x80000002)), cpu_info, 16);
+		}
+		result.brand_name = brandstr;
+	}
+
+	if(strcmp(cpustr, "GenuineIntel") == 0)
+	{
+		if(result.family == 0x6){
+			result.isIntelXBridge = (
+				result.model == 0x2A //Sandy Bridge
+				|| result.model == 0x3A //Ivy Bridge
+			);
+			result.isIntelXWell = (
+				result.model == 0x3C || result.model == 0x45 || result.model == 0x46 //Haswell
+				|| result.model == 0x47 || result.model == 0x3D //Broadwell
+			);
+			result.isIntelXLake = (
+				result.model == 0x4E || result.model == 0x5E //Skylake
+				|| result.model == 0x8E //Kaby/Coffee/Whiskey/Amber Lake
+				|| result.model == 0x9E //Kaby/Coffee Lake
+				|| result.model == 0x66 //Cannon Lake
+			);
+		}
+	}
 	if(strcmp(cpustr, "AuthenticAMD") == 0)
 	{
-		if(result.family == 0xF)
-			result.family += get_masked(cpu_info[0], 28, 20);
+		result.isAMDHammer = (result.family != 0x15 && result.family >= 0xF && result.family <= 0x16);
+		result.isAMDBulldozer = (result.family == 0x15);
+		result.isAMDZen = (result.family == 0x17);
 	}
 
 	return result;
diff --git a/xmrstak/backend/cpu/cpuType.hpp b/xmrstak/backend/cpu/cpuType.hpp
index 2bafa4105..77eea86f3 100644
--- a/xmrstak/backend/cpu/cpuType.hpp
+++ b/xmrstak/backend/cpu/cpuType.hpp
@@ -11,10 +11,20 @@ struct Model
 {
 	uint32_t family = 0u;
 	uint32_t model = 0u;
+	bool isIntelXBridge = false;
+	bool isIntelXWell = false;
+	bool isIntelXLake = false;
+	bool isAMDHammer = false;
+	bool isAMDBulldozer = false;
+	bool isAMDZen = false;
 	bool aes = false;
 	bool sse2 = false;
+	bool sse3 = false;
+	bool ssse3 = false;
 	bool avx = false;
+	bool avx2 = false;
 	std::string type_name = "unknown";
+	std::string brand_name = "unknown";
 };
 
 Model getModel();
@@ -24,7 +34,8 @@ Model getModel();
  * This enables us to put in values exactly like in the manual
  * For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
  */
-int32_t get_masked(int32_t val, int32_t h, int32_t l);
+uint32_t get_masked(int32_t val, int32_t h, int32_t l);
+
 
 } // namespace cpu
 } // namespace xmrstak
diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
index bfb65350f..1ad60dd26 100644
--- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
+++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h
@@ -1198,6 +1198,10 @@ struct Cryptonight_hash<5>
 };
 
 extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
-extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
 extern "C" void cryptonight_v8_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
+extern "C" void cryptonight_v8_mainloop_bulldozer_asm(cryptonight_ctx* ctx0);
+extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
+extern "C" void cryptonight_v8_rwz_mainloop_asm(cryptonight_ctx* ctx0);
+extern "C" void cryptonight_v8_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
+
 
@@ -1318,7 +1322,7 @@ void* allocateExecutableMemory(size_t size)
 {
 
 #ifdef _WIN64
-	return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
+	return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
 #else
 #if defined(__APPLE__)
 	return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -1374,19 +1378,23 @@ void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmr
 
 	cn_mainloop_fun src_code = nullptr;
 
-	if(selected_asm == "intel_avx")
+	if(selected_asm == "intel_avx" || selected_asm == "ivybridge" || selected_asm == "sandybridge")
 	{
 		// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
 		if(N == 2)
 			src_code = reinterpret_cast<cn_mainloop_fun>(cryptonight_v8_double_mainloop_sandybridge_asm);
 		else
 			src_code = cryptonight_v8_mainloop_ivybridge_asm;
-		;
 	}
 	// supports only 1 thread per hash
-	if(selected_asm == "amd_avx")
+	if(selected_asm == "bulldozer")
+	{
+		// AMD 15h "Bulldozer" - Orochi/Vishera etc; Bulldozer/Piledriver/Steamroller/Excavator
+		src_code = cryptonight_v8_mainloop_bulldozer_asm;
+	}
+	if(selected_asm == "amd_avx" || selected_asm == "zen")
 	{
-		// AMD Ryzen (1xxx and 2xxx series)
+		// AMD 17h "Zen" - Ryzen (1xxx and 2xxx series)
 		src_code = cryptonight_v8_mainloop_ryzen_asm;
 	}
 
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index 72570e96a..5abbef991 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -582,9 +582,9 @@ static std::string getAsmName(const uint32_t num_hashes)
 
 		if(cpu_model.avx && cpu_model.aes)
 		{
-			if(cpu_model.type_name.find("Intel") != std::string::npos)
+			if(cpu_model.isIntelXBridge || cpu_model.isIntelXWell || cpu_model.isIntelXLake)
 				asm_type = "intel_avx";
-			else if(cpu_model.type_name.find("AMD") != std::string::npos)
+			else if(cpu_model.isAMDBulldozer || cpu_model.isAMDZen)
 				asm_type = "amd_avx";
 		}
 	}