Skip to content

Commit

Permalink
Begin to wire asm stubs into detection (extend asm_type); Add more CPU detection so asm_type can be matched up
Browse files Browse the repository at this point in the history
  • Loading branch information
Spudz76 committed Apr 30, 2019
1 parent 6743d35 commit aaa20dd
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 20 deletions.
74 changes: 63 additions & 11 deletions xmrstak/backend/cpu/cpuType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ void cpuid(uint32_t eax, int32_t ecx, int32_t val[4])
#endif
}

int32_t get_masked(int32_t val, int32_t h, int32_t l)
uint32_t get_masked(int32_t val, int32_t h, int32_t l)
{
val &= (0x7FFFFFFF >> (31 - (h - l))) << l;
return val >> l;
return static_cast<uint32_t>(val >> l);
}

bool has_feature(int32_t val, int32_t bit)
Expand All @@ -41,34 +41,86 @@ bool has_feature(int32_t val, int32_t bit)

Model getModel()
{
Model result;

int32_t cpu_HFP = 0; // Highest Function Parameter
int32_t cpu_HEFP = 0; // Highest Extended Function Parameter
int32_t cpu_info[4];
char cpustr[13] = {0};
char brandstr[13] = {0};

cpuid(0, 0, cpu_info);
cpu_HFP = cpu_info[0];
std::memcpy(cpustr, &cpu_info[1], 4);
std::memcpy(cpustr + 4, &cpu_info[3], 4);
std::memcpy(cpustr + 8, &cpu_info[2], 4);

Model result;

cpuid(1, 0, cpu_info);

result.model = get_masked(cpu_info[0], 8, 4);
result.family = get_masked(cpu_info[0], 12, 8);
result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4;
result.type_name = cpustr;
if(result.family == 0x6 || result.family == 0xF)
{
result.model += get_masked(cpu_info[0], 20, 16) << 4;
}
if(result.family != 0xF)
{
result.family += get_masked(cpu_info[0], 28, 20);
}

// feature bits https://en.wikipedia.org/wiki/CPUID
// sse2
// feature bits https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
// sse2/sse3/ssse3
result.sse2 = has_feature(cpu_info[3], 26);
result.sse3 = has_feature(cpu_info[2], 0);
result.ssse3 = has_feature(cpu_info[2], 9);
// aes-ni
result.aes = has_feature(cpu_info[2], 25);
// avx - 27 is the check if the OS overwrote cpu features
result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27);

// extended feature bits https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
if(cpu_HFP >= 7)
{
cpuid(7, 0, cpu_info);
result.avx2 = has_feature(cpu_info[1], 5);
}
// extended function support https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented
cpuid(0x80000000, 0, cpu_info);
cpu_HEFP = cpu_info[0];

// processor brand string https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
if(cpu_HEFP >= 0x80000004)
{
for(uint32_t efp=0x80000002; efp<0x80000004; efp++){
cpuid(0x80000002, 0, cpu_info);
std::memcpy(brandstr+(16*(efp-0x80000002)), &cpu_info, 16);
}
result.brand_name = brandstr;
}

if(strcmp(cpustr, "GenuineIntel") == 0)
{
if(result.family == 0x6){
result.isIntelXBridge = (
result.model == 0x2A //Sandy Bridge
|| result.model == 0x3A //Ivy Bridge
);
result.isIntelXWell = (
result.model == 0x3C || result.model == 0x45 || result.model == 0x46 //Haswell
|| result.model == 0x47 || result.model == 0x3D //Broadwell
);
result.isIntelXLake = (
result.model == 0x4E || result.model == 0x5E //Skylake
|| result.model == 0x8E //Kaby/Coffee/Whiskey/Amber Lake
|| result.model == 0x9E //Kaby/Coffee Lake
|| result.model == 0x66 //Cannon Lake
);
}
}
if(strcmp(cpustr, "AuthenticAMD") == 0)
{
if(result.family == 0xF)
result.family += get_masked(cpu_info[0], 28, 20);
result.isAMDHammer = (result.family != 0x15 && result.family >= 0xF && result.family <= 0x16);
result.isAMDBulldozer = (result.family == 0x15);
result.isAMDZen = (result.family == 0x17);
}

return result;
Expand Down
13 changes: 12 additions & 1 deletion xmrstak/backend/cpu/cpuType.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,20 @@ struct Model
{
uint32_t family = 0u;
uint32_t model = 0u;
bool isIntelXBridge = false;
bool isIntelXWell = false;
bool isIntelXLake = false;
bool isAMDHammer = false;
bool isAMDBulldozer = false;
bool isAMDZen = false;
bool aes = false;
bool sse2 = false;
bool sse3 = false;
bool ssse3 = false;
bool avx = false;
bool avx2 = false;
std::string type_name = "unknown";
std::string brand_name = "unknown";
};

Model getModel();
Expand All @@ -24,7 +34,8 @@ Model getModel();
* This enables us to put in values exactly like in the manual
* For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
*/
int32_t get_masked(int32_t val, int32_t h, int32_t l);
uint32_t get_masked(int32_t val, int32_t h, int32_t l);


} // namespace cpu
} // namespace xmrstak
20 changes: 14 additions & 6 deletions xmrstak/backend/cpu/crypto/cryptonight_aesni.h
Original file line number Diff line number Diff line change
Expand Up @@ -1198,8 +1198,12 @@ struct Cryptonight_hash<5>
};

extern "C" void cryptonight_v8_mainloop_ivybridge_asm(cryptonight_ctx* ctx0);
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
extern "C" void cryptonight_v8_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);
extern "C" void cryptonight_v8_mainloop_bulldozer_asm(cryptonight_ctx* ctx0);
extern "C" void cryptonight_v8_mainloop_ryzen_asm(cryptonight_ctx* ctx0);
extern "C" void cryptonight_v8_rwz_mainloop_asm(cryptonight_ctx* ctx0);
extern "C" void cryptonight_v8_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1);


template <size_t N, size_t asm_version>
struct Cryptonight_hash_asm
Expand Down Expand Up @@ -1318,7 +1322,7 @@ void* allocateExecutableMemory(size_t size)
{

#ifdef _WIN64
return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
return VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
#else
#if defined(__APPLE__)
return mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
Expand Down Expand Up @@ -1374,19 +1378,23 @@ void patchAsmVariants(std::string selected_asm, cryptonight_ctx** ctx, const xmr

cn_mainloop_fun src_code = nullptr;

if(selected_asm == "intel_avx")
if(selected_asm == "intel_avx" || selected_asm == "ivybridge" || selected_asm == "sandybridge")
{
// Intel Ivy Bridge (Xeon v2, Core i7/i5/i3 3xxx, Pentium G2xxx, Celeron G1xxx)
if(N == 2)
src_code = reinterpret_cast<cn_mainloop_fun>(cryptonight_v8_double_mainloop_sandybridge_asm);
else
src_code = cryptonight_v8_mainloop_ivybridge_asm;
;
}
// supports only 1 thread per hash
if(selected_asm == "amd_avx")
if(selected_asm == "bulldozer")
{
// AMD 15h "Bulldozer" - Orochi/Vishera etc; Bulldozer/Piledriver/Steamroller/Excavator
src_code = cryptonight_v8_mainloop_bulldozer_asm;
}
if(selected_asm == "amd_avx" || selected_asm == "zen")
{
// AMD Ryzen (1xxx and 2xxx series)
// AMD 17h "Zen" - Ryzen (1xxx and 2xxx series)
src_code = cryptonight_v8_mainloop_ryzen_asm;
}

Expand Down
4 changes: 2 additions & 2 deletions xmrstak/backend/cpu/minethd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,9 @@ static std::string getAsmName(const uint32_t num_hashes)

if(cpu_model.avx && cpu_model.aes)
{
if(cpu_model.type_name.find("Intel") != std::string::npos)
if(cpu_model.isIntelXBridge || cpu_model.isIntelXWell || cpu_model.isIntelXLake)
asm_type = "intel_avx";
else if(cpu_model.type_name.find("AMD") != std::string::npos)
else if(cpu_model.isAMDBulldozer || cpu_model.isAMDZen)
asm_type = "amd_avx";
}
}
Expand Down

0 comments on commit aaa20dd

Please sign in to comment.