Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add NMakefile for Windows self build and fix incompatibilities #44

Merged
merged 16 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@ jobs:
strategy:
fail-fast: false # We want results from all OSes even if one fails.
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
build_command: make build ARCH=x86-64-v3 COMP=gcc OS=linux
- os: windows-latest
build_command: make build ARCH=x86-64-v3 COMP=gcc OS=windows
- os: macos-latest
build_command: make build ARCH=armv8.5-a COMP=gcc OS=osx

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: build
run: |-
Expand All @@ -36,3 +34,23 @@ jobs:
with:
name: artifact_${{ runner.os }}
path: bin

windows-build:
  runs-on: windows-latest
  timeout-minutes: 10

  steps:
    - uses: actions/checkout@v4

    - name: build
      # This step runs under cmd, whose mkdir has no -p switch: "mkdir -p bin"
      # would create a stray directory literally named "-p". Create bin only
      # when it is missing instead.
      # vcvars64.bat must be invoked with "call" so that the script continues
      # with the MSVC environment set up for nmake.
      run: |-
        if not exist bin mkdir bin
        cd src
        call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
        nmake -fnmakefile vc-x64
      shell: cmd

    - uses: actions/upload-artifact@v4
      with:
        name: artifact_${{ runner.os }}
        path: bin
70 changes: 70 additions & 0 deletions src/NMakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# makefile
#
# Compilation options for Microsoft Visual C++ for Windows & nmake.
#
# x64-v4 x64 with sse2, avx, sse4.2 & popcount & avx2 & avx512 support
# x64-v3 x64 with sse2, avx, sse4.2 & popcount & avx2 support
# x64-v2 x64 with sse2, sse4.2 & popcount support
# x64 x64 with sse2 support
# a64 ARM v8

# Options shared by every cl / clang-cl recipe in this file:
#   /std:c17                     compile as ISO C17
#   /DUNICODE                    define UNICODE
#   /utf-8                       treat source and execution character sets as UTF-8
#   /D_CRT_SECURE_NO_DEPRECATE   define _CRT_SECURE_NO_DEPRECATE (CRT deprecation warnings)
#   /I"..\include"               add ..\include to the include search path
#   /O2 /fp:fast                 optimize for speed, fast floating-point model
#   /GS-                         turn off buffer security checks
#   /D NDEBUG                    define NDEBUG
#   /MT                          use the static multithreaded runtime library
VC_FLAGS = /std:c17 /DUNICODE /utf-8 /D_CRT_SECURE_NO_DEPRECATE /I"..\include" /O2 /fp:fast /GS- /D NDEBUG /MT

# Microsoft cl, /arch:AVX512 build -> ..\bin\wEdax-x64-v4.exe
# remove /vlen=256 for cl earlier than 14.42
vc-x64-v4:
	cl $(VC_FLAGS) /arch:AVX512 /experimental:c11atomics /GL /vlen=256 \
	   /D__POPCNT__ /D__CRC32__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v4.exe \
	   /link /VERSION:4.6

# Microsoft cl, /arch:AVX2 build -> ..\bin\wEdax-x64-v3.exe
vc-x64-v3:
	cl $(VC_FLAGS) /arch:AVX2 /experimental:c11atomics /GL \
	   /D__POPCNT__ /D__CRC32__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe \
	   /link /VERSION:4.6

# Microsoft cl build without an /arch option; the SSE2, POPCNT and CRC32
# feature macros are supplied by hand -> ..\bin\wEdax-x64-v2.exe
vc-x64-v2:
	cl $(VC_FLAGS) /experimental:c11atomics /GL \
	   /D__SSE2__ /D__POPCNT__ /D__CRC32__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v2.exe \
	   /link /VERSION:4.6

# Microsoft cl baseline build; only the SSE2 feature macro is defined
# -> ..\bin\wEdax-x64.exe
vc-x64:
	cl $(VC_FLAGS) /experimental:c11atomics /GL \
	   /D__SSE2__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe \
	   /link /VERSION:4.6

# Microsoft cl ARM64 build with __ARM_NEON defined -> ..\bin\wEdax-a64.exe
# vcvarsamd64_arm64.bat
# NOTE(review): presumably the environment from vcvarsamd64_arm64.bat must be
# active before invoking this target — confirm.
vc-a64:
	cl $(VC_FLAGS) /experimental:c11atomics /GL \
	   /D__ARM_NEON \
	   all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe \
	   /link /VERSION:4.6

# clang-cl, /arch:AVX512 build with a preferred vector width of 256
# -> ..\bin\wEdax-x64-v4.exe
clang-x64-v4:
	clang-cl -mprefer-vector-width=256 $(VC_FLAGS) \
	   /U__STDC_NO_THREADS__ /arch:AVX512 \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v4.exe \
	   /link /VERSION:4.6

# clang-cl, /arch:AVX2 build -> ..\bin\wEdax-x64-v3.exe
clang-x64-v3:
	clang-cl $(VC_FLAGS) \
	   /U__STDC_NO_THREADS__ /arch:AVX2 \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe \
	   /link /VERSION:4.6

# clang-cl, -march=x86-64-v2 build -> ..\bin\wEdax-x64-v2.exe
clang-x64-v2:
	clang-cl -march=x86-64-v2 $(VC_FLAGS) \
	   /U__STDC_NO_THREADS__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v2.exe \
	   /link /VERSION:4.6

# clang-cl baseline build, no architecture option -> ..\bin\wEdax-x64.exe
clang-x64:
	clang-cl $(VC_FLAGS) \
	   /U__STDC_NO_THREADS__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe \
	   /link /VERSION:4.6

# clang-cl build targeting aarch64-win32-msvc with __ARM_NEON defined
# -> ..\bin\wEdax-a64.exe
clang-a64:
	clang-cl --target=aarch64-win32-msvc $(VC_FLAGS) \
	   /U__STDC_NO_THREADS__ /D__ARM_NEON \
	   all.c ws2_32.lib /Fe..\bin\wEdax-a64.exe \
	   /link /VERSION:4.6

# Profile-guided build of the /arch:AVX2 executable with Microsoft cl
# -> ..\bin\wEdax-x64-v3.exe
# NOTE(review): the recipe relies on `set` and `cd` staying in effect for the
# command lines that follow them — confirm for the nmake version in use.
vc-pgo-x64-v3:
# step 1: compile and link an instrumented executable (/ltcg:pgi)
	set VCPROFILE_PATH=..\src
	cl $(VC_FLAGS) /arch:AVX2 /experimental:c11atomics /GL \
	   /D__POPCNT__ /D__CRC32__ \
	   all.c ws2_32.lib /Fe..\bin\wEdax-x64-v3.exe \
	   /link /ltcg:pgi /VERSION:4.6
# step 2: run the instrumented executable from ..\bin on two workloads,
#         then discard the temporary book files
	cd ..\bin
	wEdax-x64-v3 -l 60 -solve ..\problem\fforum-20-39.obf
	wEdax-x64-v3 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
	del book.pgo book.pgo.store
# step 3: back in src, re-link all.obj with /ltcg:pgo and delete the
#         .pgc / .pgd profile files
	cd ..\src
	link all.obj ws2_32.lib /out:..\bin\wEdax-x64-v3.exe /ltcg:pgo /VERSION:4.6
	del *.pgc ..\bin\*.pgd

# Profile-guided build of the baseline executable with Microsoft cl
# -> ..\bin\wEdax-x64.exe
#
# The feature macros match the plain vc-x64 target (/D__SSE2__ only), because
# both targets produce the same wEdax-x64.exe. Defining __POPCNT__ and
# __CRC32__ here instead would make the profile-guided build of the baseline
# binary use a different instruction set from the regular build.
#
# NOTE(review): the recipe relies on `set` and `cd` staying in effect for the
# command lines that follow them — confirm for the nmake version in use.
vc-pgo-x64:
# step 1: compile and link an instrumented executable (/ltcg:pgi)
	set VCPROFILE_PATH=..\src
	cl $(VC_FLAGS) /experimental:c11atomics /GL /D__SSE2__ all.c ws2_32.lib /Fe..\bin\wEdax-x64.exe /link /ltcg:pgi /VERSION:4.6
# step 2: run the instrumented executable from ..\bin on two workloads,
#         then discard the temporary book files
	cd ..\bin
	wEdax-x64 -l 60 -solve ..\problem\fforum-20-39.obf
	wEdax-x64 -l 18 -auto-store on -auto-start on -repeat 2 -auto-quit on -mode 2 -book-file book.pgo
	del book.pgo book.pgo.store
# step 3: back in src, re-link all.obj with /ltcg:pgo and delete the
#         .pgc / .pgd profile files
	cd ..\src
	link all.obj ws2_32.lib /out:..\bin\wEdax-x64.exe /ltcg:pgo /VERSION:4.6
	del *.pgc ..\bin\*.pgd

# Delete intermediate build and profiling files from the current directory.
# cmd's `del` takes /f to force deletion of read-only files; the Unix-style
# "-f" would be taken as the name of a file to delete.
clean:
	del /f pgopti* *.dyn all.gc* *~ *.p* *.obj

30 changes: 22 additions & 8 deletions src/bit.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ int bit_count_64(const uint64_t b)

return stdc_count_ones_ul(b); // C23 version

#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && defined(__POPCNT__)

return __popcnt64(b); // Microsoft Visual C/C++ version

Expand Down Expand Up @@ -120,7 +120,7 @@ int bit_count_32(const uint32_t b)

return stdc_count_ones_ui(b); // C23 version

#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && defined(__POPCNT__)

return __popcnt(b); // Microsoft Visual C/C++ version

Expand Down Expand Up @@ -150,14 +150,21 @@ int bit_leading_zeros_64(uint64_t b)

return stdc_leading_zeros_ul(b); // C23 version

#elif defined(_MSC_VER) && defined(__AVX2__)

return __lzcnt64(b); // Microsoft Visual C/C++ BMI1 version

#elif defined(_MSC_VER)

return __lzcnt64(b); // Microsoft Visual C/C++ version
unsigned long index;
if (_BitScanReverse64(&index, b))
return 63 - (int) index;
return 64;

#elif defined(__GNUC__)

// return b ? __builtin_clzl(b) : 64; // GNUC/CLANG version
return __builtin_clzl(b); // GNUC/CLANG version
// return b ? __builtin_clzll(b) : 64; // GNUC/CLANG version
return __builtin_clzll(b); // GNUC/CLANG version

#else

Expand All @@ -170,7 +177,7 @@ int bit_leading_zeros_64(uint64_t b)
c = b >> 4; if (c != 0) { n = n - 4; b = c; }
c = b >> 2; if (c != 0) { n = n - 2; b = c; }
c = b >> 1; if (c != 0) return n - 2;
return n - x;
return n - b;


#endif
Expand All @@ -188,9 +195,16 @@ int bit_leading_zeros_32(uint32_t b)

return stdc_leading_zeros_ui(b); // C23 version

#elif defined(_MSC_VER) && defined(__AVX2__)

return __lzcnt(b); // Microsoft Visual C/C++ BMI1 version

#elif defined(_MSC_VER)

return __lzcnt(b); // Microsoft Visual C/C++ version
unsigned long index;
if (_BitScanReverse(&index, b))
return 31 - (int) index;
return 32;

#elif defined(__GNUC__)

Expand All @@ -207,7 +221,7 @@ int bit_leading_zeros_32(uint32_t b)
c = b >> 4; if (c != 0) { n = n - 4; b = c; }
c = b >> 2; if (c != 0) { n = n - 2; b = c; }
c = b >> 1; if (c != 0) return n - 2;
return n - x;
return n - b;


#endif
Expand Down
4 changes: 4 additions & 0 deletions src/bit.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
#include <stdio.h>
#include <stdint.h>

#ifndef __has_builtin // Compatibility with non-clang compilers.
#define __has_builtin(x) 0
#endif

struct Random;

/* declaration */
Expand Down
6 changes: 3 additions & 3 deletions src/crc32c.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
#include "settings.h"

#if !USE_CRC32C || (!defined(__CRC32__) && !defined(__ARM_FEATURE_CRC32))
#define SOFT_CRC32C true
#define SOFT_CRC32C 1 /* true */
#else
#define SOFT_CRC32C false
#define SOFT_CRC32C 0 /* false */
#endif

#include <assert.h>
#ifdef __ARM_FEATURE_CRC32
#include <arm_acle.h>
#elif defined(__CRC32__)
#ifdef __MSC_VER
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <x86intrin.h>
Expand Down
6 changes: 3 additions & 3 deletions src/flip_bitscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static const uint64_t FLIPPED_5_V[18] = {
*/
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
Expand Down
8 changes: 4 additions & 4 deletions src/flip_carry_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,11 @@ static const uint64_t FLIPPED_5_U[137] = {
/*
* Set all bits below the sole outflank bit if outflank != 0
*/
#if __has_builtin(__builtin_subcl)
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#elif (defined(_M_X64) && (_MSC_VER >= 1800)) || (defined(__x86_64__) && defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)))
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
Expand Down
6 changes: 3 additions & 3 deletions src/flip_neon_bitscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ static const uint64_t FLIPPED_4_H[19] = { // ...cbahg
*/
#if __has_builtin(__builtin_subcll)
static inline uint64_t OutflankToFlipmask(uint64_t outflank) {
uint64_t flipmask, cy;
flipmask = __builtin_subcl(outflank, 1, 0, &cy);
return __builtin_addcl(flipmask, 0, cy, &cy);
unsigned long long flipmask, cy;
flipmask = __builtin_subcll(outflank, 1, 0, &cy);
return __builtin_addcll(flipmask, 0, cy, &cy);
}
#else
#define OutflankToFlipmask(outflank) ((outflank) - (uint32_t) ((outflank) != 0))
Expand Down
6 changes: 5 additions & 1 deletion src/flip_sve_lzcnt.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ uint64_t flip(const int pos, const uint64_t P, const uint64_t O)

uint64_t board_flip(const Board *board, const int x)
{
return flip(x, P, O);
return flip(x, board->player, board->opponent);
}

/**
 * Vector wrapper around Flip: unpacks the two 64-bit lanes of OP, passes them
 * to Flip together with pos, and returns the 64-bit result duplicated into
 * both lanes.
 *
 * NOTE(review): this calls `Flip` (capital F), while the function visible in
 * this file is `flip` — confirm that `Flip` is defined elsewhere.
 *
 * @param OP  vector whose lane 0 and lane 1 become the 2nd and 3rd arguments of Flip.
 * @param pos first argument of Flip.
 * @return the value returned by Flip, in both lanes.
 */
uint64x2_t mm_flip(uint64x2_t OP, int pos)
{
	const uint64_t lane0 = vgetq_lane_u64(OP, 0);
	const uint64_t lane1 = vgetq_lane_u64(OP, 1);
	const uint64_t flipped = Flip(pos, lane0, lane1);

	return vdupq_n_u64(flipped);
}
12 changes: 6 additions & 6 deletions src/game.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ struct Random;
typedef struct Game {
Board initial_board;
struct {
uint16_t year;
uint8_t month;
uint8_t day;
uint8_t hour;
uint8_t minute;
uint8_t second;
int16_t year;
int8_t month;
int8_t day;
int8_t hour;
int8_t minute;
int8_t second;
} date;
char name[2][32];
uint8_t move[60];
Expand Down
26 changes: 18 additions & 8 deletions src/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,18 +257,28 @@ static void data_new(HashData *data, const HashStore *store)
}

/**
* @brief Initialize a new hash table item.
* @brief Prefetch the hash entry.
*
* This implementation tries to be robust against concurrency. Data are first
* set up in a local thread-safe structure, before being copied into the
* hashtable entry. Then the hashcode of the entry is xored with the thread
* safe structure ; so that any corrupted entry won't be readable.
* The hash entry may not be in the CPU cache and take long to read, so
* prefetch it as soon as the hash code is available.
*
* @param hashtable Hash table to fetch from.
* @param hashcode Hash code.
*/
void hash_prefetch(HashTable *hashtable, const uint64_t hashcode) {
#if defined(__GNUC__)
Hash *hash = hashtable->hash + (hashcode & hashtable->hash_mask);
__builtin_prefetch(hash);
__builtin_prefetch(hash + HASH_N_WAY - 1);
#if defined(__GNUC__)
__builtin_prefetch(hash);
__builtin_prefetch(hash + HASH_N_WAY - 1);
#elif defined(__SSE2__)
_mm_prefetch((char const *) hash, _MM_HINT_T0);
_mm_prefetch((char const *)(hash + HASH_N_WAY - 1), _MM_HINT_T0);
#elif defined(__ARM_ACLE)
__pld(hash);
__pld(hash + HASH_N_WAY - 1);
#elif defined(_M_ARM64)
__prefetch(hash);
__prefetch(hash + HASH_N_WAY - 1);
#endif
}

Expand Down
4 changes: 2 additions & 2 deletions src/search.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ const Selectivity selectivity_table [] = {

/** threshold values to try stability cutoff during NWS search */
// TODO: better values may exist.
const uint8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
const int8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...
99, 99, 99, 99, 6, 8, 10, 12,
#if USE_SOLID
8, 10, 20, 22, 24, 26, 28, 30,
Expand All @@ -120,7 +120,7 @@ const uint8_t NWS_STABILITY_THRESHOLD[] = { // 99 = unused value...

/** threshold values to try stability cutoff during PVS search */
// TODO: better values may exist.
const uint8_t PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
const int8_t PVS_STABILITY_THRESHOLD[] = { // 99 = unused value...
99, 99, 99, 99, -2, 0, 2, 4,
6, 8, 12, 14, 16, 18, 20, 22,
24, 26, 28, 30, 32, 34, 36, 38,
Expand Down
4 changes: 2 additions & 2 deletions src/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ struct Node;
extern const uint8_t QUADRANT_ID[];
extern const Selectivity selectivity_table[];
extern const int NO_SELECTIVITY;
extern const uint8_t NWS_STABILITY_THRESHOLD[];
extern const uint8_t PVS_STABILITY_THRESHOLD[];
extern const int8_t NWS_STABILITY_THRESHOLD[];
extern const int8_t PVS_STABILITY_THRESHOLD[];
extern const uint8_t SQUARE_TYPE[];

/* function definition */
Expand Down
2 changes: 1 addition & 1 deletion src/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
#define MOVE_GENERATOR MOVE_GENERATOR_AVX512CD
#elif defined __AVX2__
#define MOVE_GENERATOR MOVE_GENERATOR_AVX_ACEPCK
#elif defined __SSE__
#elif defined __SSE2__
#define MOVE_GENERATOR MOVE_GENERATOR_CARRY_64
#elif defined __ARM_NEON
#define MOVE_GENERATOR MOVE_GENERATOR_NEON_BITSCAN
Expand Down
Loading
Loading