From 3237177ea53d1bd65224bf78b9eff8c01f84f862 Mon Sep 17 00:00:00 2001 From: Disservin Date: Sat, 21 Oct 2023 11:40:56 +0200 Subject: [PATCH] add clang-format This introduces clang-format to enforce a code style for stockfish. The lack of an automated code formatter has been a long standing issue https://github.com/official-stockfish/Stockfish/issues/3608 This PR includes a Makefile target to format the code accordingly. --- .clang-format | 39 + src/Makefile | 13 + src/benchmark.cpp | 102 +- src/benchmark.h | 4 +- src/bitboard.cpp | 145 +- src/bitboard.h | 260 +- src/evaluate.cpp | 183 +- src/evaluate.h | 30 +- src/main.cpp | 24 +- src/misc.cpp | 848 +++--- src/misc.h | 106 +- src/movegen.cpp | 172 +- src/movegen.h | 53 +- src/movepick.cpp | 505 ++-- src/movepick.h | 143 +- src/nnue/evaluate_nnue.cpp | 381 +-- src/nnue/evaluate_nnue.h | 54 +- src/nnue/features/half_ka_v2_hm.cpp | 93 +- src/nnue/features/half_ka_v2_hm.h | 70 +- src/nnue/layers/affine_transform.h | 423 ++- .../layers/affine_transform_sparse_input.h | 321 ++- src/nnue/layers/clipped_relu.h | 202 +- src/nnue/layers/simd.h | 312 ++- src/nnue/layers/sqr_clipped_relu.h | 101 +- src/nnue/nnue_accumulator.h | 10 +- src/nnue/nnue_architecture.h | 163 +- src/nnue/nnue_common.h | 455 ++-- src/nnue/nnue_feature_transformer.h | 1011 ++++---- src/position.cpp | 1781 ++++++------- src/position.h | 484 ++-- src/search.cpp | 2291 ++++++++--------- src/search.h | 92 +- src/syzygy/tbprobe.cpp | 610 +++-- src/syzygy/tbprobe.h | 30 +- src/thread.cpp | 240 +- src/thread.h | 150 +- src/thread_win32_osx.h | 45 +- src/timeman.cpp | 135 +- src/timeman.h | 31 +- src/tt.cpp | 152 +- src/tt.h | 106 +- src/tune.cpp | 92 +- src/tune.h | 191 +- src/types.h | 504 ++-- src/uci.cpp | 422 +-- src/uci.h | 52 +- src/ucioption.cpp | 188 +- 47 files changed, 6800 insertions(+), 7019 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..b69f32d9a45 --- /dev/null +++ b/.clang-format @@ -0,0 +1,39 @@ +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveDeclarations: Consecutive +AlignEscapedNewlines: DontAlign +AlignOperands: AlignAfterOperator +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AlwaysBreakTemplateDeclarations: Yes +BasedOnStyle: WebKit +BitFieldColonSpacing: After +BinPackParameters: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: AfterColon +BreakStringLiterals: false +ColumnLimit: 100 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +IndentGotoLabels: false +IndentPPDirectives: BeforeHash +IndentWidth: 4 +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: None +PackConstructorInitializers: Never +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: true +SpaceAfterTemplateKeyword: false +SpaceBeforeCaseColon: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeInheritanceColon: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 2 diff --git a/src/Makefile b/src/Makefile index 5b43c35fdd7..8978ae77267 100644 --- a/src/Makefile +++ b/src/Makefile @@ -57,6 +57,14 @@ SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ nnue/evaluate_nnue.cpp nnue/features/half_ka_v2_hm.cpp +HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \ + nnue/evaluate_nnue.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \ + nnue/layers/affine_transform_sparse_input.h nnue/layers/clipped_relu.h nnue/layers/simd.h \ + nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \ + nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \ + search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \ + tt.h tune.h types.h uci.h + OBJS = $(notdir $(SRCS:.cpp=.o)) VPATH = syzygy:nnue:nnue/features @@ -936,6 +944,11 @@ net: netvariables fi; \ fi; \ +# format all $(HEADERS) +format: .depend + clang-format -i $(SRCS) -style=file + clang-format -i $(HEADERS) -style=file + # default target default: help diff --git a/src/benchmark.cpp b/src/benchmark.cpp index d67e37f66ed..cb81f81efe9 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -48,13 +48,10 @@ const std::vector Defaults = { "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", - "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", - "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", - "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", - "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", + "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", + "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", - "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", - "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", + "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", @@ -66,9 +63,9 @@ const std::vector Defaults = { "4k3/3q1r2/1N2r1b1/3ppN2/2nPP3/1B1R2n1/2R1Q3/3K4 w - - 5 1", // 5-man positions - "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate - "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate - "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw + "8/8/8/8/5kp1/P7/8/1K1N4 w - - 0 1", // Kc2 - mate + "8/8/8/5N2/8/p7/8/2NK3k w - - 0 1", // Na2 - mate + "8/3k4/8/8/8/4B3/4KB2/2B5 w - - 0 1", // draw // 6-man positions "8/8/1P6/5pr1/8/4R3/7k/2K5 w - - 0 1", // Re5 - mate @@ -76,22 +73,20 @@ const std::vector Defaults = { "8/8/3P3k/8/1p6/8/1P6/1K3n2 b - - 0 1", // Nd2 - draw // 7-man positions - "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw + "8/R7/2q5/8/6k1/8/1P5p/K6R w - - 0 124", // Draw // Mate and stalemate positions "6k1/3b3r/1p1p4/p1n2p2/1PPNpP1q/P3Q1p1/1R1RB1P1/5K2 b - - 0 1", - "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", - "8/8/8/8/8/6k1/6p1/6K1 w - -", + "r2r1n2/pp2bk2/2p1p2p/3q4/3PN1QP/2P3R1/P4PP1/5RK1 w - - 0 1", "8/8/8/8/8/6k1/6p1/6K1 w - -", "7k/7P/6K1/8/3B4/8/8/8 b - -", // Chess 960 "setoption name UCI_Chess960 value true", "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1", - "setoption name UCI_Chess960 value false" -}; + "setoption name UCI_Chess960 value false"}; -} // namespace +} // namespace namespace Stockfish { @@ -109,56 +104,53 @@ namespace Stockfish { std::vector setup_bench(const Position& current, std::istream& is) { - std::vector fens, list; - std::string go, token; + std::vector fens, list; + std::string go, token; - // Assign default values to missing arguments - std::string ttSize = (is >> token) ? token : "16"; - std::string threads = (is >> token) ? token : "1"; - std::string limit = (is >> token) ? token : "13"; - std::string fenFile = (is >> token) ? token : "default"; - std::string limitType = (is >> token) ? token : "depth"; + // Assign default values to missing arguments + std::string ttSize = (is >> token) ? token : "16"; + std::string threads = (is >> token) ? token : "1"; + std::string limit = (is >> token) ? token : "13"; + std::string fenFile = (is >> token) ? token : "default"; + std::string limitType = (is >> token) ? token : "depth"; - go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; + go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; - if (fenFile == "default") - fens = Defaults; + if (fenFile == "default") + fens = Defaults; - else if (fenFile == "current") - fens.push_back(current.fen()); + else if (fenFile == "current") + fens.push_back(current.fen()); - else - { - std::string fen; - std::ifstream file(fenFile); + else { + std::string fen; + std::ifstream file(fenFile); - if (!file.is_open()) - { - std::cerr << "Unable to open file " << fenFile << std::endl; - exit(EXIT_FAILURE); - } + if (!file.is_open()) { + std::cerr << "Unable to open file " << fenFile << std::endl; + exit(EXIT_FAILURE); + } - while (getline(file, fen)) - if (!fen.empty()) - fens.push_back(fen); + while (getline(file, fen)) + if (!fen.empty()) + fens.push_back(fen); - file.close(); - } + file.close(); + } - list.emplace_back("setoption name Threads value " + threads); - list.emplace_back("setoption name Hash value " + ttSize); - list.emplace_back("ucinewgame"); + list.emplace_back("setoption name Threads value " + threads); + list.emplace_back("setoption name Hash value " + ttSize); + list.emplace_back("ucinewgame"); - for (const std::string& fen : fens) - if (fen.find("setoption") != std::string::npos) - list.emplace_back(fen); - else - { - list.emplace_back("position fen " + fen); - list.emplace_back(go); - } + for (const std::string& fen : fens) + if (fen.find("setoption") != std::string::npos) + list.emplace_back(fen); + else { + list.emplace_back("position fen " + fen); + list.emplace_back(go); + } - return list; + return list; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/benchmark.h b/src/benchmark.h index 64acf833ac0..e6206d19349 100644 --- a/src/benchmark.h +++ b/src/benchmark.h @@ -29,6 +29,6 @@ class Position; std::vector setup_bench(const Position&, std::istream&); -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef BENCHMARK_H_INCLUDED +#endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 89eeee611f8..040fd924877 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -39,10 +39,10 @@ Magic BishopMagics[SQUARE_NB]; namespace { - Bitboard RookTable[0x19000]; // To store rook attacks - Bitboard BishopTable[0x1480]; // To store bishop attacks +Bitboard RookTable[0x19000]; // To store rook attacks +Bitboard BishopTable[0x1480]; // To store bishop attacks - void init_magics(PieceType pt, Bitboard table[], Magic magics[]); +void init_magics(PieceType pt, Bitboard table[], Magic magics[]); } @@ -60,18 +60,17 @@ inline Bitboard safe_destination(Square s, int step) { std::string Bitboards::pretty(Bitboard b) { - std::string s = "+---+---+---+---+---+---+---+---+\n"; + std::string s = "+---+---+---+---+---+---+---+---+\n"; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - s += b & make_square(f, r) ? "| X " : "| "; + for (Rank r = RANK_8; r >= RANK_1; --r) { + for (File f = FILE_A; f <= FILE_H; ++f) + s += b & make_square(f, r) ? "| X " : "| "; - s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; - } - s += " a b c d e f g h\n"; + s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n"; + } + s += " a b c d e f g h\n"; - return s; + return s; } @@ -80,78 +79,74 @@ std::string Bitboards::pretty(Bitboard b) { void Bitboards::init() { - for (unsigned i = 0; i < (1 << 16); ++i) - PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); - - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); - - init_magics(ROOK, RookTable, RookMagics); - init_magics(BISHOP, BishopTable, BishopMagics); - - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - { - PawnAttacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); - PawnAttacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); - - for (int step : {-9, -8, -7, -1, 1, 7, 8, 9} ) - PseudoAttacks[KING][s1] |= safe_destination(s1, step); - - for (int step : {-17, -15, -10, -6, 6, 10, 15, 17} ) - PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step); - - PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb(s1, 0); - PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ ROOK][s1] = attacks_bb< ROOK>(s1, 0); - - for (PieceType pt : { BISHOP, ROOK }) - for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) - { - if (PseudoAttacks[pt][s1] & s2) - { - LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; - BetweenBB[s1][s2] = (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); - } - BetweenBB[s1][s2] |= s2; - } - } + for (unsigned i = 0; i < (1 << 16); ++i) + PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); + + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) + SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); + + init_magics(ROOK, RookTable, RookMagics); + init_magics(BISHOP, BishopTable, BishopMagics); + + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) { + PawnAttacks[WHITE][s1] = pawn_attacks_bb(square_bb(s1)); + PawnAttacks[BLACK][s1] = pawn_attacks_bb(square_bb(s1)); + + for (int step : {-9, -8, -7, -1, 1, 7, 8, 9}) + PseudoAttacks[KING][s1] |= safe_destination(s1, step); + + for (int step : {-17, -15, -10, -6, 6, 10, 15, 17}) + PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step); + + PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb(s1, 0); + PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ROOK][s1] = attacks_bb(s1, 0); + + for (PieceType pt : {BISHOP, ROOK}) + for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) { + if (PseudoAttacks[pt][s1] & s2) { + LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2; + BetweenBB[s1][s2] = + (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1))); + } + BetweenBB[s1][s2] |= s2; + } + } } namespace { - Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { +Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) { - Bitboard attacks = 0; - Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; + Bitboard attacks = 0; + Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; - for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) - { + for (Direction d : (pt == ROOK ? RookDirections : BishopDirections)) { Square s = sq; while (safe_destination(s, d) && !(occupied & s)) attacks |= (s += d); } return attacks; - } +} - // init_magics() computes all rook and bishop attacks at startup. Magic - // bitboards are used to look up attacks of sliding pieces. As a reference see - // www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so - // called "fancy" approach. +// init_magics() computes all rook and bishop attacks at startup. Magic +// bitboards are used to look up attacks of sliding pieces. As a reference see +// www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so +// called "fancy" approach. - void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { +void init_magics(PieceType pt, Bitboard table[], Magic magics[]) { // Optimal PRNG seeds to pick the correct magics in the shortest time - int seeds[][RANK_NB] = { { 8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020 }, - { 728, 10316, 55013, 32803, 12281, 15100, 16645, 255 } }; + int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020}, + {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}}; Bitboard occupancy[4096], reference[4096], edges, b; - int epoch[4096] = {}, cnt = 0, size = 0; + int epoch[4096] = {}, cnt = 0, size = 0; - for (Square s = SQ_A1; s <= SQ_H8; ++s) - { + for (Square s = SQ_A1; s <= SQ_H8; ++s) { // Board edges are not considered in the relevant occupancies edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s)); @@ -161,8 +156,8 @@ namespace { // the number of 1s of the mask. Hence we deduce the size of the shift to // apply to the 64 or 32 bits word to get the index. Magic& m = magics[s]; - m.mask = sliding_attack(pt, s, 0) & ~edges; - m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); + m.mask = sliding_attack(pt, s, 0) & ~edges; + m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask); // Set the offset for the attacks table of the square. We have individual // table sizes for each square with "Fancy Magic Bitboards". @@ -189,9 +184,8 @@ namespace { // Find a magic for square 's' picking up an (almost) random number // until we find the one that passes the verification test. - for (int i = 0; i < size; ) - { - for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6; ) + for (int i = 0; i < size;) { + for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;) m.magic = rng.sparse_rand(); // A good magic must map every possible occupancy to an index that @@ -200,21 +194,18 @@ namespace { // effect of verifying the magic. Keep track of the attempt count // and save it in epoch[], little speed-up trick to avoid resetting // m.attacks[] after every failed attempt. - for (++cnt, i = 0; i < size; ++i) - { + for (++cnt, i = 0; i < size; ++i) { unsigned idx = m.index(occupancy[i]); - if (epoch[idx] < cnt) - { - epoch[idx] = cnt; + if (epoch[idx] < cnt) { + epoch[idx] = cnt; m.attacks[idx] = reference[i]; - } - else if (m.attacks[idx] != reference[i]) + } else if (m.attacks[idx] != reference[i]) break; } } } - } +} } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/bitboard.h b/src/bitboard.h index 0908c957058..9c974573b08 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -32,10 +32,10 @@ namespace Stockfish { namespace Bitboards { -void init(); +void init(); std::string pretty(Bitboard b); -} // namespace Stockfish::Bitboards +} // namespace Stockfish::Bitboards constexpr Bitboard FileABB = 0x0101010101010101ULL; constexpr Bitboard FileBBB = FileABB << 1; @@ -66,85 +66,80 @@ extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB]; // Magic holds all magic bitboards relevant data for a single square struct Magic { - Bitboard mask; - Bitboard magic; - Bitboard* attacks; - unsigned shift; + Bitboard mask; + Bitboard magic; + Bitboard* attacks; + unsigned shift; - // Compute the attack's index using the 'magic bitboards' approach - unsigned index(Bitboard occupied) const { + // Compute the attack's index using the 'magic bitboards' approach + unsigned index(Bitboard occupied) const { - if (HasPext) - return unsigned(pext(occupied, mask)); + if (HasPext) + return unsigned(pext(occupied, mask)); - if (Is64Bit) - return unsigned(((occupied & mask) * magic) >> shift); + if (Is64Bit) + return unsigned(((occupied & mask) * magic) >> shift); - unsigned lo = unsigned(occupied) & unsigned(mask); - unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); - return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; - } + unsigned lo = unsigned(occupied) & unsigned(mask); + unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32); + return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift; + } }; extern Magic RookMagics[SQUARE_NB]; extern Magic BishopMagics[SQUARE_NB]; inline Bitboard square_bb(Square s) { - assert(is_ok(s)); - return (1ULL << s); + assert(is_ok(s)); + return (1ULL << s); } // Overloads of bitwise operators between a Bitboard and a Square for testing // whether a given bit is set in a bitboard, and for setting and clearing bits. -inline Bitboard operator&( Bitboard b, Square s) { return b & square_bb(s); } -inline Bitboard operator|( Bitboard b, Square s) { return b | square_bb(s); } -inline Bitboard operator^( Bitboard b, Square s) { return b ^ square_bb(s); } +inline Bitboard operator&(Bitboard b, Square s) { return b & square_bb(s); } +inline Bitboard operator|(Bitboard b, Square s) { return b | square_bb(s); } +inline Bitboard operator^(Bitboard b, Square s) { return b ^ square_bb(s); } inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); } inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); } -inline Bitboard operator&(Square s, Bitboard b) { return b & s; } -inline Bitboard operator|(Square s, Bitboard b) { return b | s; } -inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } +inline Bitboard operator&(Square s, Bitboard b) { return b & s; } +inline Bitboard operator|(Square s, Bitboard b) { return b | s; } +inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } -inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } +inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } -constexpr bool more_than_one(Bitboard b) { - return b & (b - 1); -} +constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } // rank_bb() and file_bb() return a bitboard representing all the squares on // the given file or rank. -constexpr Bitboard rank_bb(Rank r) { - return Rank1BB << (8 * r); -} +constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); } -constexpr Bitboard rank_bb(Square s) { - return rank_bb(rank_of(s)); -} +constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); } -constexpr Bitboard file_bb(File f) { - return FileABB << f; -} +constexpr Bitboard file_bb(File f) { return FileABB << f; } -constexpr Bitboard file_bb(Square s) { - return file_bb(file_of(s)); -} +constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); } // shift() moves a bitboard one or two steps as specified by the direction D template constexpr Bitboard shift(Bitboard b) { - return D == NORTH ? b << 8 : D == SOUTH ? b >> 8 - : D == NORTH+NORTH? b <<16 : D == SOUTH+SOUTH? b >>16 - : D == EAST ? (b & ~FileHBB) << 1 : D == WEST ? (b & ~FileABB) >> 1 - : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7 - : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9 - : 0; + return D == NORTH ? b << 8 + : D == SOUTH ? b >> 8 + : D == NORTH + NORTH ? b << 16 + : D == SOUTH + SOUTH ? b >> 16 + : D == EAST ? (b & ~FileHBB) << 1 + : D == WEST ? (b & ~FileABB) >> 1 + : D == NORTH_EAST ? (b & ~FileHBB) << 9 + : D == NORTH_WEST ? (b & ~FileABB) << 7 + : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 + : D == SOUTH_WEST ? (b & ~FileABB) >> 9 + : 0; } @@ -153,14 +148,14 @@ constexpr Bitboard shift(Bitboard b) { template constexpr Bitboard pawn_attacks_bb(Bitboard b) { - return C == WHITE ? shift(b) | shift(b) - : shift(b) | shift(b); + return C == WHITE ? shift(b) | shift(b) + : shift(b) | shift(b); } inline Bitboard pawn_attacks_bb(Color c, Square s) { - assert(is_ok(s)); - return PawnAttacks[c][s]; + assert(is_ok(s)); + return PawnAttacks[c][s]; } // line_bb() returns a bitboard representing an entire line (from board edge @@ -170,9 +165,9 @@ inline Bitboard pawn_attacks_bb(Color c, Square s) { inline Bitboard line_bb(Square s1, Square s2) { - assert(is_ok(s1) && is_ok(s2)); + assert(is_ok(s1) && is_ok(s2)); - return LineBB[s1][s2]; + return LineBB[s1][s2]; } @@ -186,26 +181,34 @@ inline Bitboard line_bb(Square s1, Square s2) { inline Bitboard between_bb(Square s1, Square s2) { - assert(is_ok(s1) && is_ok(s2)); + assert(is_ok(s1) && is_ok(s2)); - return BetweenBB[s1][s2]; + return BetweenBB[s1][s2]; } // aligned() returns true if the squares s1, s2 and s3 are aligned either on a // straight or on a diagonal line. -inline bool aligned(Square s1, Square s2, Square s3) { - return line_bb(s1, s2) & s3; -} +inline bool aligned(Square s1, Square s2, Square s3) { return line_bb(s1, s2) & s3; } // distance() functions return the distance between x and y, defined as the // number of steps for a king in x to reach y. -template inline int distance(Square x, Square y); -template<> inline int distance(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); } -template<> inline int distance(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); } -template<> inline int distance(Square x, Square y) { return SquareDistance[x][y]; } +template +inline int distance(Square x, Square y); +template<> +inline int distance(Square x, Square y) { + return std::abs(file_of(x) - file_of(y)); +} +template<> +inline int distance(Square x, Square y) { + return std::abs(rank_of(x) - rank_of(y)); +} +template<> +inline int distance(Square x, Square y) { + return SquareDistance[x][y]; +} inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } @@ -215,9 +218,9 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } template inline Bitboard attacks_bb(Square s) { - assert((Pt != PAWN) && (is_ok(s))); + assert((Pt != PAWN) && (is_ok(s))); - return PseudoAttacks[Pt][s]; + return PseudoAttacks[Pt][s]; } @@ -228,28 +231,26 @@ inline Bitboard attacks_bb(Square s) { template inline Bitboard attacks_bb(Square s, Bitboard occupied) { - assert((Pt != PAWN) && (is_ok(s))); + assert((Pt != PAWN) && (is_ok(s))); - switch (Pt) - { - case BISHOP: return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; - case ROOK : return RookMagics[s].attacks[ RookMagics[s].index(occupied)]; - case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : return PseudoAttacks[Pt][s]; - } + switch (Pt) { + case BISHOP : return BishopMagics[s].attacks[BishopMagics[s].index(occupied)]; + case ROOK : return RookMagics[s].attacks[RookMagics[s].index(occupied)]; + case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : return PseudoAttacks[Pt][s]; + } } inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) { - assert((pt != PAWN) && (is_ok(s))); + assert((pt != PAWN) && (is_ok(s))); - switch (pt) - { - case BISHOP: return attacks_bb(s, occupied); - case ROOK : return attacks_bb< ROOK>(s, occupied); - case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); - default : return PseudoAttacks[pt][s]; - } + switch (pt) { + case BISHOP : return attacks_bb(s, occupied); + case ROOK : return attacks_bb(s, occupied); + case QUEEN : return attacks_bb(s, occupied) | attacks_bb(s, occupied); + default : return PseudoAttacks[pt][s]; + } } @@ -259,16 +260,19 @@ inline int popcount(Bitboard b) { #ifndef USE_POPCNT - union { Bitboard bb; uint16_t u[4]; } v = { b }; - return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]; + union { + Bitboard bb; + uint16_t u[4]; + } v = {b}; + return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]]; #elif defined(_MSC_VER) - return int(_mm_popcnt_u64(b)); + return int(_mm_popcnt_u64(b)); -#else // Assumed gcc or compatible compiler +#else // Assumed gcc or compatible compiler - return __builtin_popcountll(b); + return __builtin_popcountll(b); #endif } @@ -279,66 +283,66 @@ inline int popcount(Bitboard b) { #if defined(__GNUC__) // GCC, Clang, ICX inline Square lsb(Bitboard b) { - assert(b); - return Square(__builtin_ctzll(b)); + assert(b); + return Square(__builtin_ctzll(b)); } inline Square msb(Bitboard b) { - assert(b); - return Square(63 ^ __builtin_clzll(b)); + assert(b); + return Square(63 ^ __builtin_clzll(b)); } #elif defined(_MSC_VER) // MSVC -#ifdef _WIN64 // MSVC, WIN64 + #ifdef _WIN64 // MSVC, WIN64 inline Square lsb(Bitboard b) { - assert(b); - unsigned long idx; - _BitScanForward64(&idx, b); - return (Square) idx; + assert(b); + unsigned long idx; + _BitScanForward64(&idx, b); + return (Square) idx; } inline Square msb(Bitboard b) { - assert(b); - unsigned long idx; - _BitScanReverse64(&idx, b); - return (Square) idx; + assert(b); + unsigned long idx; + _BitScanReverse64(&idx, b); + return (Square) idx; } -#else // MSVC, WIN32 + #else // MSVC, WIN32 inline Square lsb(Bitboard b) { - assert(b); - unsigned long idx; - - if (b & 0xffffffff) { - _BitScanForward(&idx, int32_t(b)); - return Square(idx); - } else { - _BitScanForward(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } + assert(b); + unsigned long idx; + + if (b & 0xffffffff) { + _BitScanForward(&idx, int32_t(b)); + return Square(idx); + } else { + _BitScanForward(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } } inline Square msb(Bitboard b) { - assert(b); - unsigned long idx; - - if (b >> 32) { - _BitScanReverse(&idx, int32_t(b >> 32)); - return Square(idx + 32); - } else { - _BitScanReverse(&idx, int32_t(b)); - return Square(idx); - } + assert(b); + unsigned long idx; + + if (b >> 32) { + _BitScanReverse(&idx, int32_t(b >> 32)); + return Square(idx + 32); + } else { + _BitScanReverse(&idx, int32_t(b)); + return Square(idx); + } } -#endif + #endif #else // Compiler is neither GCC nor MSVC compatible -#error "Compiler not supported." + #error "Compiler not supported." #endif @@ -346,19 +350,19 @@ inline Square msb(Bitboard b) { // square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)). inline Bitboard least_significant_square_bb(Bitboard b) { - assert(b); - return b & -b; + assert(b); + return b & -b; } // pop_lsb() finds and clears the least significant bit in a non-zero bitboard inline Square pop_lsb(Bitboard& b) { - assert(b); - const Square s = lsb(b); - b &= b - 1; - return s; + assert(b); + const Square s = lsb(b); + b &= b - 1; + return s; } -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef BITBOARD_H_INCLUDED +#endif // #ifndef BITBOARD_H_INCLUDED diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 3eb7ee850a3..5b24034556f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -43,11 +43,11 @@ // const unsigned int gEmbeddedNNUESize; // the size of the embedded file // Note that this does not work in Microsoft Visual Studio. #if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) - INCBIN(EmbeddedNNUE, EvalFileDefaultName); +INCBIN(EmbeddedNNUE, EvalFileDefaultName); #else - const unsigned char gEmbeddedNNUEData[1] = {0x0}; - const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; - const unsigned int gEmbeddedNNUESize = 1; +const unsigned char gEmbeddedNNUEData[1] = {0x0}; +const unsigned char* const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; +const unsigned int gEmbeddedNNUESize = 1; #endif @@ -55,70 +55,76 @@ namespace Stockfish { namespace Eval { - std::string currentEvalFileName = "None"; +std::string currentEvalFileName = "None"; - // NNUE::init() tries to load a NNUE network at startup time, or when the engine - // receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" - // The name of the NNUE network is always retrieved from the EvalFile option. - // We search the given network in three locations: internally (the default - // network may be embedded in the binary), in the active working directory and - // in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY - // variable to have the engine search in a special directory in their distro. +// NNUE::init() tries to load a NNUE network at startup time, or when the engine +// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" +// The name of the NNUE network is always retrieved from the EvalFile option. +// We search the given network in three locations: internally (the default +// network may be embedded in the binary), in the active working directory and +// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY +// variable to have the engine search in a special directory in their distro. - void NNUE::init() { +void NNUE::init() { std::string eval_file = std::string(Options["EvalFile"]); if (eval_file.empty()) eval_file = EvalFileDefaultName; - #if defined(DEFAULT_NNUE_DIRECTORY) - std::vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; - #else - std::vector dirs = { "" , "" , CommandLine::binaryDirectory }; - #endif +#if defined(DEFAULT_NNUE_DIRECTORY) + std::vector dirs = {"", "", CommandLine::binaryDirectory, + stringify(DEFAULT_NNUE_DIRECTORY)}; +#else + std::vector dirs = {"", "", CommandLine::binaryDirectory}; +#endif for (const std::string& directory : dirs) - if (currentEvalFileName != eval_file) - { - if (directory != "") - { + if (currentEvalFileName != eval_file) { + if (directory != "") { std::ifstream stream(directory + eval_file, std::ios::binary); if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } - if (directory == "" && eval_file == EvalFileDefaultName) - { + if (directory == "" && eval_file == EvalFileDefaultName) { // C++ way to prepare a buffer for a memory stream - class MemoryBuffer : public std::basic_streambuf { - public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); } + class MemoryBuffer: public std::basic_streambuf { + public: + MemoryBuffer(char* p, size_t n) { + setg(p, p, p + n); + setp(p, p + n); + } }; - MemoryBuffer buffer(const_cast(reinterpret_cast(gEmbeddedNNUEData)), - size_t(gEmbeddedNNUESize)); - (void) gEmbeddedNNUEEnd; // Silence warning on unused variable + MemoryBuffer buffer( + const_cast(reinterpret_cast(gEmbeddedNNUEData)), + size_t(gEmbeddedNNUESize)); + (void) gEmbeddedNNUEEnd; // Silence warning on unused variable std::istream stream(&buffer); if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } } - } +} - // NNUE::verify() verifies that the last net used was loaded successfully - void NNUE::verify() { +// NNUE::verify() verifies that the last net used was loaded successfully +void NNUE::verify() { std::string eval_file = std::string(Options["EvalFile"]); if (eval_file.empty()) eval_file = EvalFileDefaultName; - if (currentEvalFileName != eval_file) - { + if (currentEvalFileName != eval_file) { - std::string msg1 = "Network evaluation parameters compatible with the engine must be available."; + std::string msg1 = + "Network evaluation parameters compatible with the engine must be available."; std::string msg2 = "The network file " + eval_file + " was not loaded successfully."; - std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; - std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(EvalFileDefaultName); + std::string msg3 = + "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + std::string msg4 = + "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + + std::string(EvalFileDefaultName); std::string msg5 = "The engine will be terminated now."; sync_cout << "info string ERROR: " << msg1 << sync_endl; @@ -131,7 +137,7 @@ namespace Eval { } sync_cout << "info string NNUE evaluation using " << eval_file << sync_endl; - } +} } @@ -140,8 +146,8 @@ namespace Eval { // an approximation of the material advantage on the board in terms of pawns. Value Eval::simple_eval(const Position& pos, Color c) { - return PawnValue * (pos.count(c) - pos.count(~c)) - + (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); + return PawnValue * (pos.count(c) - pos.count(~c)) + + (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); } @@ -150,43 +156,40 @@ Value Eval::simple_eval(const Position& pos, Color c) { Value Eval::evaluate(const Position& pos) { - assert(!pos.checkers()); + assert(!pos.checkers()); - Value v; - Color stm = pos.side_to_move(); - int shuffling = pos.rule50_count(); - int simpleEval = simple_eval(pos, stm) + (int(pos.key() & 7) - 3); + Value v; + Color stm = pos.side_to_move(); + int shuffling = pos.rule50_count(); + int simpleEval = simple_eval(pos, stm) + (int(pos.key() & 7) - 3); - bool lazy = abs(simpleEval) >= RookValue + KnightValue - + 16 * shuffling * shuffling - + abs(pos.this_thread()->bestValue) - + abs(pos.this_thread()->rootSimpleEval); + bool lazy = abs(simpleEval) >= RookValue + KnightValue + 16 * shuffling * shuffling + + abs(pos.this_thread()->bestValue) + + abs(pos.this_thread()->rootSimpleEval); - if (lazy) - v = Value(simpleEval); - else - { - int nnueComplexity; - Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); + if (lazy) + v = Value(simpleEval); + else { + int nnueComplexity; + Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); - Value optimism = pos.this_thread()->optimism[stm]; + Value optimism = pos.this_thread()->optimism[stm]; - // Blend optimism and eval with nnue complexity and material imbalance - optimism += optimism * (nnueComplexity + abs(simpleEval - nnue)) / 512; - nnue -= nnue * (nnueComplexity + abs(simpleEval - nnue)) / 32768; + // Blend optimism and eval with nnue complexity and material imbalance + optimism += optimism * (nnueComplexity + abs(simpleEval - nnue)) / 512; + nnue -= nnue * (nnueComplexity + abs(simpleEval - nnue)) / 32768; - int npm = pos.non_pawn_material() / 64; - v = ( nnue * (915 + npm + 9 * pos.count()) - + optimism * (154 + npm )) / 1024; - } + int npm = pos.non_pawn_material() / 64; + v = (nnue * (915 + npm + 9 * pos.count()) + optimism * (154 + npm)) / 1024; + } - // Damp down the evaluation linearly when shuffling - v = v * (200 - shuffling) / 214; + // Damp down the evaluation linearly when shuffling + v = v * (200 - shuffling) / 214; - // Guarantee evaluation does not hit the tablebase range - v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - return v; + return v; } // trace() is like evaluate(), but instead of returning a value, it returns @@ -196,33 +199,33 @@ Value Eval::evaluate(const Position& pos) { std::string Eval::trace(Position& pos) { - if (pos.checkers()) - return "Final evaluation: none (in check)"; + if (pos.checkers()) + return "Final evaluation: none (in check)"; - // Reset any global variable used in eval - pos.this_thread()->bestValue = VALUE_ZERO; - pos.this_thread()->rootSimpleEval = VALUE_ZERO; - pos.this_thread()->optimism[WHITE] = VALUE_ZERO; - pos.this_thread()->optimism[BLACK] = VALUE_ZERO; + // Reset any global variable used in eval + pos.this_thread()->bestValue = VALUE_ZERO; + pos.this_thread()->rootSimpleEval = VALUE_ZERO; + pos.this_thread()->optimism[WHITE] = VALUE_ZERO; + pos.this_thread()->optimism[BLACK] = VALUE_ZERO; - std::stringstream ss; - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); - ss << '\n' << NNUE::trace(pos) << '\n'; + std::stringstream ss; + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); + ss << '\n' << NNUE::trace(pos) << '\n'; - ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); + ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); - Value v; - v = NNUE::evaluate(pos, false); - v = pos.side_to_move() == WHITE ? v : -v; - ss << "NNUE evaluation " << 0.01 * UCI::to_cp(v) << " (white side)\n"; + Value v; + v = NNUE::evaluate(pos, false); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "NNUE evaluation " << 0.01 * UCI::to_cp(v) << " (white side)\n"; - v = evaluate(pos); - v = pos.side_to_move() == WHITE ? v : -v; - ss << "Final evaluation " << 0.01 * UCI::to_cp(v) << " (white side)"; - ss << " [with scaled NNUE, ...]"; - ss << "\n"; + v = evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "Final evaluation " << 0.01 * UCI::to_cp(v) << " (white side)"; + ss << " [with scaled NNUE, ...]"; + ss << "\n"; - return ss.str(); + return ss.str(); } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/evaluate.h b/src/evaluate.h index 26f2fc4f985..2ab477eced2 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -29,27 +29,27 @@ class Position; namespace Eval { - std::string trace(Position& pos); +std::string trace(Position& pos); - Value simple_eval(const Position& pos, Color c); - Value evaluate(const Position& pos); +Value simple_eval(const Position& pos, Color c); +Value evaluate(const Position& pos); - extern std::string currentEvalFileName; +extern std::string currentEvalFileName; - // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. Do not change the - // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-0000000000a0.nnue" +// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue +// for the build process (profile-build and fishtest) to work. Do not change the +// name of the macro, as it is used in the Makefile. +#define EvalFileDefaultName "nn-0000000000a0.nnue" - namespace NNUE { +namespace NNUE { - void init(); - void verify(); +void init(); +void verify(); - } // namespace NNUE +} // namespace NNUE -} // namespace Eval +} // namespace Eval -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef EVALUATE_H_INCLUDED +#endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/main.cpp b/src/main.cpp index eee149fb455..04879cc4673 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -33,19 +33,19 @@ using namespace Stockfish; int main(int argc, char* argv[]) { - std::cout << engine_info() << std::endl; + std::cout << engine_info() << std::endl; - CommandLine::init(argc, argv); - UCI::init(Options); - Tune::init(); - Bitboards::init(); - Position::init(); - Threads.set(size_t(Options["Threads"])); - Search::clear(); // After threads are up - Eval::NNUE::init(); + CommandLine::init(argc, argv); + UCI::init(Options); + Tune::init(); + Bitboards::init(); + Position::init(); + Threads.set(size_t(Options["Threads"])); + Search::clear(); // After threads are up + Eval::NNUE::init(); - UCI::loop(argc, argv); + UCI::loop(argc, argv); - Threads.set(0); - return 0; + Threads.set(0); + return 0; } diff --git a/src/misc.cpp b/src/misc.cpp index 5abdaf07a31..9d9e6b87cc0 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -19,30 +19,31 @@ #include "misc.h" #ifdef _WIN32 -#if _WIN32_WINNT < 0x0601 -#undef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes -#endif + #if _WIN32_WINNT < 0x0601 + #undef _WIN32_WINNT + #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes + #endif -#ifndef NOMINMAX -#define NOMINMAX -#endif + #ifndef NOMINMAX + #define NOMINMAX + #endif -#include + #include // The needed Windows API for processor groups could be missed from old Windows // versions, so instead of calling them directly (forcing the linker to resolve // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. extern "C" { -using fun1_t = bool(*)(LOGICAL_PROCESSOR_RELATIONSHIP, - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); -using fun2_t = bool(*)(USHORT, PGROUP_AFFINITY); -using fun3_t = bool(*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); -using fun4_t = bool(*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); -using fun5_t = WORD(*)(); -using fun6_t = bool(*)(HANDLE, DWORD, PHANDLE); -using fun7_t = bool(*)(LPCSTR, LPCSTR, PLUID); -using fun8_t = bool(*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); +using fun1_t = bool (*)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, + PDWORD); +using fun2_t = bool (*)(USHORT, PGROUP_AFFINITY); +using fun3_t = bool (*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +using fun4_t = bool (*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +using fun5_t = WORD (*)(); +using fun6_t = bool (*)(HANDLE, DWORD, PHANDLE); +using fun7_t = bool (*)(LPCSTR, LPCSTR, PLUID); +using fun8_t = bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); } #endif @@ -59,12 +60,14 @@ using fun8_t = bool(*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES #include "types.h" #if defined(__linux__) && !defined(__ANDROID__) -#include + #include #endif -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) || defined(__e2k__) -#define POSIXALIGNEDALLOC -#include +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \ + || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \ + || defined(__e2k__) + #define POSIXALIGNEDALLOC + #include #endif namespace Stockfish { @@ -80,65 +83,66 @@ constexpr std::string_view version = "dev"; // usual I/O functionality, all without changing a single line of code! // Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81 -struct Tie: public std::streambuf { // MSVC requires split streambuf for cin and cout +struct Tie: public std::streambuf { // MSVC requires split streambuf for cin and cout - Tie(std::streambuf* b, std::streambuf* l) : buf(b), logBuf(l) {} + Tie(std::streambuf* b, std::streambuf* l) : + buf(b), + logBuf(l) {} - int sync() override { return logBuf->pubsync(), buf->pubsync(); } - int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); } - int underflow() override { return buf->sgetc(); } - int uflow() override { return log(buf->sbumpc(), ">> "); } + int sync() override { return logBuf->pubsync(), buf->pubsync(); } + int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); } + int underflow() override { return buf->sgetc(); } + int uflow() override { return log(buf->sbumpc(), ">> "); } - std::streambuf *buf, *logBuf; + std::streambuf *buf, *logBuf; - int log(int c, const char* prefix) { + int log(int c, const char* prefix) { - static int last = '\n'; // Single log file + static int last = '\n'; // Single log file - if (last == '\n') - logBuf->sputn(prefix, 3); + if (last == '\n') + logBuf->sputn(prefix, 3); - return last = logBuf->sputc(char(c)); - } + return last = logBuf->sputc(char(c)); + } }; class Logger { - Logger() : in(std::cin.rdbuf(), file.rdbuf()), out(std::cout.rdbuf(), file.rdbuf()) {} - ~Logger() { start(""); } + Logger() : + in(std::cin.rdbuf(), file.rdbuf()), + out(std::cout.rdbuf(), file.rdbuf()) {} + ~Logger() { start(""); } - std::ofstream file; - Tie in, out; + std::ofstream file; + Tie in, out; -public: - static void start(const std::string& fname) { + public: + static void start(const std::string& fname) { - static Logger l; + static Logger l; - if (l.file.is_open()) - { - std::cout.rdbuf(l.out.buf); - std::cin.rdbuf(l.in.buf); - l.file.close(); - } + if (l.file.is_open()) { + std::cout.rdbuf(l.out.buf); + std::cin.rdbuf(l.in.buf); + l.file.close(); + } - if (!fname.empty()) - { - l.file.open(fname, std::ifstream::out); + if (!fname.empty()) { + l.file.open(fname, std::ifstream::out); - if (!l.file.is_open()) - { - std::cerr << "Unable to open debug log file " << fname << std::endl; - exit(EXIT_FAILURE); - } + if (!l.file.is_open()) { + std::cerr << "Unable to open debug log file " << fname << std::endl; + exit(EXIT_FAILURE); + } - std::cin.rdbuf(&l.in); - std::cout.rdbuf(&l.out); + std::cin.rdbuf(&l.in); + std::cout.rdbuf(&l.out); + } } - } }; -} // namespace +} // namespace // engine_info() returns the full name of the current Stockfish version. @@ -152,36 +156,35 @@ class Logger { // Stockfish version std::string engine_info(bool to_uci) { - std::stringstream ss; - ss << "Stockfish " << version << std::setfill('0'); - - if constexpr (version == "dev") - { - ss << "-"; - #ifdef GIT_DATE - ss << stringify(GIT_DATE); - #else - constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); - std::string month, day, year; - std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" - - date >> month >> day >> year; - ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) << std::setw(2) << std::setfill('0') << day; - #endif - - ss << "-"; - - #ifdef GIT_SHA - ss << stringify(GIT_SHA); - #else - ss << "nogit"; - #endif - } - - ss << (to_uci ? "\nid author ": " by ") - << "the Stockfish developers (see AUTHORS file)"; - - return ss.str(); + std::stringstream ss; + ss << "Stockfish " << version << std::setfill('0'); + + if constexpr (version == "dev") { + ss << "-"; +#ifdef GIT_DATE + ss << stringify(GIT_DATE); +#else + constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); + std::string month, day, year; + std::stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" + + date >> month >> day >> year; + ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4) + << std::setw(2) << std::setfill('0') << day; +#endif + + ss << "-"; + +#ifdef GIT_SHA + ss << stringify(GIT_SHA); +#else + ss << "nogit"; +#endif + } + + ss << (to_uci ? "\nid author " : " by ") << "the Stockfish developers (see AUTHORS file)"; + + return ss.str(); } @@ -189,119 +192,118 @@ std::string engine_info(bool to_uci) { std::string compiler_info() { - #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch) - -// Predefined macros hell: -// -// __GNUC__ Compiler is GCC, Clang or ICX -// __clang__ Compiler is Clang or ICX -// __INTEL_LLVM_COMPILER Compiler is ICX -// _MSC_VER Compiler is MSVC -// _WIN32 Building on Windows (any) -// _WIN64 Building on Windows 64 bit - - std::string compiler = "\nCompiled by : "; - - #if defined(__INTEL_LLVM_COMPILER) - compiler += "ICX "; - compiler += stringify(__INTEL_LLVM_COMPILER); - #elif defined(__clang__) - compiler += "clang++ "; - compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); - #elif _MSC_VER - compiler += "MSVC "; - compiler += "(version "; - compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); - compiler += ")"; - #elif defined(__e2k__) && defined(__LCC__) +#define make_version_string(major, minor, patch) \ + stringify(major) "." stringify(minor) "." stringify(patch) + + // Predefined macros hell: + // + // __GNUC__ Compiler is GCC, Clang or ICX + // __clang__ Compiler is Clang or ICX + // __INTEL_LLVM_COMPILER Compiler is ICX + // _MSC_VER Compiler is MSVC + // _WIN32 Building on Windows (any) + // _WIN64 Building on Windows 64 bit + + std::string compiler = "\nCompiled by : "; + +#if defined(__INTEL_LLVM_COMPILER) + compiler += "ICX "; + compiler += stringify(__INTEL_LLVM_COMPILER); +#elif defined(__clang__) + compiler += "clang++ "; + compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__); +#elif _MSC_VER + compiler += "MSVC "; + compiler += "(version "; + compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD); + compiler += ")"; +#elif defined(__e2k__) && defined(__LCC__) #define dot_ver2(n) \ - compiler += char('.'); \ - compiler += char('0' + (n) / 10); \ - compiler += char('0' + (n) % 10); - - compiler += "MCST LCC "; - compiler += "(version "; - compiler += std::to_string(__LCC__ / 100); - dot_ver2(__LCC__ % 100) - dot_ver2(__LCC_MINOR__) - compiler += ")"; - #elif __GNUC__ - compiler += "g++ (GNUC) "; - compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); - #else - compiler += "Unknown compiler "; - compiler += "(unknown version)"; - #endif - - #if defined(__APPLE__) - compiler += " on Apple"; - #elif defined(__CYGWIN__) - compiler += " on Cygwin"; - #elif defined(__MINGW64__) - compiler += " on MinGW64"; - #elif defined(__MINGW32__) - compiler += " on MinGW32"; - #elif defined(__ANDROID__) - compiler += " on Android"; - #elif defined(__linux__) - compiler += " on Linux"; - #elif defined(_WIN64) - compiler += " on Microsoft Windows 64-bit"; - #elif defined(_WIN32) - compiler += " on Microsoft Windows 32-bit"; - #else - compiler += " on unknown system"; - #endif - - compiler += "\nCompilation architecture : "; - #if defined(ARCH) - compiler += stringify(ARCH); - #else - compiler += "(undefined architecture)"; - #endif - - compiler += "\nCompilation settings : "; - compiler += (Is64Bit ? "64bit" : "32bit"); - #if defined(USE_VNNI) + compiler += char('.'); \ + compiler += char('0' + (n) / 10); \ + compiler += char('0' + (n) % 10); + + compiler += "MCST LCC "; + compiler += "(version "; + compiler += std::to_string(__LCC__ / 100); + dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")"; +#elif __GNUC__ + compiler += "g++ (GNUC) "; + compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); +#else + compiler += "Unknown compiler "; + compiler += "(unknown version)"; +#endif + +#if defined(__APPLE__) + compiler += " on Apple"; +#elif defined(__CYGWIN__) + compiler += " on Cygwin"; +#elif defined(__MINGW64__) + compiler += " on MinGW64"; +#elif defined(__MINGW32__) + compiler += " on MinGW32"; +#elif defined(__ANDROID__) + compiler += " on Android"; +#elif defined(__linux__) + compiler += " on Linux"; +#elif defined(_WIN64) + compiler += " on Microsoft Windows 64-bit"; +#elif defined(_WIN32) + compiler += " on Microsoft Windows 32-bit"; +#else + compiler += " on unknown system"; +#endif + + compiler += "\nCompilation architecture : "; +#if defined(ARCH) + compiler += stringify(ARCH); +#else + compiler += "(undefined architecture)"; +#endif + + compiler += "\nCompilation settings : "; + compiler += (Is64Bit ? "64bit" : "32bit"); +#if defined(USE_VNNI) compiler += " VNNI"; - #endif - #if defined(USE_AVX512) +#endif +#if defined(USE_AVX512) compiler += " AVX512"; - #endif - compiler += (HasPext ? " BMI2" : ""); - #if defined(USE_AVX2) +#endif + compiler += (HasPext ? " BMI2" : ""); +#if defined(USE_AVX2) compiler += " AVX2"; - #endif - #if defined(USE_SSE41) +#endif +#if defined(USE_SSE41) compiler += " SSE41"; - #endif - #if defined(USE_SSSE3) +#endif +#if defined(USE_SSSE3) compiler += " SSSE3"; - #endif - #if defined(USE_SSE2) +#endif +#if defined(USE_SSE2) compiler += " SSE2"; - #endif - compiler += (HasPopCnt ? " POPCNT" : ""); - #if defined(USE_NEON_DOTPROD) +#endif + compiler += (HasPopCnt ? " POPCNT" : ""); +#if defined(USE_NEON_DOTPROD) compiler += " NEON_DOTPROD"; - #elif defined(USE_NEON) +#elif defined(USE_NEON) compiler += " NEON"; - #endif +#endif - #if !defined(NDEBUG) +#if !defined(NDEBUG) compiler += " DEBUG"; - #endif +#endif - compiler += "\nCompiler __VERSION__ macro : "; - #ifdef __VERSION__ - compiler += __VERSION__; - #else - compiler += "(undefined macro)"; - #endif + compiler += "\nCompiler __VERSION__ macro : "; +#ifdef __VERSION__ + compiler += __VERSION__; +#else + compiler += "(undefined macro)"; +#endif - compiler += "\n"; + compiler += "\n"; - return compiler; + return compiler; } @@ -312,7 +314,7 @@ namespace { template struct DebugInfo { - std::atomic data[N] = { 0 }; + std::atomic data[N] = {0}; constexpr inline std::atomic& operator[](int index) { return data[index]; } }; @@ -357,42 +359,31 @@ void dbg_correl_of(int64_t value1, int64_t value2, int slot) { void dbg_print() { int64_t n; - auto E = [&n](int64_t x) { return double(x) / n; }; - auto sqr = [](double x) { return x * x; }; + auto E = [&n](int64_t x) { return double(x) / n; }; + auto sqr = [](double x) { return x * x; }; for (int i = 0; i < MaxDebugSlots; ++i) if ((n = hit[i][0])) - std::cerr << "Hit #" << i - << ": Total " << n << " Hits " << hit[i][1] - << " Hit Rate (%) " << 100.0 * E(hit[i][1]) - << std::endl; + std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1] + << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl; for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = mean[i][0])) - { - std::cerr << "Mean #" << i - << ": Total " << n << " Mean " << E(mean[i][1]) - << std::endl; + if ((n = mean[i][0])) { + std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl; } for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = stdev[i][0])) - { + if ((n = stdev[i][0])) { double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1]))); - std::cerr << "Stdev #" << i - << ": Total " << n << " Stdev " << r - << std::endl; + std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl; } for (int i = 0; i < MaxDebugSlots; ++i) - if ((n = correl[i][0])) - { + if ((n = correl[i][0])) { double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3])) - / ( sqrt(E(correl[i][2]) - sqr(E(correl[i][1]))) - * sqrt(E(correl[i][4]) - sqr(E(correl[i][3])))); - std::cerr << "Correl. #" << i - << ": Total " << n << " Coefficient " << r - << std::endl; + / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1]))) + * sqrt(E(correl[i][4]) - sqr(E(correl[i][3])))); + std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl; } } @@ -402,15 +393,15 @@ void dbg_print() { std::ostream& operator<<(std::ostream& os, SyncCout sc) { - static std::mutex m; + static std::mutex m; - if (sc == IO_LOCK) - m.lock(); + if (sc == IO_LOCK) + m.lock(); - if (sc == IO_UNLOCK) - m.unlock(); + if (sc == IO_UNLOCK) + m.unlock(); - return os; + return os; } @@ -429,11 +420,11 @@ void prefetch(void*) {} void prefetch(void* addr) { -# if defined(_MSC_VER) - _mm_prefetch((char*)addr, _MM_HINT_T0); -# else - __builtin_prefetch(addr); -# endif + #if defined(_MSC_VER) + _mm_prefetch((char*) addr, _MM_HINT_T0); + #else + __builtin_prefetch(addr); + #endif } #endif @@ -446,27 +437,27 @@ void prefetch(void* addr) { void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(POSIXALIGNEDALLOC) - void *mem; - return posix_memalign(&mem, alignment, size) ? nullptr : mem; + void* mem; + return posix_memalign(&mem, alignment, size) ? nullptr : mem; #elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - return _mm_malloc(size, alignment); + return _mm_malloc(size, alignment); #elif defined(_WIN32) - return _aligned_malloc(size, alignment); + return _aligned_malloc(size, alignment); #else - return std::aligned_alloc(alignment, size); + return std::aligned_alloc(alignment, size); #endif } void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) - free(ptr); + free(ptr); #elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) - _mm_free(ptr); + _mm_free(ptr); #elif defined(_WIN32) - _aligned_free(ptr); + _aligned_free(ptr); #else - free(ptr); + free(ptr); #endif } @@ -476,104 +467,102 @@ void std_aligned_free(void* ptr) { static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) { - #if !defined(_WIN64) + #if !defined(_WIN64) return nullptr; - #else - - HANDLE hProcessToken { }; - LUID luid { }; - void* mem = nullptr; - - const size_t largePageSize = GetLargePageMinimum(); - if (!largePageSize) - return nullptr; - - // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges - - HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); - - if (!hAdvapi32) - hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); - - auto fun6 = fun6_t((void(*)())GetProcAddress(hAdvapi32, "OpenProcessToken")); - if (!fun6) - return nullptr; - auto fun7 = fun7_t((void(*)())GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); - if (!fun7) - return nullptr; - auto fun8 = fun8_t((void(*)())GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); - if (!fun8) - return nullptr; - - // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!fun6( // OpenProcessToken() - GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) - return nullptr; - - if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) - nullptr, "SeLockMemoryPrivilege", &luid)) - { - TOKEN_PRIVILEGES tp { }; - TOKEN_PRIVILEGES prevTp { }; - DWORD prevTpLen = 0; - - tp.PrivilegeCount = 1; - tp.Privileges[0].Luid = luid; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, - // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (fun8( // AdjustTokenPrivileges() - hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && - GetLastError() == ERROR_SUCCESS) - { - // Round up size to full pages and allocate - allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); - mem = VirtualAlloc( - nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); - - // Privilege no longer needed, restore previous state - fun8( // AdjustTokenPrivileges () + #else + + HANDLE hProcessToken{}; + LUID luid{}; + void* mem = nullptr; + + const size_t largePageSize = GetLargePageMinimum(); + if (!largePageSize) + return nullptr; + + // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges + + HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); + + if (!hAdvapi32) + hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); + + auto fun6 = fun6_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken")); + if (!fun6) + return nullptr; + auto fun7 = fun7_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA")); + if (!fun7) + return nullptr; + auto fun8 = fun8_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges")); + if (!fun8) + return nullptr; + + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!fun6( // OpenProcessToken() + GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; + + if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) + nullptr, "SeLockMemoryPrivilege", &luid)) { + TOKEN_PRIVILEGES tp{}; + TOKEN_PRIVILEGES prevTp{}; + DWORD prevTpLen = 0; + + tp.PrivilegeCount = 1; + tp.Privileges[0].Luid = luid; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, + // we still need to query GetLastError() to ensure that the privileges were actually obtained. + if (fun8( // AdjustTokenPrivileges() + hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) + && GetLastError() == ERROR_SUCCESS) { + // Round up size to full pages and allocate + allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, + PAGE_READWRITE); + + // Privilege no longer needed, restore previous state + fun8( // AdjustTokenPrivileges () hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); - } - } + } + } - CloseHandle(hProcessToken); + CloseHandle(hProcessToken); - return mem; + return mem; - #endif + #endif } void* aligned_large_pages_alloc(size_t allocSize) { - // Try to allocate large pages - void* mem = aligned_large_pages_alloc_windows(allocSize); + // Try to allocate large pages + void* mem = aligned_large_pages_alloc_windows(allocSize); - // Fall back to regular, page-aligned, allocation if necessary - if (!mem) - mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + // Fall back to regular, page-aligned, allocation if necessary + if (!mem) + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); - return mem; + return mem; } #else void* aligned_large_pages_alloc(size_t allocSize) { -#if defined(__linux__) - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size -#else - constexpr size_t alignment = 4096; // assumed small page size -#endif - - // round up to multiples of alignment - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; - void *mem = std_aligned_alloc(alignment, size); -#if defined(MADV_HUGEPAGE) - madvise(mem, size, MADV_HUGEPAGE); -#endif - return mem; + #if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size + #else + constexpr size_t alignment = 4096; // assumed small page size + #endif + + // round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void* mem = std_aligned_alloc(alignment, size); + #if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); + #endif + return mem; } #endif @@ -585,21 +574,17 @@ void* aligned_large_pages_alloc(size_t allocSize) { void aligned_large_pages_free(void* mem) { - if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) - { - DWORD err = GetLastError(); - std::cerr << "Failed to free large page memory. Error code: 0x" - << std::hex << err - << std::dec << std::endl; - exit(EXIT_FAILURE); - } + if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { + DWORD err = GetLastError(); + std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err + << std::dec << std::endl; + exit(EXIT_FAILURE); + } } #else -void aligned_large_pages_free(void *mem) { - std_aligned_free(mem); -} +void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } #endif @@ -618,69 +603,66 @@ void bindThisThread(size_t) {} static int best_node(size_t idx) { - int threads = 0; - int nodes = 0; - int cores = 0; - DWORD returnLength = 0; - DWORD byteOffset = 0; - - // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); - if (!fun1) - return -1; - - // First call to GetLogicalProcessorInformationEx() to get returnLength. - // We expect the call to fail due to null buffer. - if (fun1(RelationAll, nullptr, &returnLength)) - return -1; - - // Once we know returnLength, allocate the buffer - SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; - ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength); - - // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed - if (!fun1(RelationAll, buffer, &returnLength)) - { - free(buffer); - return -1; - } - - while (byteOffset < returnLength) - { - if (ptr->Relationship == RelationNumaNode) - nodes++; - - else if (ptr->Relationship == RelationProcessorCore) - { - cores++; - threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; - } - - assert(ptr->Size); - byteOffset += ptr->Size; - ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size); - } - - free(buffer); - - std::vector groups; - - // Run as many threads as possible on the same node until the core limit is - // reached, then move on to filling the next node. - for (int n = 0; n < nodes; n++) - for (int i = 0; i < cores / nodes; i++) - groups.push_back(n); - - // In case a core has more than one logical processor (we assume 2) and we - // have still threads to allocate, then spread them evenly across available - // nodes. - for (int t = 0; t < threads - cores; t++) - groups.push_back(t % nodes); - - // If we still have more threads than the total number of logical processors - // then return -1 and let the OS to decide what to do. - return idx < groups.size() ? groups[idx] : -1; + int threads = 0; + int nodes = 0; + int cores = 0; + DWORD returnLength = 0; + DWORD byteOffset = 0; + + // Early exit if the needed API is not available at runtime + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto fun1 = (fun1_t) (void (*)()) GetProcAddress(k32, "GetLogicalProcessorInformationEx"); + if (!fun1) + return -1; + + // First call to GetLogicalProcessorInformationEx() to get returnLength. + // We expect the call to fail due to null buffer. + if (fun1(RelationAll, nullptr, &returnLength)) + return -1; + + // Once we know returnLength, allocate the buffer + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr; + ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) malloc(returnLength); + + // Second call to GetLogicalProcessorInformationEx(), now we expect to succeed + if (!fun1(RelationAll, buffer, &returnLength)) { + free(buffer); + return -1; + } + + while (byteOffset < returnLength) { + if (ptr->Relationship == RelationNumaNode) + nodes++; + + else if (ptr->Relationship == RelationProcessorCore) { + cores++; + threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1; + } + + assert(ptr->Size); + byteOffset += ptr->Size; + ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) (((char*) ptr) + ptr->Size); + } + + free(buffer); + + std::vector groups; + + // Run as many threads as possible on the same node until the core limit is + // reached, then move on to filling the next node. + for (int n = 0; n < nodes; n++) + for (int i = 0; i < cores / nodes; i++) + groups.push_back(n); + + // In case a core has more than one logical processor (we assume 2) and we + // have still threads to allocate, then spread them evenly across available + // nodes. + for (int t = 0; t < threads - cores; t++) + groups.push_back(t % nodes); + + // If we still have more threads than the total number of logical processors + // then return -1 and let the OS to decide what to do. + return idx < groups.size() ? groups[idx] : -1; } @@ -688,58 +670,56 @@ static int best_node(size_t idx) { void bindThisThread(size_t idx) { - // Use only local variables to be thread-safe - int node = best_node(idx); - - if (node == -1) - return; - - // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); - auto fun2 = fun2_t((void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx")); - auto fun3 = fun3_t((void(*)())GetProcAddress(k32, "SetThreadGroupAffinity")); - auto fun4 = fun4_t((void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2")); - auto fun5 = fun5_t((void(*)())GetProcAddress(k32, "GetMaximumProcessorGroupCount")); - - if (!fun2 || !fun3) - return; - - if (!fun4 || !fun5) - { - GROUP_AFFINITY affinity; - if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx - fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity - } - else - { - // If a numa node has more than one processor group, we assume they are - // sized equal and we spread threads evenly across the groups. - USHORT elements, returnedElements; - elements = fun5(); // GetMaximumProcessorGroupCount - GROUP_AFFINITY *affinity = (GROUP_AFFINITY*)malloc(elements * sizeof(GROUP_AFFINITY)); - if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 - fun3(GetCurrentThread(), &affinity[idx % returnedElements], nullptr); // SetThreadGroupAffinity - free(affinity); - } + // Use only local variables to be thread-safe + int node = best_node(idx); + + if (node == -1) + return; + + // Early exit if the needed API are not available at runtime + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); + auto fun2 = fun2_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMaskEx")); + auto fun3 = fun3_t((void (*)()) GetProcAddress(k32, "SetThreadGroupAffinity")); + auto fun4 = fun4_t((void (*)()) GetProcAddress(k32, "GetNumaNodeProcessorMask2")); + auto fun5 = fun5_t((void (*)()) GetProcAddress(k32, "GetMaximumProcessorGroupCount")); + + if (!fun2 || !fun3) + return; + + if (!fun4 || !fun5) { + GROUP_AFFINITY affinity; + if (fun2(node, &affinity)) // GetNumaNodeProcessorMaskEx + fun3(GetCurrentThread(), &affinity, nullptr); // SetThreadGroupAffinity + } else { + // If a numa node has more than one processor group, we assume they are + // sized equal and we spread threads evenly across the groups. + USHORT elements, returnedElements; + elements = fun5(); // GetMaximumProcessorGroupCount + GROUP_AFFINITY* affinity = (GROUP_AFFINITY*) malloc(elements * sizeof(GROUP_AFFINITY)); + if (fun4(node, affinity, elements, &returnedElements)) // GetNumaNodeProcessorMask2 + fun3(GetCurrentThread(), &affinity[idx % returnedElements], + nullptr); // SetThreadGroupAffinity + free(affinity); + } } #endif -} // namespace WinProcGroup +} // namespace WinProcGroup #ifdef _WIN32 -#include -#define GETCWD _getcwd + #include + #define GETCWD _getcwd #else -#include -#define GETCWD getcwd + #include + #define GETCWD getcwd #endif namespace CommandLine { -std::string argv0; // path+name of the executable binary, as given by argv[0] -std::string binaryDirectory; // path of the executable directory -std::string workingDirectory; // path of the working directory +std::string argv0; // path+name of the executable binary, as given by argv[0] +std::string binaryDirectory; // path of the executable directory +std::string workingDirectory; // path of the working directory void init([[maybe_unused]] int argc, char* argv[]) { std::string pathSeparator; @@ -749,27 +729,27 @@ void init([[maybe_unused]] int argc, char* argv[]) { #ifdef _WIN32 pathSeparator = "\\"; - #ifdef _MSC_VER + #ifdef _MSC_VER // Under windows argv[0] may not have the extension. Also _get_pgmptr() had // issues in some Windows 10 versions, so check returned values carefully. char* pgmptr = nullptr; if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) argv0 = pgmptr; - #endif + #endif #else pathSeparator = "/"; #endif // extract the working directory workingDirectory = ""; - char buff[40000]; + char buff[40000]; char* cwd = GETCWD(buff, 40000); if (cwd) workingDirectory = cwd; // extract the binary directory path from argv0 binaryDirectory = argv0; - size_t pos = binaryDirectory.find_last_of("\\/"); + size_t pos = binaryDirectory.find_last_of("\\/"); if (pos == std::string::npos) binaryDirectory = "." + pathSeparator; else @@ -781,6 +761,6 @@ void init([[maybe_unused]] int argc, char* argv[]) { } -} // namespace CommandLine +} // namespace CommandLine -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/misc.h b/src/misc.h index 60602048b9c..3cd3315a8ed 100644 --- a/src/misc.h +++ b/src/misc.h @@ -33,12 +33,13 @@ namespace Stockfish { std::string engine_info(bool to_uci = false); std::string compiler_info(); -void prefetch(void* addr); -void start_logger(const std::string& fname); -void* std_aligned_alloc(size_t alignment, size_t size); -void std_aligned_free(void* ptr); -void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes -void aligned_large_pages_free(void* mem); // nop if mem == nullptr +void prefetch(void* addr); +void start_logger(const std::string& fname); +void* std_aligned_alloc(size_t alignment, size_t size); +void std_aligned_free(void* ptr); +void* aligned_large_pages_alloc( + size_t size); // memory aligned by page size, min alignment: 4096 bytes +void aligned_large_pages_free(void* mem); // nop if mem == nullptr void dbg_hit_on(bool cond, int slot = 0); void dbg_mean_of(int64_t value, int slot = 0); @@ -46,15 +47,19 @@ void dbg_stdev_of(int64_t value, int slot = 0); void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); -using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds +using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); inline TimePoint now() { - return std::chrono::duration_cast - (std::chrono::steady_clock::now().time_since_epoch()).count(); + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); } -enum SyncCout { IO_LOCK, IO_UNLOCK }; +enum SyncCout { + IO_LOCK, + IO_UNLOCK +}; std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK @@ -64,34 +69,37 @@ std::ostream& operator<<(std::ostream&, SyncCout); // align_ptr_up() : get the first aligned element of an array. // ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes, // where N is the number of elements in the array. -template -T* align_ptr_up(T* ptr) -{ - static_assert(alignof(T) < Alignment); +template +T* align_ptr_up(T* ptr) { + static_assert(alignof(T) < Alignment); - const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); - return reinterpret_cast(reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); + const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr)); + return reinterpret_cast( + reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment)); } // IsLittleEndian : true if and only if the binary is compiled on a little-endian machine -static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; +static inline const union { + uint32_t i; + char c[4]; +} Le = {0x01020304}; static inline const bool IsLittleEndian = (Le.c[0] == 4); -template +template class ValueList { -public: - std::size_t size() const { return size_; } - void push_back(const T& value) { values_[size_++] = value; } - const T* begin() const { return values_; } - const T* end() const { return values_ + size_; } - const T& operator[](int index) const { return values_[index]; } + public: + std::size_t size() const { return size_; } + void push_back(const T& value) { values_[size_++] = value; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + const T& operator[](int index) const { return values_[index]; } -private: - T values_[MaxSize]; - std::size_t size_ = 0; + private: + T values_[MaxSize]; + std::size_t size_ = 0; }; @@ -112,23 +120,31 @@ class ValueList { class PRNG { - uint64_t s; + uint64_t s; - uint64_t rand64() { + uint64_t rand64() { - s ^= s >> 12, s ^= s << 25, s ^= s >> 27; - return s * 2685821657736338717LL; - } + s ^= s >> 12, s ^= s << 25, s ^= s >> 27; + return s * 2685821657736338717LL; + } -public: - PRNG(uint64_t seed) : s(seed) { assert(seed); } + public: + PRNG(uint64_t seed) : + s(seed) { + assert(seed); + } - template T rand() { return T(rand64()); } + template + T rand() { + return T(rand64()); + } - // Special generator used to fast init magic numbers. - // Output values only have 1/8th of their bits set on average. - template T sparse_rand() - { return T(rand64() & rand64() & rand64()); } + // Special generator used to fast init magic numbers. + // Output values only have 1/8th of their bits set on average. + template + T sparse_rand() { + return T(rand64() & rand64() & rand64()); + } }; inline uint64_t mul_hi64(uint64_t a, uint64_t b) { @@ -152,16 +168,16 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) { // Peter Österlund. namespace WinProcGroup { - void bindThisThread(size_t idx); +void bindThisThread(size_t idx); } namespace CommandLine { - void init(int argc, char* argv[]); +void init(int argc, char* argv[]); - extern std::string binaryDirectory; // path of the executable directory - extern std::string workingDirectory; // path of the working directory +extern std::string binaryDirectory; // path of the executable directory +extern std::string workingDirectory; // path of the working directory } -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MISC_H_INCLUDED +#endif // #ifndef MISC_H_INCLUDED diff --git a/src/movegen.cpp b/src/movegen.cpp index 82ad60613c2..d8e643885a7 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -28,90 +28,81 @@ namespace Stockfish { namespace { - template - ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) { +template +ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) { - if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) - { + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, QUEEN); - if constexpr (Enemy && Type == CAPTURES) - { + if constexpr (Enemy && Type == CAPTURES) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); *moveList++ = make(to - D, to, KNIGHT); } } - if constexpr ((Type == QUIETS && !Enemy) || Type == EVASIONS || Type == NON_EVASIONS) - { + if constexpr ((Type == QUIETS && !Enemy) || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); *moveList++ = make(to - D, to, KNIGHT); } return moveList; - } +} - template - ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) { +template +ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) { constexpr Color Them = ~Us; - constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); - constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); + constexpr Bitboard TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB); + constexpr Bitboard TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB); constexpr Direction Up = pawn_push(Us); constexpr Direction UpRight = (Us == WHITE ? NORTH_EAST : SOUTH_WEST); constexpr Direction UpLeft = (Us == WHITE ? NORTH_WEST : SOUTH_EAST); const Bitboard emptySquares = ~pos.pieces(); - const Bitboard enemies = Type == EVASIONS ? pos.checkers() - : pos.pieces(Them); + const Bitboard enemies = Type == EVASIONS ? pos.checkers() : pos.pieces(Them); - Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; + Bitboard pawnsOn7 = pos.pieces(Us, PAWN) & TRank7BB; Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; // Single and double pawn pushes, no promotions - if constexpr (Type != CAPTURES) - { - Bitboard b1 = shift(pawnsNotOn7) & emptySquares; + if constexpr (Type != CAPTURES) { + Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; - if constexpr (Type == EVASIONS) // Consider only blocking squares + if constexpr (Type == EVASIONS) // Consider only blocking squares { b1 &= target; b2 &= target; } - if constexpr (Type == QUIET_CHECKS) - { + if constexpr (Type == QUIET_CHECKS) { // To make a quiet check, you either make a direct check by pushing a pawn // or push a blocker pawn that is not on the same file as the enemy king. // Discovered check promotion has been already generated amongst the captures. - Square ksq = pos.square(Them); + Square ksq = pos.square(Them); Bitboard dcCandidatePawns = pos.blockers_for_king(Them) & ~file_bb(ksq); - b1 &= pawn_attacks_bb(Them, ksq) | shift< Up>(dcCandidatePawns); - b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); + b1 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); + b2 &= pawn_attacks_bb(Them, ksq) | shift(dcCandidatePawns); } - while (b1) - { - Square to = pop_lsb(b1); + while (b1) { + Square to = pop_lsb(b1); *moveList++ = make_move(to - Up, to); } - while (b2) - { - Square to = pop_lsb(b2); + while (b2) { + Square to = pop_lsb(b2); *moveList++ = make_move(to - Up - Up, to); } } // Promotions and underpromotions - if (pawnsOn7) - { + if (pawnsOn7) { Bitboard b1 = shift(pawnsOn7) & enemies; - Bitboard b2 = shift(pawnsOn7) & enemies; - Bitboard b3 = shift(pawnsOn7) & emptySquares; + Bitboard b2 = shift(pawnsOn7) & enemies; + Bitboard b3 = shift(pawnsOn7) & emptySquares; if constexpr (Type == EVASIONS) b3 &= target; @@ -123,29 +114,25 @@ namespace { moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) - moveList = make_promotions(moveList, pop_lsb(b3)); + moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures - if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) - { + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { Bitboard b1 = shift(pawnsNotOn7) & enemies; - Bitboard b2 = shift(pawnsNotOn7) & enemies; + Bitboard b2 = shift(pawnsNotOn7) & enemies; - while (b1) - { - Square to = pop_lsb(b1); + while (b1) { + Square to = pop_lsb(b1); *moveList++ = make_move(to - UpRight, to); } - while (b2) - { - Square to = pop_lsb(b2); + while (b2) { + Square to = pop_lsb(b2); *moveList++ = make_move(to - UpLeft, to); } - if (pos.ep_square() != SQ_NONE) - { + if (pos.ep_square() != SQ_NONE) { assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6)); // An en passant capture cannot resolve a discovered check @@ -162,20 +149,19 @@ namespace { } return moveList; - } +} - template - ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { +template +ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) { static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()"); Bitboard bb = pos.pieces(Us, Pt); - while (bb) - { - Square from = pop_lsb(bb); - Bitboard b = attacks_bb(from, pos.pieces()) & target; + while (bb) { + Square from = pop_lsb(bb); + Bitboard b = attacks_bb(from, pos.pieces()) & target; // To check, you either move freely a blocker or make a direct check. if (Checks && (Pt == QUEEN || !(pos.blockers_for_king(~Us) & from))) @@ -186,35 +172,33 @@ namespace { } return moveList; - } +} - template - ExtMove* generate_all(const Position& pos, ExtMove* moveList) { +template +ExtMove* generate_all(const Position& pos, ExtMove* moveList) { static_assert(Type != LEGAL, "Unsupported type in generate_all()"); - constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations - const Square ksq = pos.square(Us); - Bitboard target; + constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations + const Square ksq = pos.square(Us); + Bitboard target; // Skip generating non-king moves when in double check - if (Type != EVASIONS || !more_than_one(pos.checkers())) - { - target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) - : Type == NON_EVASIONS ? ~pos.pieces( Us) - : Type == CAPTURES ? pos.pieces(~Us) - : ~pos.pieces( ); // QUIETS || QUIET_CHECKS + if (Type != EVASIONS || !more_than_one(pos.checkers())) { + target = Type == EVASIONS ? between_bb(ksq, lsb(pos.checkers())) + : Type == NON_EVASIONS ? ~pos.pieces(Us) + : Type == CAPTURES ? pos.pieces(~Us) + : ~pos.pieces(); // QUIETS || QUIET_CHECKS moveList = generate_pawn_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); - moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); + moveList = generate_moves(pos, moveList, target); } - if (!Checks || pos.blockers_for_king(~Us) & ksq) - { + if (!Checks || pos.blockers_for_king(~Us) & ksq) { Bitboard b = attacks_bb(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target); if (Checks) b &= ~attacks_bb(pos.square(~Us)); @@ -223,15 +207,15 @@ namespace { *moveList++ = make_move(ksq, pop_lsb(b)); if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING)) - for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE}) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = make(ksq, pos.castling_rook_square(cr)); } return moveList; - } +} -} // namespace +} // namespace // Generates all pseudo-legal captures plus queen promotions @@ -246,13 +230,13 @@ namespace { template ExtMove* generate(const Position& pos, ExtMove* moveList) { - static_assert(Type != LEGAL, "Unsupported type in generate()"); - assert((Type == EVASIONS) == bool(pos.checkers())); + static_assert(Type != LEGAL, "Unsupported type in generate()"); + assert((Type == EVASIONS) == bool(pos.checkers())); - Color us = pos.side_to_move(); + Color us = pos.side_to_move(); - return us == WHITE ? generate_all(pos, moveList) - : generate_all(pos, moveList); + return us == WHITE ? generate_all(pos, moveList) + : generate_all(pos, moveList); } // Explicit template instantiations @@ -268,21 +252,21 @@ template ExtMove* generate(const Position&, ExtMove*); template<> ExtMove* generate(const Position& pos, ExtMove* moveList) { - Color us = pos.side_to_move(); - Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); - Square ksq = pos.square(us); - ExtMove* cur = moveList; - - moveList = pos.checkers() ? generate(pos, moveList) - : generate(pos, moveList); - while (cur != moveList) - if ( ((pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) - && !pos.legal(*cur)) - *cur = (--moveList)->move; - else - ++cur; - - return moveList; + Color us = pos.side_to_move(); + Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us); + Square ksq = pos.square(us); + ExtMove* cur = moveList; + + moveList = + pos.checkers() ? generate(pos, moveList) : generate(pos, moveList); + while (cur != moveList) + if (((pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) + && !pos.legal(*cur)) + *cur = (--moveList)->move; + else + ++cur; + + return moveList; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/movegen.h b/src/movegen.h index e913a13ea13..9a39d1c50ea 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -19,7 +19,7 @@ #ifndef MOVEGEN_H_INCLUDED #define MOVEGEN_H_INCLUDED -#include // IWYU pragma: keep +#include // IWYU pragma: keep #include #include "types.h" @@ -29,29 +29,27 @@ namespace Stockfish { class Position; enum GenType { - CAPTURES, - QUIETS, - QUIET_CHECKS, - EVASIONS, - NON_EVASIONS, - LEGAL + CAPTURES, + QUIETS, + QUIET_CHECKS, + EVASIONS, + NON_EVASIONS, + LEGAL }; struct ExtMove { - Move move; - int value; + Move move; + int value; - operator Move() const { return move; } - void operator=(Move m) { move = m; } + operator Move() const { return move; } + void operator=(Move m) { move = m; } - // Inhibit unwanted implicit conversions to Move - // with an ambiguity that yields to a compile error. - operator float() const = delete; + // Inhibit unwanted implicit conversions to Move + // with an ambiguity that yields to a compile error. + operator float() const = delete; }; -inline bool operator<(const ExtMove& f, const ExtMove& s) { - return f.value < s.value; -} +inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; } template ExtMove* generate(const Position& pos, ExtMove* moveList); @@ -62,18 +60,17 @@ ExtMove* generate(const Position& pos, ExtMove* moveList); template struct MoveList { - explicit MoveList(const Position& pos) : last(generate(pos, moveList)) {} - const ExtMove* begin() const { return moveList; } - const ExtMove* end() const { return last; } - size_t size() const { return last - moveList; } - bool contains(Move move) const { - return std::find(begin(), end(), move) != end(); - } + explicit MoveList(const Position& pos) : + last(generate(pos, moveList)) {} + const ExtMove* begin() const { return moveList; } + const ExtMove* end() const { return last; } + size_t size() const { return last - moveList; } + bool contains(Move move) const { return std::find(begin(), end(), move) != end(); } -private: - ExtMove moveList[MAX_MOVES], *last; + private: + ExtMove moveList[MAX_MOVES], *last; }; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MOVEGEN_H_INCLUDED +#endif // #ifndef MOVEGEN_H_INCLUDED diff --git a/src/movepick.cpp b/src/movepick.cpp index 5bb0fd6c274..713a86a99db 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -30,29 +30,42 @@ namespace Stockfish { namespace { - enum Stages { - MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, REFUTATION, QUIET_INIT, QUIET, BAD_CAPTURE, - EVASION_TT, EVASION_INIT, EVASION, - PROBCUT_TT, PROBCUT_INIT, PROBCUT, - QSEARCH_TT, QCAPTURE_INIT, QCAPTURE, QCHECK_INIT, QCHECK - }; - - // partial_insertion_sort() sorts moves in descending order up to and including - // a given limit. The order of moves smaller than the limit is left unspecified. - void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { +enum Stages { + MAIN_TT, + CAPTURE_INIT, + GOOD_CAPTURE, + REFUTATION, + QUIET_INIT, + QUIET, + BAD_CAPTURE, + EVASION_TT, + EVASION_INIT, + EVASION, + PROBCUT_TT, + PROBCUT_INIT, + PROBCUT, + QSEARCH_TT, + QCAPTURE_INIT, + QCAPTURE, + QCHECK_INIT, + QCHECK +}; + +// partial_insertion_sort() sorts moves in descending order up to and including +// a given limit. The order of moves smaller than the limit is left unspecified. +void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) { for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p) - if (p->value >= limit) - { + if (p->value >= limit) { ExtMove tmp = *p, *q; - *p = *++sortedEnd; + *p = *++sortedEnd; for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q) *q = *(q - 1); *q = tmp; } - } +} -} // namespace +} // namespace // Constructors of the MovePicker class. As arguments, we pass information @@ -62,44 +75,57 @@ namespace { // move ordering is at the current node. // MovePicker constructor for the main search -MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - Move cm, - const Move* killers) - : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), - ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d) -{ - assert(d > 0); - - stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + - !(ttm && pos.pseudo_legal(ttm)); +MovePicker::MovePicker(const Position& p, + Move ttm, + Depth d, + const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Move cm, + const Move* killers) : + pos(p), + mainHistory(mh), + captureHistory(cph), + continuationHistory(ch), + ttMove(ttm), + refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, + depth(d) { + assert(d > 0); + + stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); } // MovePicker constructor for quiescence search -MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, - const CapturePieceToHistory* cph, - const PieceToHistory** ch, - Square rs) - : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) -{ - assert(d <= 0); - - stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + - !( ttm - && pos.pseudo_legal(ttm)); +MovePicker::MovePicker(const Position& p, + Move ttm, + Depth d, + const ButterflyHistory* mh, + const CapturePieceToHistory* cph, + const PieceToHistory** ch, + Square rs) : + pos(p), + mainHistory(mh), + captureHistory(cph), + continuationHistory(ch), + ttMove(ttm), + recaptureSquare(rs), + depth(d) { + assert(d <= 0); + + stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) + !(ttm && pos.pseudo_legal(ttm)); } // MovePicker constructor for ProbCut: we generate captures with SEE greater // than or equal to the given threshold. -MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph) - : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) -{ - assert(!pos.checkers()); - - stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) - && pos.pseudo_legal(ttm) - && pos.see_ge(ttm, threshold)); +MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph) : + pos(p), + captureHistory(cph), + ttMove(ttm), + threshold(th) { + assert(!pos.checkers()); + + stage = PROBCUT_TT + + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); } // MovePicker::score() assigns a numerical value to each move in a list, used @@ -108,76 +134,76 @@ MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePiece template void MovePicker::score() { - static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); - - [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, threatenedPieces; - if constexpr (Type == QUIETS) - { - Color us = pos.side_to_move(); - - threatenedByPawn = pos.attacks_by(~us); - threatenedByMinor = pos.attacks_by(~us) | pos.attacks_by(~us) | threatenedByPawn; - threatenedByRook = pos.attacks_by(~us) | threatenedByMinor; - - // Pieces threatened by pieces of lesser material value - threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook) - | (pos.pieces(us, ROOK) & threatenedByMinor) - | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn); - } - - for (auto& m : *this) - if constexpr (Type == CAPTURES) - m.value = (7 * int(PieceValue[pos.piece_on(to_sq(m))]) - + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]) / 16; - - else if constexpr (Type == QUIETS) - { - Piece pc = pos.moved_piece(m); - PieceType pt = type_of(pos.moved_piece(m)); - Square from = from_sq(m); - Square to = to_sq(m); - - // histories - m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)]; - m.value += 2 * (*continuationHistory[0])[pc][to]; - m.value += (*continuationHistory[1])[pc][to]; - m.value += (*continuationHistory[2])[pc][to] / 4; - m.value += (*continuationHistory[3])[pc][to]; - m.value += (*continuationHistory[5])[pc][to]; - - // bonus for checks - m.value += bool(pos.check_squares(pt) & to) * 16384; - - // bonus for escaping from capture - m.value += threatenedPieces & from ? - (pt == QUEEN && !(to & threatenedByRook) ? 50000 - : pt == ROOK && !(to & threatenedByMinor) ? 25000 - : !(to & threatenedByPawn) ? 15000 - : 0 ) - : 0 ; - - // malus for putting piece en prise - m.value -= !(threatenedPieces & from) ? - (pt == QUEEN ? bool(to & threatenedByRook) * 50000 - + bool(to & threatenedByMinor) * 10000 - + bool(to & threatenedByPawn) * 20000 - : pt == ROOK ? bool(to & threatenedByMinor) * 25000 - + bool(to & threatenedByPawn) * 10000 - : pt != PAWN ? bool(to & threatenedByPawn) * 15000 - : 0 ) - : 0 ; - } - - else // Type == EVASIONS - { - if (pos.capture_stage(m)) - m.value = PieceValue[pos.piece_on(to_sq(m))] - - Value(type_of(pos.moved_piece(m))) - + (1 << 28); - else - m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] - + (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]; - } + static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); + + [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, + threatenedPieces; + if constexpr (Type == QUIETS) { + Color us = pos.side_to_move(); + + threatenedByPawn = pos.attacks_by(~us); + threatenedByMinor = + pos.attacks_by(~us) | pos.attacks_by(~us) | threatenedByPawn; + threatenedByRook = pos.attacks_by(~us) | threatenedByMinor; + + // Pieces threatened by pieces of lesser material value + threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook) + | (pos.pieces(us, ROOK) & threatenedByMinor) + | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn); + } + + for (auto& m : *this) + if constexpr (Type == CAPTURES) + m.value = + (7 * int(PieceValue[pos.piece_on(to_sq(m))]) + + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]) + / 16; + + else if constexpr (Type == QUIETS) { + Piece pc = pos.moved_piece(m); + PieceType pt = type_of(pos.moved_piece(m)); + Square from = from_sq(m); + Square to = to_sq(m); + + // histories + m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)]; + m.value += 2 * (*continuationHistory[0])[pc][to]; + m.value += (*continuationHistory[1])[pc][to]; + m.value += (*continuationHistory[2])[pc][to] / 4; + m.value += (*continuationHistory[3])[pc][to]; + m.value += (*continuationHistory[5])[pc][to]; + + // bonus for checks + m.value += bool(pos.check_squares(pt) & to) * 16384; + + // bonus for escaping from capture + m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook) ? 50000 + : pt == ROOK && !(to & threatenedByMinor) ? 25000 + : !(to & threatenedByPawn) ? 15000 + : 0) + : 0; + + // malus for putting piece en prise + m.value -= !(threatenedPieces & from) + ? (pt == QUEEN ? bool(to & threatenedByRook) * 50000 + + bool(to & threatenedByMinor) * 10000 + + bool(to & threatenedByPawn) * 20000 + : pt == ROOK ? bool(to & threatenedByMinor) * 25000 + + bool(to & threatenedByPawn) * 10000 + : pt != PAWN ? bool(to & threatenedByPawn) * 15000 + : 0) + : 0; + } + + else // Type == EVASIONS + { + if (pos.capture_stage(m)) + m.value = PieceValue[pos.piece_on(to_sq(m))] - Value(type_of(pos.moved_piece(m))) + + (1 << 28); + else + m.value = (*mainHistory)[pos.side_to_move()][from_to(m)] + + (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]; + } } // MovePicker::select() returns the next move satisfying a predicate function. @@ -185,17 +211,16 @@ void MovePicker::score() { template Move MovePicker::select(Pred filter) { - while (cur < endMoves) - { - if constexpr (T == Best) - std::swap(*cur, *std::max_element(cur, endMoves)); + while (cur < endMoves) { + if constexpr (T == Best) + std::swap(*cur, *std::max_element(cur, endMoves)); - if (*cur != ttMove && filter()) - return *cur++; + if (*cur != ttMove && filter()) + return *cur++; - cur++; - } - return MOVE_NONE; + cur++; + } + return MOVE_NONE; } // MovePicker::next_move() is the most important method of the MovePicker class. It @@ -204,122 +229,118 @@ Move MovePicker::select(Pred filter) { Move MovePicker::next_move(bool skipQuiets) { top: - switch (stage) { - - case MAIN_TT: - case EVASION_TT: - case QSEARCH_TT: - case PROBCUT_TT: - ++stage; - return ttMove; - - case CAPTURE_INIT: - case PROBCUT_INIT: - case QCAPTURE_INIT: - cur = endBadCaptures = moves; - endMoves = generate(pos, cur); - - score(); - partial_insertion_sort(cur, endMoves, std::numeric_limits::min()); - ++stage; - goto top; - - case GOOD_CAPTURE: - if (select([&](){ - return pos.see_ge(*cur, Value(-cur->value)) ? - // Move losing capture to endBadCaptures to be tried later - true : (*endBadCaptures++ = *cur, false); })) - return *(cur - 1); - - // Prepare the pointers to loop over the refutations array - cur = std::begin(refutations); - endMoves = std::end(refutations); - - // If the countermove is the same as a killer, skip it - if ( refutations[0].move == refutations[2].move - || refutations[1].move == refutations[2].move) - --endMoves; - - ++stage; - [[fallthrough]]; - - case REFUTATION: - if (select([&](){ return *cur != MOVE_NONE - && !pos.capture_stage(*cur) - && pos.pseudo_legal(*cur); })) - return *(cur - 1); - ++stage; - [[fallthrough]]; - - case QUIET_INIT: - if (!skipQuiets) - { - cur = endBadCaptures; - endMoves = generate(pos, cur); - - score(); - partial_insertion_sort(cur, endMoves, -3000 * depth); - } - - ++stage; - [[fallthrough]]; - - case QUIET: - if ( !skipQuiets - && select([&](){return *cur != refutations[0].move - && *cur != refutations[1].move - && *cur != refutations[2].move;})) - return *(cur - 1); - - // Prepare the pointers to loop over the bad captures - cur = moves; - endMoves = endBadCaptures; - - ++stage; - [[fallthrough]]; - - case BAD_CAPTURE: - return select([](){ return true; }); - - case EVASION_INIT: - cur = moves; - endMoves = generate(pos, cur); - - score(); - ++stage; - [[fallthrough]]; - - case EVASION: - return select([](){ return true; }); - - case PROBCUT: - return select([&](){ return pos.see_ge(*cur, threshold); }); - - case QCAPTURE: - if (select([&](){ return depth > DEPTH_QS_RECAPTURES - || to_sq(*cur) == recaptureSquare; })) - return *(cur - 1); - - // If we did not find any move and we do not try checks, we have finished - if (depth != DEPTH_QS_CHECKS) - return MOVE_NONE; - - ++stage; - [[fallthrough]]; - - case QCHECK_INIT: - cur = moves; - endMoves = generate(pos, cur); - - ++stage; - [[fallthrough]]; - - case QCHECK: - return select([](){ return true; }); - } - - assert(false); - return MOVE_NONE; // Silence warning + switch (stage) { + + case MAIN_TT : + case EVASION_TT : + case QSEARCH_TT : + case PROBCUT_TT : ++stage; return ttMove; + + case CAPTURE_INIT : + case PROBCUT_INIT : + case QCAPTURE_INIT : + cur = endBadCaptures = moves; + endMoves = generate(pos, cur); + + score(); + partial_insertion_sort(cur, endMoves, std::numeric_limits::min()); + ++stage; + goto top; + + case GOOD_CAPTURE : + if (select([&]() { + return pos.see_ge(*cur, Value(-cur->value)) + ? + // Move losing capture to endBadCaptures to be tried later + true + : (*endBadCaptures++ = *cur, false); + })) + return *(cur - 1); + + // Prepare the pointers to loop over the refutations array + cur = std::begin(refutations); + endMoves = std::end(refutations); + + // If the countermove is the same as a killer, skip it + if (refutations[0].move == refutations[2].move + || refutations[1].move == refutations[2].move) + --endMoves; + + ++stage; + [[fallthrough]]; + + case REFUTATION : + if (select([&]() { + return *cur != MOVE_NONE && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur); + })) + return *(cur - 1); + ++stage; + [[fallthrough]]; + + case QUIET_INIT : + if (!skipQuiets) { + cur = endBadCaptures; + endMoves = generate(pos, cur); + + score(); + partial_insertion_sort(cur, endMoves, -3000 * depth); + } + + ++stage; + [[fallthrough]]; + + case QUIET : + if (!skipQuiets && select([&]() { + return *cur != refutations[0].move && *cur != refutations[1].move + && *cur != refutations[2].move; + })) + return *(cur - 1); + + // Prepare the pointers to loop over the bad captures + cur = moves; + endMoves = endBadCaptures; + + ++stage; + [[fallthrough]]; + + case BAD_CAPTURE : return select([]() { return true; }); + + case EVASION_INIT : + cur = moves; + endMoves = generate(pos, cur); + + score(); + ++stage; + [[fallthrough]]; + + case EVASION : return select([]() { return true; }); + + case PROBCUT : return select([&]() { return pos.see_ge(*cur, threshold); }); + + case QCAPTURE : + if (select( + [&]() { return depth > DEPTH_QS_RECAPTURES || to_sq(*cur) == recaptureSquare; })) + return *(cur - 1); + + // If we did not find any move and we do not try checks, we have finished + if (depth != DEPTH_QS_CHECKS) + return MOVE_NONE; + + ++stage; + [[fallthrough]]; + + case QCHECK_INIT : + cur = moves; + endMoves = generate(pos, cur); + + ++stage; + [[fallthrough]]; + + case QCHECK : return select([]() { return true; }); + } + + assert(false); + return MOVE_NONE; // Silence warning } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/movepick.h b/src/movepick.h index 457defa525a..65e93dda6fe 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -24,7 +24,7 @@ #include #include #include -#include // IWYU pragma: keep +#include // IWYU pragma: keep #include "movegen.h" #include "types.h" @@ -39,22 +39,22 @@ class Position; template class StatsEntry { - T entry; + T entry; -public: - void operator=(const T& v) { entry = v; } - T* operator&() { return &entry; } - T* operator->() { return &entry; } - operator const T&() const { return entry; } + public: + void operator=(const T& v) { entry = v; } + T* operator&() { return &entry; } + T* operator->() { return &entry; } + operator const T&() const { return entry; } - void operator<<(int bonus) { - assert(abs(bonus) <= D); // Ensure range is [-D, D] - static_assert(D <= std::numeric_limits::max(), "D overflows T"); + void operator<<(int bonus) { + assert(abs(bonus) <= D); // Ensure range is [-D, D] + static_assert(D <= std::numeric_limits::max(), "D overflows T"); - entry += (bonus * D - entry * abs(bonus)) / (D * 5 / 4); + entry += (bonus * D - entry * abs(bonus)) / (D * 5 / 4); - assert(abs(entry) <= D); - } + assert(abs(entry) <= D); + } }; // Stats is a generic N-dimensional array used to store various statistics. @@ -62,28 +62,32 @@ class StatsEntry { // template parameter D limits the range of updates in [-D, D] when we update // values with the << operator, while the last parameters (Size and Sizes) // encode the dimensions of the array. -template -struct Stats : public std::array, Size> -{ - using stats = Stats; +template +struct Stats: public std::array, Size> { + using stats = Stats; - void fill(const T& v) { + void fill(const T& v) { - // For standard-layout 'this' points to the first struct member - assert(std::is_standard_layout_v); + // For standard-layout 'this' points to the first struct member + assert(std::is_standard_layout_v); - using entry = StatsEntry; - entry* p = reinterpret_cast(this); - std::fill(p, p + sizeof(*this) / sizeof(entry), v); - } + using entry = StatsEntry; + entry* p = reinterpret_cast(this); + std::fill(p, p + sizeof(*this) / sizeof(entry), v); + } }; -template -struct Stats : public std::array, Size> {}; +template +struct Stats: public std::array, Size> {}; // In stats table, D=0 means that the template parameter is not used -enum StatsParams { NOT_USED = 0 }; -enum StatsType { NoCaptures, Captures }; +enum StatsParams { + NOT_USED = 0 +}; +enum StatsType { + NoCaptures, + Captures +}; // ButterflyHistory records how often quiet moves have been successful or // unsuccessful during the current search, and is used for reduction and move @@ -117,42 +121,53 @@ using ContinuationHistory = Stats // likely to get a cut-off first. class MovePicker { - enum PickType { Next, Best }; - -public: - MovePicker(const MovePicker&) = delete; - MovePicker& operator=(const MovePicker&) = delete; - MovePicker(const Position&, Move, Depth, const ButterflyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - Move, - const Move*); - MovePicker(const Position&, Move, Depth, const ButterflyHistory*, - const CapturePieceToHistory*, - const PieceToHistory**, - Square); - MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); - Move next_move(bool skipQuiets = false); - -private: - template Move select(Pred); - template void score(); - ExtMove* begin() { return cur; } - ExtMove* end() { return endMoves; } - - const Position& pos; - const ButterflyHistory* mainHistory; - const CapturePieceToHistory* captureHistory; - const PieceToHistory** continuationHistory; - Move ttMove; - ExtMove refutations[3], *cur, *endMoves, *endBadCaptures; - int stage; - Square recaptureSquare; - Value threshold; - Depth depth; - ExtMove moves[MAX_MOVES]; + enum PickType { + Next, + Best + }; + + public: + MovePicker(const MovePicker&) = delete; + MovePicker& operator=(const MovePicker&) = delete; + MovePicker(const Position&, + Move, + Depth, + const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + Move, + const Move*); + MovePicker(const Position&, + Move, + Depth, + const ButterflyHistory*, + const CapturePieceToHistory*, + const PieceToHistory**, + Square); + MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); + Move next_move(bool skipQuiets = false); + + private: + template + Move select(Pred); + template + void score(); + ExtMove* begin() { return cur; } + ExtMove* end() { return endMoves; } + + const Position& pos; + const ButterflyHistory* mainHistory; + const CapturePieceToHistory* captureHistory; + const PieceToHistory** continuationHistory; + Move ttMove; + ExtMove refutations[3], *cur, *endMoves, *endBadCaptures; + int stage; + Square recaptureSquare; + Value threshold; + Depth depth; + ExtMove moves[MAX_MOVES]; }; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef MOVEPICK_H_INCLUDED +#endif // #ifndef MOVEPICK_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 1f821cf9a3b..40681f452c5 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -39,136 +39,144 @@ namespace Stockfish::Eval::NNUE { - // Input feature converter - LargePagePtr featureTransformer; +// Input feature converter +LargePagePtr featureTransformer; - // Evaluation function - AlignedPtr network[LayerStacks]; +// Evaluation function +AlignedPtr network[LayerStacks]; - // Evaluation function file name - std::string fileName; - std::string netDescription; +// Evaluation function file name +std::string fileName; +std::string netDescription; - namespace Detail { +namespace Detail { - // Initialize the evaluation function parameters - template - void initialize(AlignedPtr& pointer) { +// Initialize the evaluation function parameters +template +void initialize(AlignedPtr& pointer) { pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); std::memset(pointer.get(), 0, sizeof(T)); - } +} - template - void initialize(LargePagePtr& pointer) { +template +void initialize(LargePagePtr& pointer) { - static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + static_assert(alignof(T) <= 4096, + "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); std::memset(pointer.get(), 0, sizeof(T)); - } +} - // Read evaluation function parameters - template - bool read_parameters(std::istream& stream, T& reference) { +// Read evaluation function parameters +template +bool read_parameters(std::istream& stream, T& reference) { std::uint32_t header; header = read_little_endian(stream); - if (!stream || header != T::get_hash_value()) return false; + if (!stream || header != T::get_hash_value()) + return false; return reference.read_parameters(stream); - } +} - // Write evaluation function parameters - template - bool write_parameters(std::ostream& stream, const T& reference) { +// Write evaluation function parameters +template +bool write_parameters(std::ostream& stream, const T& reference) { write_little_endian(stream, T::get_hash_value()); return reference.write_parameters(stream); - } +} - } // namespace Detail +} // namespace Detail - // Initialize the evaluation function parameters - static void initialize() { +// Initialize the evaluation function parameters +static void initialize() { Detail::initialize(featureTransformer); for (std::size_t i = 0; i < LayerStacks; ++i) - Detail::initialize(network[i]); - } + Detail::initialize(network[i]); +} - // Read network header - static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) - { +// Read network header +static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) { std::uint32_t version, size; - version = read_little_endian(stream); - *hashValue = read_little_endian(stream); - size = read_little_endian(stream); - if (!stream || version != Version) return false; + version = read_little_endian(stream); + *hashValue = read_little_endian(stream); + size = read_little_endian(stream); + if (!stream || version != Version) + return false; desc->resize(size); stream.read(&(*desc)[0], size); return !stream.fail(); - } +} - // Write network header - static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) - { +// Write network header +static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) { write_little_endian(stream, Version); write_little_endian(stream, hashValue); - write_little_endian(stream, (std::uint32_t)desc.size()); + write_little_endian(stream, (std::uint32_t) desc.size()); stream.write(&desc[0], desc.size()); return !stream.fail(); - } +} - // Read network parameters - static bool read_parameters(std::istream& stream) { +// Read network parameters +static bool read_parameters(std::istream& stream) { std::uint32_t hashValue; - if (!read_header(stream, &hashValue, &netDescription)) return false; - if (hashValue != HashValue) return false; - if (!Detail::read_parameters(stream, *featureTransformer)) return false; + if (!read_header(stream, &hashValue, &netDescription)) + return false; + if (hashValue != HashValue) + return false; + if (!Detail::read_parameters(stream, *featureTransformer)) + return false; for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::read_parameters(stream, *(network[i]))) return false; + if (!Detail::read_parameters(stream, *(network[i]))) + return false; return stream && stream.peek() == std::ios::traits_type::eof(); - } +} - // Write network parameters - static bool write_parameters(std::ostream& stream) { +// Write network parameters +static bool write_parameters(std::ostream& stream) { - if (!write_header(stream, HashValue, netDescription)) return false; - if (!Detail::write_parameters(stream, *featureTransformer)) return false; + if (!write_header(stream, HashValue, netDescription)) + return false; + if (!Detail::write_parameters(stream, *featureTransformer)) + return false; for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::write_parameters(stream, *(network[i]))) return false; + if (!Detail::write_parameters(stream, *(network[i]))) + return false; return bool(stream); - } +} - void hint_common_parent_position(const Position& pos) { +void hint_common_parent_position(const Position& pos) { featureTransformer->hint_common_access(pos); - } +} - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos, bool adjusted, int* complexity) { +// Evaluation function. Perform differential calculation. +Value evaluate(const Position& pos, bool adjusted, int* complexity) { // We manually align the arrays on the stack because with gcc < 9.3 // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; - constexpr int delta = 24; + constexpr int delta = 24; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformedFeaturesUnaligned[ - FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); #else - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); - const int bucket = (pos.count() - 1) / 4; - const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); + const int bucket = (pos.count() - 1) / 4; + const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); const auto positional = network[bucket]->propagate(transformedFeatures); if (complexity) @@ -176,33 +184,34 @@ namespace Stockfish::Eval::NNUE { // Give more value to positional evaluation when adjusted flag is set if (adjusted) - return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) / (1024 * OutputScale)); + return static_cast(((1024 - delta) * psqt + (1024 + delta) * positional) + / (1024 * OutputScale)); else return static_cast((psqt + positional) / OutputScale); - } +} - struct NnueEvalTrace { +struct NnueEvalTrace { static_assert(LayerStacks == PSQTBuckets); - Value psqt[LayerStacks]; - Value positional[LayerStacks]; + Value psqt[LayerStacks]; + Value positional[LayerStacks]; std::size_t correctBucket; - }; +}; - static NnueEvalTrace trace_evaluate(const Position& pos) { +static NnueEvalTrace trace_evaluate(const Position& pos) { // We manually align the arrays on the stack because with gcc < 9.3 // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) - TransformedFeatureType transformedFeaturesUnaligned[ - FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; + TransformedFeatureType + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); #else - alignas(alignment) - TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); @@ -210,124 +219,121 @@ namespace Stockfish::Eval::NNUE { NnueEvalTrace t{}; t.correctBucket = (pos.count() - 1) / 4; for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) { - const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); + const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); + const auto positional = network[bucket]->propagate(transformedFeatures); - t.psqt[bucket] = static_cast( materialist / OutputScale ); - t.positional[bucket] = static_cast( positional / OutputScale ); + t.psqt[bucket] = static_cast(materialist / OutputScale); + t.positional[bucket] = static_cast(positional / OutputScale); } return t; - } +} - constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); - // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. - // The buffer must have capacity for at least 5 chars. - static void format_cp_compact(Value v, char* buffer) { +// format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. +// The buffer must have capacity for at least 5 chars. +static void format_cp_compact(Value v, char* buffer) { buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); int cp = std::abs(UCI::to_cp(v)); - if (cp >= 10000) - { - buffer[1] = '0' + cp / 10000; cp %= 10000; - buffer[2] = '0' + cp / 1000; cp %= 1000; + if (cp >= 10000) { + buffer[1] = '0' + cp / 10000; + cp %= 10000; + buffer[2] = '0' + cp / 1000; + cp %= 1000; buffer[3] = '0' + cp / 100; buffer[4] = ' '; - } - else if (cp >= 1000) - { - buffer[1] = '0' + cp / 1000; cp %= 1000; - buffer[2] = '0' + cp / 100; cp %= 100; + } else if (cp >= 1000) { + buffer[1] = '0' + cp / 1000; + cp %= 1000; + buffer[2] = '0' + cp / 100; + cp %= 100; buffer[3] = '.'; buffer[4] = '0' + cp / 10; - } - else - { - buffer[1] = '0' + cp / 100; cp %= 100; + } else { + buffer[1] = '0' + cp / 100; + cp %= 100; buffer[2] = '.'; - buffer[3] = '0' + cp / 10; cp %= 10; + buffer[3] = '0' + cp / 10; + cp %= 10; buffer[4] = '0' + cp / 1; } - } +} - // format_cp_aligned_dot() converts a Value into pawns, always keeping two decimals - static void format_cp_aligned_dot(Value v, std::stringstream &stream) { +// format_cp_aligned_dot() converts a Value into pawns, always keeping two decimals +static void format_cp_aligned_dot(Value v, std::stringstream& stream) { const double pawns = std::abs(0.01 * UCI::to_cp(v)); - stream << (v < 0 ? '-' : v > 0 ? '+' : ' ') - << std::setiosflags(std::ios::fixed) - << std::setw(6) - << std::setprecision(2) - << pawns; - } + stream << (v < 0 ? '-' + : v > 0 ? '+' + : ' ') + << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << pawns; +} - // trace() returns a string with the value of each piece on a board, - // and a table for (PSQT, Layers) values bucket by bucket. - std::string trace(Position& pos) { +// trace() returns a string with the value of each piece on a board, +// and a table for (PSQT, Layers) values bucket by bucket. +std::string trace(Position& pos) { std::stringstream ss; - char board[3*8+1][8*8+2]; + char board[3 * 8 + 1][8 * 8 + 2]; std::memset(board, ' ', sizeof(board)); - for (int row = 0; row < 3*8+1; ++row) - board[row][8*8+1] = '\0'; + for (int row = 0; row < 3 * 8 + 1; ++row) + board[row][8 * 8 + 1] = '\0'; // A lambda to output one box of the board auto writeSquare = [&board](File file, Rank rank, Piece pc, Value value) { - - const int x = int(file) * 8; - const int y = (7 - int(rank)) * 3; - for (int i = 1; i < 8; ++i) - board[y][x+i] = board[y+3][x+i] = '-'; - for (int i = 1; i < 3; ++i) - board[y+i][x] = board[y+i][x+8] = '|'; - board[y][x] = board[y][x+8] = board[y+3][x+8] = board[y+3][x] = '+'; - if (pc != NO_PIECE) - board[y+1][x+4] = PieceToChar[pc]; - if (value != VALUE_NONE) - format_cp_compact(value, &board[y+2][x+2]); + const int x = int(file) * 8; + const int y = (7 - int(rank)) * 3; + for (int i = 1; i < 8; ++i) + board[y][x + i] = board[y + 3][x + i] = '-'; + for (int i = 1; i < 3; ++i) + board[y + i][x] = board[y + i][x + 8] = '|'; + board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+'; + if (pc != NO_PIECE) + board[y + 1][x + 4] = PieceToChar[pc]; + if (value != VALUE_NONE) + format_cp_compact(value, &board[y + 2][x + 2]); }; // We estimate the value of each piece by doing a differential evaluation from // the current base eval, simulating the removal of the piece from its square. Value base = evaluate(pos); - base = pos.side_to_move() == WHITE ? base : -base; + base = pos.side_to_move() == WHITE ? base : -base; for (File f = FILE_A; f <= FILE_H; ++f) - for (Rank r = RANK_1; r <= RANK_8; ++r) - { - Square sq = make_square(f, r); - Piece pc = pos.piece_on(sq); - Value v = VALUE_NONE; - - if (pc != NO_PIECE && type_of(pc) != KING) - { - auto st = pos.state(); - - pos.remove_piece(sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - - Value eval = evaluate(pos); - eval = pos.side_to_move() == WHITE ? eval : -eval; - v = base - eval; - - pos.put_piece(pc, sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - } + for (Rank r = RANK_1; r <= RANK_8; ++r) { + Square sq = make_square(f, r); + Piece pc = pos.piece_on(sq); + Value v = VALUE_NONE; + + if (pc != NO_PIECE && type_of(pc) != KING) { + auto st = pos.state(); - writeSquare(f, r, pc, v); - } + pos.remove_piece(sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + + Value eval = evaluate(pos); + eval = pos.side_to_move() == WHITE ? eval : -eval; + v = base - eval; + + pos.put_piece(pc, sq); + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + } + + writeSquare(f, r, pc, v); + } ss << " NNUE derived piece values:\n"; - for (int row = 0; row < 3*8+1; ++row) + for (int row = 0; row < 3 * 8 + 1; ++row) ss << board[row] << '\n'; ss << '\n'; @@ -340,70 +346,73 @@ namespace Stockfish::Eval::NNUE { << "| | (PSQT) | (Layers) | |\n" << "+------------+------------+------------+------------+\n"; - for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) - { - ss << "| " << bucket << " "; - ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss); ss << " " - << " | "; format_cp_aligned_dot(t.positional[bucket], ss); ss << " " - << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); ss << " " - << " |"; - if (bucket == t.correctBucket) - ss << " <-- this bucket is used"; - ss << '\n'; + for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { + ss << "| " << bucket << " "; + ss << " | "; + format_cp_aligned_dot(t.psqt[bucket], ss); + ss << " " + << " | "; + format_cp_aligned_dot(t.positional[bucket], ss); + ss << " " + << " | "; + format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); + ss << " " + << " |"; + if (bucket == t.correctBucket) + ss << " <-- this bucket is used"; + ss << '\n'; } ss << "+------------+------------+------------+------------+\n"; return ss.str(); - } +} - // Load eval, from a file stream or a memory stream - bool load_eval(std::string name, std::istream& stream) { +// Load eval, from a file stream or a memory stream +bool load_eval(std::string name, std::istream& stream) { initialize(); fileName = name; return read_parameters(stream); - } +} - // Save eval, to a file stream or a memory stream - bool save_eval(std::ostream& stream) { +// Save eval, to a file stream or a memory stream +bool save_eval(std::ostream& stream) { if (fileName.empty()) - return false; + return false; return write_parameters(stream); - } +} - // Save eval, to a file given by its name - bool save_eval(const std::optional& filename) { +// Save eval, to a file given by its name +bool save_eval(const std::optional& filename) { std::string actualFilename; std::string msg; if (filename.has_value()) actualFilename = filename.value(); - else - { - if (currentEvalFileName != EvalFileDefaultName) - { - msg = "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; + else { + if (currentEvalFileName != EvalFileDefaultName) { + msg = + "Failed to export a net. A non-embedded net can only be saved if the filename is specified"; - sync_cout << msg << sync_endl; - return false; + sync_cout << msg << sync_endl; + return false; } actualFilename = EvalFileDefaultName; } std::ofstream stream(actualFilename, std::ios_base::binary); - bool saved = save_eval(stream); + bool saved = save_eval(stream); - msg = saved ? "Network saved successfully to " + actualFilename - : "Failed to export a net"; + msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net"; sync_cout << msg << sync_endl; return saved; - } +} -} // namespace Stockfish::Eval::NNUE +} // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 8faec6cce43..6edc212f4d7 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -32,48 +32,48 @@ #include "nnue_feature_transformer.h" namespace Stockfish { - class Position; - enum Value : int; +class Position; +enum Value : int; } namespace Stockfish::Eval::NNUE { - // Hash value of evaluation function structure - constexpr std::uint32_t HashValue = - FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); +// Hash value of evaluation function structure +constexpr std::uint32_t HashValue = + FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); - // Deleter for automating release of memory area - template - struct AlignedDeleter { +// Deleter for automating release of memory area +template +struct AlignedDeleter { void operator()(T* ptr) const { - ptr->~T(); - std_aligned_free(ptr); + ptr->~T(); + std_aligned_free(ptr); } - }; +}; - template - struct LargePageDeleter { +template +struct LargePageDeleter { void operator()(T* ptr) const { - ptr->~T(); - aligned_large_pages_free(ptr); + ptr->~T(); + aligned_large_pages_free(ptr); } - }; +}; - template - using AlignedPtr = std::unique_ptr>; +template +using AlignedPtr = std::unique_ptr>; - template - using LargePagePtr = std::unique_ptr>; +template +using LargePagePtr = std::unique_ptr>; - std::string trace(Position& pos); - Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); - void hint_common_parent_position(const Position& pos); +std::string trace(Position& pos); +Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); +void hint_common_parent_position(const Position& pos); - bool load_eval(std::string name, std::istream& stream); - bool save_eval(std::ostream& stream); - bool save_eval(const std::optional& filename); +bool load_eval(std::string name, std::istream& stream); +bool save_eval(std::ostream& stream); +bool save_eval(const std::optional& filename); } // namespace Stockfish::Eval::NNUE -#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED +#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp index 016934b8c4d..c9f8dd9409b 100644 --- a/src/nnue/features/half_ka_v2_hm.cpp +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -27,61 +27,58 @@ namespace Stockfish::Eval::NNUE::Features { - // Index of a feature for a given king position and another piece on some square - template - inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) { - return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc] + KingBuckets[Perspective][ksq]); - } - - // Get a list of indices for active features - template - void HalfKAv2_hm::append_active_indices( - const Position& pos, - IndexList& active - ) { - Square ksq = pos.square(Perspective); - Bitboard bb = pos.pieces(); - while (bb) - { - Square s = pop_lsb(bb); - active.push_back(make_index(s, pos.piece_on(s), ksq)); +// Index of a feature for a given king position and another piece on some square +template +inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) { + return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc] + + KingBuckets[Perspective][ksq]); +} + +// Get a list of indices for active features +template +void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active) { + Square ksq = pos.square(Perspective); + Bitboard bb = pos.pieces(); + while (bb) { + Square s = pop_lsb(bb); + active.push_back(make_index(s, pos.piece_on(s), ksq)); } - } - - // Explicit template instantiations - template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); - template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); - - // append_changed_indices() : get a list of indices for recently changed features - template - void HalfKAv2_hm::append_changed_indices( - Square ksq, - const DirtyPiece& dp, - IndexList& removed, - IndexList& added - ) { +} + +// Explicit template instantiations +template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); +template void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active); + +// append_changed_indices() : get a list of indices for recently changed features +template +void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added) { for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.from[i] != SQ_NONE) - removed.push_back(make_index(dp.from[i], dp.piece[i], ksq)); - if (dp.to[i] != SQ_NONE) - added.push_back(make_index(dp.to[i], dp.piece[i], ksq)); + if (dp.from[i] != SQ_NONE) + removed.push_back(make_index(dp.from[i], dp.piece[i], ksq)); + if (dp.to[i] != SQ_NONE) + added.push_back(make_index(dp.to[i], dp.piece[i], ksq)); } - } +} - // Explicit template instantiations - template void HalfKAv2_hm::append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); - template void HalfKAv2_hm::append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); +// Explicit template instantiations +template void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added); +template void HalfKAv2_hm::append_changed_indices(Square ksq, + const DirtyPiece& dp, + IndexList& removed, + IndexList& added); - int HalfKAv2_hm::update_cost(const StateInfo* st) { - return st->dirtyPiece.dirty_num; - } +int HalfKAv2_hm::update_cost(const StateInfo* st) { return st->dirtyPiece.dirty_num; } - int HalfKAv2_hm::refresh_cost(const Position& pos) { - return pos.count(); - } +int HalfKAv2_hm::refresh_cost(const Position& pos) { return pos.count(); } - bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { +bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) { return st->dirtyPiece.piece[0] == make_piece(perspective, KING); - } +} } // namespace Stockfish::Eval::NNUE::Features diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h index 9da1cc05531..540ff895a5a 100644 --- a/src/nnue/features/half_ka_v2_hm.h +++ b/src/nnue/features/half_ka_v2_hm.h @@ -28,41 +28,40 @@ #include "../nnue_common.h" namespace Stockfish { - struct StateInfo; - class Position; +struct StateInfo; +class Position; } namespace Stockfish::Eval::NNUE::Features { - // Feature HalfKAv2_hm: Combination of the position of own king - // and the position of pieces. Position mirrored such that king always on e..h files. - class HalfKAv2_hm { +// Feature HalfKAv2_hm: Combination of the position of own king +// and the position of pieces. Position mirrored such that king always on e..h files. +class HalfKAv2_hm { // unique number for each piece type on each square enum { - PS_NONE = 0, - PS_W_PAWN = 0, - PS_B_PAWN = 1 * SQUARE_NB, - PS_W_KNIGHT = 2 * SQUARE_NB, - PS_B_KNIGHT = 3 * SQUARE_NB, - PS_W_BISHOP = 4 * SQUARE_NB, - PS_B_BISHOP = 5 * SQUARE_NB, - PS_W_ROOK = 6 * SQUARE_NB, - PS_B_ROOK = 7 * SQUARE_NB, - PS_W_QUEEN = 8 * SQUARE_NB, - PS_B_QUEEN = 9 * SQUARE_NB, - PS_KING = 10 * SQUARE_NB, - PS_NB = 11 * SQUARE_NB + PS_NONE = 0, + PS_W_PAWN = 0, + PS_B_PAWN = 1 * SQUARE_NB, + PS_W_KNIGHT = 2 * SQUARE_NB, + PS_B_KNIGHT = 3 * SQUARE_NB, + PS_W_BISHOP = 4 * SQUARE_NB, + PS_B_BISHOP = 5 * SQUARE_NB, + PS_W_ROOK = 6 * SQUARE_NB, + PS_B_ROOK = 7 * SQUARE_NB, + PS_W_QUEEN = 8 * SQUARE_NB, + PS_B_QUEEN = 9 * SQUARE_NB, + PS_KING = 10 * SQUARE_NB, + PS_NB = 11 * SQUARE_NB }; static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = { // convention: W - us, B - them // viewed from other side, W and B are reversed - { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE }, - { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, - PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE } - }; + {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE}, + {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE, + PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}}; // Index of a feature for a given king position and another piece on some square template @@ -77,9 +76,10 @@ namespace Stockfish::Eval::NNUE::Features { // Number of feature dimensions static constexpr IndexType Dimensions = - static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; + static_cast(SQUARE_NB) * static_cast(PS_NB) / 2; #define B(v) (v * PS_NB) + // clang-format off static constexpr int KingBuckets[COLOR_NB][SQUARE_NB] = { { B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28), B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), @@ -98,8 +98,9 @@ namespace Stockfish::Eval::NNUE::Features { B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24), B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28) } }; + // clang-format on #undef B - + // clang-format off // Orient a square according to perspective (rotates by 180 for black) static constexpr int OrientTBL[COLOR_NB][SQUARE_NB] = { { SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1, @@ -119,25 +120,20 @@ namespace Stockfish::Eval::NNUE::Features { SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8, SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8 } }; + // clang-format on // Maximum number of simultaneously active features. static constexpr IndexType MaxActiveDimensions = 32; - using IndexList = ValueList; + using IndexList = ValueList; // Get a list of indices for active features template - static void append_active_indices( - const Position& pos, - IndexList& active); + static void append_active_indices(const Position& pos, IndexList& active); // Get a list of indices for recently changed features template - static void append_changed_indices( - Square ksq, - const DirtyPiece& dp, - IndexList& removed, - IndexList& added - ); + static void + append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added); // Returns the cost of updating one perspective, the most costly one. // Assumes no refresh needed. @@ -147,8 +143,8 @@ namespace Stockfish::Eval::NNUE::Features { // Returns whether the change stored in this StateInfo means that // a full accumulator refresh is required. static bool requires_refresh(const StateInfo* st, Color perspective); - }; +}; } // namespace Stockfish::Eval::NNUE::Features -#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED +#endif // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index fc65c34339f..e9c54b29626 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -42,95 +42,97 @@ namespace Stockfish::Eval::NNUE::Layers { // Fallback implementation for older/other architectures. // Requires the input to be padded to at least 16 values. #if !defined(USE_SSSE3) - template - static void affine_transform_non_ssse3(std::int32_t* output, const std::int8_t* weights, const std::int32_t* biases, const std::uint8_t* input) - { -# if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON) -# if defined(USE_SSE2) +template +static void affine_transform_non_ssse3(std::int32_t* output, + const std::int8_t* weights, + const std::int32_t* biases, + const std::uint8_t* input) { + #if defined(USE_SSE2) || defined(USE_NEON_DOTPROD) || defined(USE_NEON) + #if defined(USE_SSE2) // At least a multiple of 16, with SSE2. - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const __m128i Zeros = _mm_setzero_si128(); - const auto inputVector = reinterpret_cast(input); + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const __m128i Zeros = _mm_setzero_si128(); + const auto inputVector = reinterpret_cast(input); -# elif defined(USE_NEON_DOTPROD) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const auto inputVector = reinterpret_cast(input); + #elif defined(USE_NEON_DOTPROD) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); -# elif defined(USE_NEON) - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; - const auto inputVector = reinterpret_cast(input); -# endif + #elif defined(USE_NEON) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); + #endif for (IndexType i = 0; i < OutputDimensions; ++i) { - const IndexType offset = i * PaddedInputDimensions; - -# if defined(USE_SSE2) - __m128i sumLo = _mm_cvtsi32_si128(biases[i]); - __m128i sumHi = Zeros; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - __m128i row_j = _mm_load_si128(&row[j]); - __m128i input_j = _mm_load_si128(&inputVector[j]); - __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); - __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); - __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); - __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); - __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); - __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); - sumLo = _mm_add_epi32(sumLo, productLo); - sumHi = _mm_add_epi32(sumHi, productHi); - } - __m128i sum = _mm_add_epi32(sumLo, sumHi); - __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sumHigh_64); - __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); - sum = _mm_add_epi32(sum, sum_second_32); - output[i] = _mm_cvtsi128_si32(sum); - -# elif defined(USE_NEON_DOTPROD) - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - sum = vdotq_s32(sum, inputVector[j], row[j]); - } - output[i] = vaddvq_s32(sum); - -# elif defined(USE_NEON) - int32x4_t sum = {biases[i]}; - const auto row = reinterpret_cast(&weights[offset]); - for (IndexType j = 0; j < NumChunks; ++j) { - int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); - product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); - sum = vpadalq_s16(sum, product); - } - output[i] = sum[0] + sum[1] + sum[2] + sum[3]; - -# endif + const IndexType offset = i * PaddedInputDimensions; + + #if defined(USE_SSE2) + __m128i sumLo = _mm_cvtsi32_si128(biases[i]); + __m128i sumHi = Zeros; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&inputVector[j]); + __m128i extendedRowLo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8); + __m128i extendedRowHi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8); + __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros); + __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros); + __m128i productLo = _mm_madd_epi16(extendedRowLo, extendedInputLo); + __m128i productHi = _mm_madd_epi16(extendedRowHi, extendedInputHi); + sumLo = _mm_add_epi32(sumLo, productLo); + sumHi = _mm_add_epi32(sumHi, productHi); + } + __m128i sum = _mm_add_epi32(sumLo, sumHi); + __m128i sumHigh_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sumHigh_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); + output[i] = _mm_cvtsi128_si32(sum); + + #elif defined(USE_NEON_DOTPROD) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + sum = vdotq_s32(sum, inputVector[j], row[j]); + } + output[i] = vaddvq_s32(sum); + + #elif defined(USE_NEON) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]); + product = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + + #endif } -# else - std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions); - - // Traverse weights in transpose order to take advantage of input sparsity - for (IndexType i = 0; i < InputDimensions; ++i) - if (input[i]) { - const std::int8_t* w = &weights[i]; - const int in = input[i]; - for (IndexType j = 0; j < OutputDimensions; ++j) - output[j] += w[j * PaddedInputDimensions] * in; - } -# endif - } + #else + std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions); + + // Traverse weights in transpose order to take advantage of input sparsity + for (IndexType i = 0; i < InputDimensions; ++i) + if (input[i]) { + const std::int8_t* w = &weights[i]; + const int in = input[i]; + for (IndexType j = 0; j < OutputDimensions; ++j) + output[j] += w[j * PaddedInputDimensions] * in; + } + #endif +} #endif - template - class AffineTransform { +template +class AffineTransform { public: // Input/output type - using InputType = std::uint8_t; + using InputType = std::uint8_t; using OutputType = std::int32_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = OutDims; static constexpr IndexType PaddedInputDimensions = @@ -142,175 +144,164 @@ namespace Stockfish::Eval::NNUE::Layers { // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - hashValue ^= prevHash << 31; - return hashValue; + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; } - static constexpr IndexType get_weight_index_scrambled(IndexType i) - { - return - (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + - i / PaddedInputDimensions * 4 + - i % 4; + static constexpr IndexType get_weight_index_scrambled(IndexType i) { + return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4 + + i / PaddedInputDimensions * 4 + i % 4; } - static constexpr IndexType get_weight_index(IndexType i) - { -#if defined (USE_SSSE3) - return get_weight_index_scrambled(i); + static constexpr IndexType get_weight_index(IndexType i) { +#if defined(USE_SSSE3) + return get_weight_index_scrambled(i); #else - return i; + return i; #endif } // Read network parameters bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); - return !stream.fail(); + return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases, OutputDimensions); + write_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); - return !stream.fail(); + return !stream.fail(); } // Forward propagation - void propagate( - const InputType* input, OutputType* output) const { - -#if defined (USE_SSSE3) - - if constexpr (OutputDimensions > 1) - { - -#if defined (USE_AVX512) - using vec_t = __m512i; - #define vec_setzero _mm512_setzero_si512 - #define vec_set_32 _mm512_set1_epi32 - #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 - #define vec_hadd Simd::m512_hadd -#elif defined (USE_AVX2) - using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_hadd Simd::m256_hadd -#elif defined (USE_SSSE3) - using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_hadd Simd::m128_hadd -#endif - - static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); - - static_assert(OutputDimensions % OutputSimdWidth == 0); - - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; - constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; - - const auto input32 = reinterpret_cast(input); - const vec_t* biasvec = reinterpret_cast(biases); - vec_t acc[NumRegs]; - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasvec[k]; - - for (IndexType i = 0; i < NumChunks; i += 2) - { - const vec_t in0 = vec_set_32(input32[i + 0]); - const vec_t in1 = vec_set_32(input32[i + 1]); - const auto col0 = reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); - const auto col1 = reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]); + void propagate(const InputType* input, OutputType* output) const { + +#if defined(USE_SSSE3) + + if constexpr (OutputDimensions > 1) { + + #if defined(USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 + #define vec_hadd Simd::m512_hadd + #elif defined(USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_hadd Simd::m256_hadd + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::m128_hadd + #endif + + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + + static_assert(OutputDimensions % OutputSimdWidth == 0); + + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / 4; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + + const auto input32 = reinterpret_cast(input); + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType i = 0; i < NumChunks; i += 2) { + const vec_t in0 = vec_set_32(input32[i + 0]); + const vec_t in1 = vec_set_32(input32[i + 1]); + const auto col0 = + reinterpret_cast(&weights[(i + 0) * OutputDimensions * 4]); + const auto col1 = + reinterpret_cast(&weights[(i + 1) * OutputDimensions * 4]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32x2(acc[k], in0, col0[k], in1, col1[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + + #undef vec_setzero + #undef vec_set_32 + #undef vec_add_dpbusd_32 + #undef vec_add_dpbusd_32x2 + #undef vec_hadd + + } else if constexpr (OutputDimensions == 1) { + + // We cannot use AVX512 for the last layer because there's only 32 inputs and the buffer is not padded to 64 elements. + #if defined(USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 + #define vec_hadd Simd::m256_hadd + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::m128_hadd + #endif + + const auto inputVector = reinterpret_cast(input); + + static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType); + + static_assert(PaddedInputDimensions % InputSimdWidth == 0); + + constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; + vec_t sum0 = vec_setzero(); + const auto row0 = reinterpret_cast(&weights[0]); + + for (int j = 0; j < int(NumChunks); ++j) { + const vec_t in = inputVector[j]; + vec_add_dpbusd_32(sum0, in, row0[j]); + } + output[0] = vec_hadd(sum0, biases[0]); + + #undef vec_setzero + #undef vec_set_32 + #undef vec_add_dpbusd_32 + #undef vec_add_dpbusd_32x2 + #undef vec_hadd } - - vec_t* outptr = reinterpret_cast(output); - for (IndexType k = 0; k < NumRegs; ++k) - outptr[k] = acc[k]; - -# undef vec_setzero -# undef vec_set_32 -# undef vec_add_dpbusd_32 -# undef vec_add_dpbusd_32x2 -# undef vec_hadd - - } - else if constexpr (OutputDimensions == 1) - { - -// We cannot use AVX512 for the last layer because there's only 32 inputs and the buffer is not padded to 64 elements. -#if defined (USE_AVX2) - using vec_t = __m256i; - #define vec_setzero _mm256_setzero_si256 - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_hadd Simd::m256_hadd -#elif defined (USE_SSSE3) - using vec_t = __m128i; - #define vec_setzero _mm_setzero_si128 - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 - #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_hadd Simd::m128_hadd -#endif - - const auto inputVector = reinterpret_cast(input); - - static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType); - - static_assert(PaddedInputDimensions % InputSimdWidth == 0); - - constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth; - vec_t sum0 = vec_setzero(); - const auto row0 = reinterpret_cast(&weights[0]); - - for (int j = 0; j < int(NumChunks); ++j) - { - const vec_t in = inputVector[j]; - vec_add_dpbusd_32(sum0, in, row0[j]); - } - output[0] = vec_hadd(sum0, biases[0]); - -# undef vec_setzero -# undef vec_set_32 -# undef vec_add_dpbusd_32 -# undef vec_add_dpbusd_32x2 -# undef vec_hadd - - } #else - // Use old implementation for the other architectures. - affine_transform_non_ssse3< - InputDimensions, - PaddedInputDimensions, - OutputDimensions>(output, weights, biases, input); + // Use old implementation for the other architectures. + affine_transform_non_ssse3( + output, weights, biases, input); #endif } private: - using BiasType = OutputType; + using BiasType = OutputType; using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h index 1dc42109844..a8e1e2624e7 100644 --- a/src/nnue/layers/affine_transform_sparse_input.h +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -38,104 +38,106 @@ namespace Stockfish::Eval::NNUE::Layers { #if (USE_SSSE3 | (USE_NEON >= 8)) - alignas(CacheLineSize) static inline const std::array, 256> lookup_indices = [](){ - std::array, 256> v{}; - for (unsigned i = 0; i < 256; ++i) - { - std::uint64_t j = i, k = 0; - while(j) - v[i][k++] = pop_lsb(j); - } - return v; +alignas(CacheLineSize) static inline const + std::array, 256> lookup_indices = []() { + std::array, 256> v{}; + for (unsigned i = 0; i < 256; ++i) { + std::uint64_t j = i, k = 0; + while (j) + v[i][k++] = pop_lsb(j); + } + return v; }(); - // Find indices of nonzero numbers in an int32_t array - template - void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) { -#if defined (USE_SSSE3) - #if defined (USE_AVX512) - using vec_t = __m512i; - #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) - #elif defined (USE_AVX2) - using vec_t = __m256i; - #if defined(USE_VNNI) && !defined(USE_AVXVNNI) - #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) - #else - #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) +// Find indices of nonzero numbers in an int32_t array +template +void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) { + #if defined(USE_SSSE3) + #if defined(USE_AVX512) + using vec_t = __m512i; + #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) + #elif defined(USE_AVX2) + using vec_t = __m256i; + #if defined(USE_VNNI) && !defined(USE_AVXVNNI) + #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256()) + #else + #define vec_nnz(a) \ + _mm256_movemask_ps( \ + _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) + #endif + #elif defined(USE_SSSE3) + using vec_t = __m128i; + #define vec_nnz(a) \ + _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) #endif - #elif defined (USE_SSSE3) - using vec_t = __m128i; - #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) - #endif using vec128_t = __m128i; - #define vec128_zero _mm_setzero_si128() - #define vec128_set_16(a) _mm_set1_epi16(a) - #define vec128_load(a) _mm_load_si128(a) - #define vec128_storeu(a, b) _mm_storeu_si128(a, b) - #define vec128_add(a, b) _mm_add_epi16(a, b) -#elif defined (USE_NEON) - using vec_t = uint32x4_t; + #define vec128_zero _mm_setzero_si128() + #define vec128_set_16(a) _mm_set1_epi16(a) + #define vec128_load(a) _mm_load_si128(a) + #define vec128_storeu(a, b) _mm_storeu_si128(a, b) + #define vec128_add(a, b) _mm_add_epi16(a, b) + #elif defined(USE_NEON) + using vec_t = uint32x4_t; static const std::uint32_t Mask[4] = {1, 2, 4, 8}; - #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask))) - using vec128_t = uint16x8_t; - #define vec128_zero vdupq_n_u16(0) - #define vec128_set_16(a) vdupq_n_u16(a) - #define vec128_load(a) vld1q_u16(reinterpret_cast(a)) - #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast(a), b) - #define vec128_add(a, b) vaddq_u16(a, b) -#endif + #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask))) + using vec128_t = uint16x8_t; + #define vec128_zero vdupq_n_u16(0) + #define vec128_set_16(a) vdupq_n_u16(a) + #define vec128_load(a) vld1q_u16(reinterpret_cast(a)) + #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast(a), b) + #define vec128_add(a, b) vaddq_u16(a, b) + #endif constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) - constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8); - constexpr IndexType NumChunks = InputDimensions / ChunkSize; - constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; + constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8); + constexpr IndexType NumChunks = InputDimensions / ChunkSize; + constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; constexpr IndexType OutputsPerChunk = ChunkSize / 8; - const auto inputVector = reinterpret_cast(input); - IndexType count = 0; - vec128_t base = vec128_zero; - const vec128_t increment = vec128_set_16(8); - for (IndexType i = 0; i < NumChunks; ++i) - { - // bitmask of nonzero values in this chunk - unsigned nnz = 0; - for (IndexType j = 0; j < InputsPerChunk; ++j) - { - const vec_t inputChunk = inputVector[i * InputsPerChunk + j]; - nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth); - } - for (IndexType j = 0; j < OutputsPerChunk; ++j) - { - const auto lookup = (nnz >> (j * 8)) & 0xFF; - const auto offsets = vec128_load(reinterpret_cast(&lookup_indices[lookup])); - vec128_storeu(reinterpret_cast(out + count), vec128_add(base, offsets)); - count += popcount(lookup); - base = vec128_add(base, increment); - } + const auto inputVector = reinterpret_cast(input); + IndexType count = 0; + vec128_t base = vec128_zero; + const vec128_t increment = vec128_set_16(8); + for (IndexType i = 0; i < NumChunks; ++i) { + // bitmask of nonzero values in this chunk + unsigned nnz = 0; + for (IndexType j = 0; j < InputsPerChunk; ++j) { + const vec_t inputChunk = inputVector[i * InputsPerChunk + j]; + nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth); + } + for (IndexType j = 0; j < OutputsPerChunk; ++j) { + const auto lookup = (nnz >> (j * 8)) & 0xFF; + const auto offsets = + vec128_load(reinterpret_cast(&lookup_indices[lookup])); + vec128_storeu(reinterpret_cast(out + count), vec128_add(base, offsets)); + count += popcount(lookup); + base = vec128_add(base, increment); + } } count_out = count; - } -# undef vec_nnz -# undef vec128_zero -# undef vec128_set_16 -# undef vec128_load -# undef vec128_storeu -# undef vec128_add +} + #undef vec_nnz + #undef vec128_zero + #undef vec128_set_16 + #undef vec128_load + #undef vec128_storeu + #undef vec128_add #endif - // Sparse input implementation - template - class AffineTransformSparseInput { +// Sparse input implementation +template +class AffineTransformSparseInput { public: // Input/output type - using InputType = std::uint8_t; + using InputType = std::uint8_t; using OutputType = std::int32_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = OutDims; - static_assert(OutputDimensions % 16 == 0, "Only implemented for OutputDimensions divisible by 16."); + static_assert(OutputDimensions % 16 == 0, + "Only implemented for OutputDimensions divisible by 16."); static constexpr IndexType PaddedInputDimensions = ceil_to_multiple(InputDimensions, MaxSimdWidth); @@ -152,127 +154,120 @@ namespace Stockfish::Eval::NNUE::Layers { // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0xCC03DAE4u; - hashValue += OutputDimensions; - hashValue ^= prevHash >> 1; - hashValue ^= prevHash << 31; - return hashValue; + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; } - static constexpr IndexType get_weight_index_scrambled(IndexType i) - { - return - (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + - i / PaddedInputDimensions * ChunkSize + - i % ChunkSize; + static constexpr IndexType get_weight_index_scrambled(IndexType i) { + return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + + i / PaddedInputDimensions * ChunkSize + i % ChunkSize; } - static constexpr IndexType get_weight_index(IndexType i) - { + static constexpr IndexType get_weight_index(IndexType i) { #if (USE_SSSE3 | (USE_NEON >= 8)) - return get_weight_index_scrambled(i); + return get_weight_index_scrambled(i); #else - return i; + return i; #endif } // Read network parameters bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - weights[get_weight_index(i)] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); - return !stream.fail(); + return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases, OutputDimensions); + write_little_endian(stream, biases, OutputDimensions); - for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) - write_little_endian(stream, weights[get_weight_index(i)]); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); - return !stream.fail(); + return !stream.fail(); } // Forward propagation - void propagate( - const InputType* input, OutputType* output) const { + void propagate(const InputType* input, OutputType* output) const { #if (USE_SSSE3 | (USE_NEON >= 8)) -#if defined (USE_AVX512) - using invec_t = __m512i; - using outvec_t = __m512i; - #define vec_set_32 _mm512_set1_epi32 - #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 -#elif defined (USE_AVX2) - using invec_t = __m256i; - using outvec_t = __m256i; - #define vec_set_32 _mm256_set1_epi32 - #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 -#elif defined (USE_SSSE3) - using invec_t = __m128i; - using outvec_t = __m128i; - #define vec_set_32 _mm_set1_epi32 - #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 -#elif defined (USE_NEON_DOTPROD) - using invec_t = int8x16_t; - using outvec_t = int32x4_t; - #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) - #define vec_add_dpbusd_32 Simd::dotprod_m128_add_dpbusd_epi32 -#elif defined (USE_NEON) - using invec_t = int8x16_t; - using outvec_t = int32x4_t; - #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) - #define vec_add_dpbusd_32 Simd::neon_m128_add_dpbusd_epi32 -#endif - static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType); - - constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; - constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; - std::uint16_t nnz[NumChunks]; - IndexType count; + #if defined(USE_AVX512) + using invec_t = __m512i; + using outvec_t = __m512i; + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #elif defined(USE_AVX2) + using invec_t = __m256i; + using outvec_t = __m256i; + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 + #elif defined(USE_SSSE3) + using invec_t = __m128i; + using outvec_t = __m128i; + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 + #elif defined(USE_NEON_DOTPROD) + using invec_t = int8x16_t; + using outvec_t = int32x4_t; + #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) + #define vec_add_dpbusd_32 Simd::dotprod_m128_add_dpbusd_epi32 + #elif defined(USE_NEON) + using invec_t = int8x16_t; + using outvec_t = int32x4_t; + #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a)) + #define vec_add_dpbusd_32 Simd::neon_m128_add_dpbusd_epi32 + #endif + static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType); - const auto input32 = reinterpret_cast(input); + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + std::uint16_t nnz[NumChunks]; + IndexType count; - // Find indices of nonzero 32bit blocks - find_nnz(input32, nnz, count); + const auto input32 = reinterpret_cast(input); - const outvec_t* biasvec = reinterpret_cast(biases); - outvec_t acc[NumRegs]; - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasvec[k]; + // Find indices of nonzero 32bit blocks + find_nnz(input32, nnz, count); - for (IndexType j = 0; j < count; ++j) - { - const auto i = nnz[j]; - const invec_t in = vec_set_32(input32[i]); - const auto col = reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); + const outvec_t* biasvec = reinterpret_cast(biases); + outvec_t acc[NumRegs]; for (IndexType k = 0; k < NumRegs; ++k) - vec_add_dpbusd_32(acc[k], in, col[k]); - } - - outvec_t* outptr = reinterpret_cast(output); - for (IndexType k = 0; k < NumRegs; ++k) - outptr[k] = acc[k]; -# undef vec_set_32 -# undef vec_add_dpbusd_32 + acc[k] = biasvec[k]; + + for (IndexType j = 0; j < count; ++j) { + const auto i = nnz[j]; + const invec_t in = vec_set_32(input32[i]); + const auto col = + reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32(acc[k], in, col[k]); + } + + outvec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; + #undef vec_set_32 + #undef vec_add_dpbusd_32 #else - // Use dense implementation for the other architectures. - affine_transform_non_ssse3< - InputDimensions, - PaddedInputDimensions, - OutputDimensions>(output, weights, biases, input); + // Use dense implementation for the other architectures. + affine_transform_non_ssse3( + output, weights, biases, input); #endif } private: - using BiasType = OutputType; + using BiasType = OutputType; using WeightType = std::int8_t; alignas(CacheLineSize) BiasType biases[OutputDimensions]; alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 48cd6c69345..f200d6e15df 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -29,136 +29,132 @@ namespace Stockfish::Eval::NNUE::Layers { - // Clipped ReLU - template - class ClippedReLU { +// Clipped ReLU +template +class ClippedReLU { public: // Input/output type - using InputType = std::int32_t; + using InputType = std::int32_t; using OutputType = std::uint8_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, 32); + ceil_to_multiple(OutputDimensions, 32); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0x538D24C7u; - hashValue += prevHash; - return hashValue; + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; } // Read network parameters - bool read_parameters(std::istream&) { - return true; - } + bool read_parameters(std::istream&) { return true; } // Write network parameters - bool write_parameters(std::ostream&) const { - return true; - } + bool write_parameters(std::ostream&) const { return true; } // Forward propagation - void propagate( - const InputType* input, OutputType* output) const { + void propagate(const InputType* input, OutputType* output) const { + +#if defined(USE_AVX2) + if constexpr (InputDimensions % SimdWidth == 0) { + constexpr IndexType NumChunks = InputDimensions / SimdWidth; + const __m256i Zero = _mm256_setzero_si256(); + const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m256i words0 = + _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), + WeightScaleBits); + const __m256i words1 = + _mm256_srai_epi16(_mm256_packs_epi32(_mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), + WeightScaleBits); + _mm256_store_si256( + &out[i], _mm256_permutevar8x32_epi32( + _mm256_max_epi8(_mm256_packs_epi16(words0, words1), Zero), Offsets)); + } + } else { + constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); + const __m128i Zero = _mm_setzero_si128(); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + WeightScaleBits); + const __m128i words1 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + } + } + constexpr IndexType Start = InputDimensions % SimdWidth == 0 + ? InputDimensions / SimdWidth * SimdWidth + : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); - #if defined(USE_AVX2) - if constexpr (InputDimensions % SimdWidth == 0) { +#elif defined(USE_SSE2) constexpr IndexType NumChunks = InputDimensions / SimdWidth; - const __m256i Zero = _mm256_setzero_si256(); - const __m256i Offsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m256i*>(output); + + #ifdef USE_SSE41 + const __m128i Zero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); for (IndexType i = 0; i < NumChunks; ++i) { - const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), WeightScaleBits); - const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), WeightScaleBits); - _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( - _mm256_packs_epi16(words0, words1), Zero), Offsets)); + const __m128i words0 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])), + WeightScaleBits); + const __m128i words1 = _mm_srai_epi16( + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])), + WeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, Zero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); } - } else { + constexpr IndexType Start = NumChunks * SimdWidth; + +#elif defined(USE_NEON) constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const __m128i Zero = _mm_setzero_si128(); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); + const int8x8_t Zero = {0}; + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast(output); for (IndexType i = 0; i < NumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); - const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], _mm_max_epi8(packedbytes, Zero)); + int16x8_t shifted; + const auto pack = reinterpret_cast(&shifted); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), Zero); + } + constexpr IndexType Start = NumChunks * (SimdWidth / 2); +#else + constexpr IndexType Start = 0; +#endif + + for (IndexType i = Start; i < InputDimensions; ++i) { + output[i] = static_cast(std::clamp(input[i] >> WeightScaleBits, 0, 127)); } - } - constexpr IndexType Start = - InputDimensions % SimdWidth == 0 - ? InputDimensions / SimdWidth * SimdWidth - : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2); - - #elif defined(USE_SSE2) - constexpr IndexType NumChunks = InputDimensions / SimdWidth; - - #ifdef USE_SSE41 - const __m128i Zero = _mm_setzero_si128(); - #else - const __m128i k0x80s = _mm_set1_epi8(-128); - #endif - - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])), WeightScaleBits); - const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])), WeightScaleBits); - const __m128i packedbytes = _mm_packs_epi16(words0, words1); - _mm_store_si128(&out[i], - - #ifdef USE_SSE41 - _mm_max_epi8(packedbytes, Zero) - #else - _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) - #endif - - ); - } - constexpr IndexType Start = NumChunks * SimdWidth; - - #elif defined(USE_NEON) - constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2); - const int8x8_t Zero = {0}; - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast(output); - for (IndexType i = 0; i < NumChunks; ++i) { - int16x8_t shifted; - const auto pack = reinterpret_cast(&shifted); - pack[0] = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits); - pack[1] = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits); - out[i] = vmax_s8(vqmovn_s16(shifted), Zero); - } - constexpr IndexType Start = NumChunks * (SimdWidth / 2); - #else - constexpr IndexType Start = 0; - #endif - - for (IndexType i = Start; i < InputDimensions; ++i) { - output[i] = static_cast( - std::clamp(input[i] >> WeightScaleBits, 0, 127)); - } } - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h index 349217edb7a..5425ca192bc 100644 --- a/src/nnue/layers/simd.h +++ b/src/nnue/layers/simd.h @@ -20,30 +20,30 @@ #define STOCKFISH_SIMD_H_INCLUDED #if defined(USE_AVX2) -# include + #include #elif defined(USE_SSE41) -# include + #include #elif defined(USE_SSSE3) -# include + #include #elif defined(USE_SSE2) -# include + #include #elif defined(USE_NEON) -# include + #include #endif namespace Stockfish::Simd { -#if defined (USE_AVX512) +#if defined(USE_AVX512) - [[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { - return _mm512_reduce_add_epi32(sum) + bias; - } +[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) { + return _mm512_reduce_add_epi32(sum) + bias; +} - /* +/* Parameters: sum0 = [zmm0.i128[0], zmm0.i128[1], zmm0.i128[2], zmm0.i128[3]] sum1 = [zmm1.i128[0], zmm1.i128[1], zmm1.i128[2], zmm1.i128[3]] @@ -58,186 +58,164 @@ namespace Stockfish::Simd { reduce_add_epi32(zmm0.i128[3]), reduce_add_epi32(zmm1.i128[3]), reduce_add_epi32(zmm2.i128[3]), reduce_add_epi32(zmm3.i128[3]) ] */ - [[maybe_unused]] static __m512i m512_hadd128x16_interleave( - __m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { - - __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); - __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); - - __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); - __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); - - __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); - __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); - - __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); - __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); - - return _mm512_add_epi32(sum0123a, sum0123b); - } - - [[maybe_unused]] static void m512_add_dpbusd_epi32( - __m512i& acc, - __m512i a, - __m512i b) { - -# if defined (USE_VNNI) - acc = _mm512_dpbusd_epi32(acc, a, b); -# else - __m512i product0 = _mm512_maddubs_epi16(a, b); - product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); -# endif - } - - [[maybe_unused]] static void m512_add_dpbusd_epi32x2( - __m512i& acc, - __m512i a0, __m512i b0, - __m512i a1, __m512i b1) { - -# if defined (USE_VNNI) - acc = _mm512_dpbusd_epi32(acc, a0, b0); - acc = _mm512_dpbusd_epi32(acc, a1, b1); -# else - __m512i product0 = _mm512_maddubs_epi16(a0, b0); - __m512i product1 = _mm512_maddubs_epi16(a1, b1); - product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1)); -# endif - } +[[maybe_unused]] static __m512i +m512_hadd128x16_interleave(__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) { + + __m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1); + __m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1); + + __m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3); + __m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3); + + __m512i sum01 = _mm512_add_epi32(sum01a, sum01b); + __m512i sum23 = _mm512_add_epi32(sum23a, sum23b); + + __m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23); + __m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23); + + return _mm512_add_epi32(sum0123a, sum0123b); +} + +[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) { + + #if defined(USE_VNNI) + acc = _mm512_dpbusd_epi32(acc, a, b); + #else + __m512i product0 = _mm512_maddubs_epi16(a, b); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, product0); + #endif +} + +[[maybe_unused]] static void +m512_add_dpbusd_epi32x2(__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1) { + + #if defined(USE_VNNI) + acc = _mm512_dpbusd_epi32(acc, a0, b0); + acc = _mm512_dpbusd_epi32(acc, a1, b1); + #else + __m512i product0 = _mm512_maddubs_epi16(a0, b0); + __m512i product1 = _mm512_maddubs_epi16(a1, b1); + product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); + product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1)); + #endif +} #endif -#if defined (USE_AVX2) - - [[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { - __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); - sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); - return _mm_cvtsi128_si32(sum128) + bias; - } - - [[maybe_unused]] static void m256_add_dpbusd_epi32( - __m256i& acc, - __m256i a, - __m256i b) { - -# if defined (USE_VNNI) - acc = _mm256_dpbusd_epi32(acc, a, b); -# else - __m256i product0 = _mm256_maddubs_epi16(a, b); - product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); -# endif - } - - [[maybe_unused]] static void m256_add_dpbusd_epi32x2( - __m256i& acc, - __m256i a0, __m256i b0, - __m256i a1, __m256i b1) { - -# if defined (USE_VNNI) - acc = _mm256_dpbusd_epi32(acc, a0, b0); - acc = _mm256_dpbusd_epi32(acc, a1, b1); -# else - __m256i product0 = _mm256_maddubs_epi16(a0, b0); - __m256i product1 = _mm256_maddubs_epi16(a1, b1); - product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1)); -# endif - } +#if defined(USE_AVX2) + +[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) { + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + return _mm_cvtsi128_si32(sum128) + bias; +} + +[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) { + + #if defined(USE_VNNI) + acc = _mm256_dpbusd_epi32(acc, a, b); + #else + __m256i product0 = _mm256_maddubs_epi16(a, b); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, product0); + #endif +} + +[[maybe_unused]] static void +m256_add_dpbusd_epi32x2(__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1) { + + #if defined(USE_VNNI) + acc = _mm256_dpbusd_epi32(acc, a0, b0); + acc = _mm256_dpbusd_epi32(acc, a1, b1); + #else + __m256i product0 = _mm256_maddubs_epi16(a0, b0); + __m256i product1 = _mm256_maddubs_epi16(a1, b1); + product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); + product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1)); + #endif +} #endif -#if defined (USE_SSSE3) +#if defined(USE_SSSE3) - [[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB - return _mm_cvtsi128_si32(sum) + bias; - } +[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) { + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + return _mm_cvtsi128_si32(sum) + bias; +} - [[maybe_unused]] static void m128_add_dpbusd_epi32( - __m128i& acc, - __m128i a, - __m128i b) { +[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) { - __m128i product0 = _mm_maddubs_epi16(a, b); - product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); - } + __m128i product0 = _mm_maddubs_epi16(a, b); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, product0); +} - [[maybe_unused]] static void m128_add_dpbusd_epi32x2( - __m128i& acc, - __m128i a0, __m128i b0, - __m128i a1, __m128i b1) { +[[maybe_unused]] static void +m128_add_dpbusd_epi32x2(__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1) { - __m128i product0 = _mm_maddubs_epi16(a0, b0); - __m128i product1 = _mm_maddubs_epi16(a1, b1); - product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1)); - } + __m128i product0 = _mm_maddubs_epi16(a0, b0); + __m128i product1 = _mm_maddubs_epi16(a1, b1); + product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); + product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1)); +} #endif -#if defined (USE_NEON_DOTPROD) +#if defined(USE_NEON_DOTPROD) - [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32x2( - int32x4_t& acc, - int8x16_t a0, int8x16_t b0, - int8x16_t a1, int8x16_t b1) { +[[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32x2( + int32x4_t& acc, int8x16_t a0, int8x16_t b0, int8x16_t a1, int8x16_t b1) { - acc = vdotq_s32(acc, a0, b0); - acc = vdotq_s32(acc, a1, b1); - } + acc = vdotq_s32(acc, a0, b0); + acc = vdotq_s32(acc, a1, b1); +} - [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32( - int32x4_t& acc, - int8x16_t a, int8x16_t b) { +[[maybe_unused]] static void +dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { - acc = vdotq_s32(acc, a, b); - } + acc = vdotq_s32(acc, a, b); +} #endif -#if defined (USE_NEON) - - [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { -# if USE_NEON >= 8 - return vaddvq_s32(s); -# else - return s[0] + s[1] + s[2] + s[3]; -# endif - } - - [[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { - return neon_m128_reduce_add_epi32(sum) + bias; - } - - [[maybe_unused]] static void neon_m128_add_dpbusd_epi32x2( - int32x4_t& acc, - int8x8_t a0, int8x8_t b0, - int8x8_t a1, int8x8_t b1) { - - int16x8_t product = vmull_s8(a0, b0); - product = vmlal_s8(product, a1, b1); - acc = vpadalq_s16(acc, product); - } +#if defined(USE_NEON) + +[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { + #if USE_NEON >= 8 + return vaddvq_s32(s); + #else + return s[0] + s[1] + s[2] + s[3]; + #endif +} + +[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) { + return neon_m128_reduce_add_epi32(sum) + bias; +} + +[[maybe_unused]] static void +neon_m128_add_dpbusd_epi32x2(int32x4_t& acc, int8x8_t a0, int8x8_t b0, int8x8_t a1, int8x8_t b1) { + + int16x8_t product = vmull_s8(a0, b0); + product = vmlal_s8(product, a1, b1); + acc = vpadalq_s16(acc, product); +} #endif #if USE_NEON >= 8 - [[maybe_unused]] static void neon_m128_add_dpbusd_epi32( - int32x4_t& acc, - int8x16_t a, int8x16_t b) { - - int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b)); - int16x8_t product1 = vmull_high_s8(a, b); - int16x8_t sum = vpaddq_s16(product0, product1); - acc = vpadalq_s16(acc, sum); - } +[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) { + + int16x8_t product0 = vmull_s8(vget_low_s8(a), vget_low_s8(b)); + int16x8_t product1 = vmull_high_s8(a, b); + int16x8_t sum = vpaddq_s16(product0, product1); + acc = vpadalq_s16(acc, sum); +} #endif } -#endif // STOCKFISH_SIMD_H_INCLUDED +#endif // STOCKFISH_SIMD_H_INCLUDED diff --git a/src/nnue/layers/sqr_clipped_relu.h b/src/nnue/layers/sqr_clipped_relu.h index a3d2059b4de..31806ba21f3 100644 --- a/src/nnue/layers/sqr_clipped_relu.h +++ b/src/nnue/layers/sqr_clipped_relu.h @@ -29,80 +29,73 @@ namespace Stockfish::Eval::NNUE::Layers { - // Clipped ReLU - template - class SqrClippedReLU { +// Clipped ReLU +template +class SqrClippedReLU { public: // Input/output type - using InputType = std::int32_t; + using InputType = std::int32_t; using OutputType = std::uint8_t; // Number of input/output dimensions - static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType InputDimensions = InDims; static constexpr IndexType OutputDimensions = InputDimensions; static constexpr IndexType PaddedOutputDimensions = - ceil_to_multiple(OutputDimensions, 32); + ceil_to_multiple(OutputDimensions, 32); using OutputBuffer = OutputType[PaddedOutputDimensions]; // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { - std::uint32_t hashValue = 0x538D24C7u; - hashValue += prevHash; - return hashValue; + std::uint32_t hashValue = 0x538D24C7u; + hashValue += prevHash; + return hashValue; } // Read network parameters - bool read_parameters(std::istream&) { - return true; - } + bool read_parameters(std::istream&) { return true; } // Write network parameters - bool write_parameters(std::ostream&) const { - return true; - } + bool write_parameters(std::ostream&) const { return true; } // Forward propagation - void propagate( - const InputType* input, OutputType* output) const { - - #if defined(USE_SSE2) - constexpr IndexType NumChunks = InputDimensions / 16; - - static_assert(WeightScaleBits == 6); - const auto in = reinterpret_cast(input); - const auto out = reinterpret_cast<__m128i*>(output); - for (IndexType i = 0; i < NumChunks; ++i) { - __m128i words0 = _mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 0]), - _mm_load_si128(&in[i * 4 + 1])); - __m128i words1 = _mm_packs_epi32( - _mm_load_si128(&in[i * 4 + 2]), - _mm_load_si128(&in[i * 4 + 3])); - - // We shift by WeightScaleBits * 2 = 12 and divide by 128 - // which is an additional shift-right of 7, meaning 19 in total. - // MulHi strips the lower 16 bits so we need to shift out 3 more to match. - words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); - words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); - - _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); - } - constexpr IndexType Start = NumChunks * 16; - - #else - constexpr IndexType Start = 0; - #endif - - for (IndexType i = Start; i < InputDimensions; ++i) { - output[i] = static_cast( - // Really should be /127 but we need to make it fast so we right shift - // by an extra 7 bits instead. Needs to be accounted for in the trainer. - std::min(127ll, ((long long)input[i] * input[i]) >> (2 * WeightScaleBits + 7))); - } + void propagate(const InputType* input, OutputType* output) const { + +#if defined(USE_SSE2) + constexpr IndexType NumChunks = InputDimensions / 16; + + static_assert(WeightScaleBits == 6); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < NumChunks; ++i) { + __m128i words0 = + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])); + __m128i words1 = + _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])); + + // We shift by WeightScaleBits * 2 = 12 and divide by 128 + // which is an additional shift-right of 7, meaning 19 in total. + // MulHi strips the lower 16 bits so we need to shift out 3 more to match. + words0 = _mm_srli_epi16(_mm_mulhi_epi16(words0, words0), 3); + words1 = _mm_srli_epi16(_mm_mulhi_epi16(words1, words1), 3); + + _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1)); + } + constexpr IndexType Start = NumChunks * 16; + +#else + constexpr IndexType Start = 0; +#endif + + for (IndexType i = Start; i < InputDimensions; ++i) { + output[i] = static_cast( + // Really should be /127 but we need to make it fast so we right shift + // by an extra 7 bits instead. Needs to be accounted for in the trainer. + std::min(127ll, ((long long) input[i] * input[i]) >> (2 * WeightScaleBits + 7))); + } } - }; +}; } // namespace Stockfish::Eval::NNUE::Layers -#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED +#endif // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 03fc3bd5cd8..2f1b1d35e52 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -28,13 +28,13 @@ namespace Stockfish::Eval::NNUE { - // Class that holds the result of affine transformation of input features - struct alignas(CacheLineSize) Accumulator { +// Class that holds the result of affine transformation of input features +struct alignas(CacheLineSize) Accumulator { std::int16_t accumulation[2][TransformedFeatureDimensions]; std::int32_t psqtAccumulation[2][PSQTBuckets]; - bool computed[2]; - }; + bool computed[2]; +}; } // namespace Stockfish::Eval::NNUE -#endif // NNUE_ACCUMULATOR_H_INCLUDED +#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 2a7f064bbaa..be0138f14bd 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -39,97 +39,90 @@ using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion constexpr IndexType TransformedFeatureDimensions = 2560; -constexpr IndexType PSQTBuckets = 8; -constexpr IndexType LayerStacks = 8; - -struct Network -{ - static constexpr int FC_0_OUTPUTS = 15; - static constexpr int FC_1_OUTPUTS = 32; - - Layers::AffineTransformSparseInput fc_0; - Layers::SqrClippedReLU ac_sqr_0; - Layers::ClippedReLU ac_0; - Layers::AffineTransform fc_1; - Layers::ClippedReLU ac_1; - Layers::AffineTransform fc_2; - - // Hash value embedded in the evaluation file - static constexpr std::uint32_t get_hash_value() { - // input slice hash - std::uint32_t hashValue = 0xEC42E90Du; - hashValue ^= TransformedFeatureDimensions * 2; - - hashValue = decltype(fc_0)::get_hash_value(hashValue); - hashValue = decltype(ac_0)::get_hash_value(hashValue); - hashValue = decltype(fc_1)::get_hash_value(hashValue); - hashValue = decltype(ac_1)::get_hash_value(hashValue); - hashValue = decltype(fc_2)::get_hash_value(hashValue); - - return hashValue; - } - - // Read network parameters - bool read_parameters(std::istream& stream) { - return fc_0.read_parameters(stream) - && ac_0.read_parameters(stream) - && fc_1.read_parameters(stream) - && ac_1.read_parameters(stream) - && fc_2.read_parameters(stream); - } - - // Write network parameters - bool write_parameters(std::ostream& stream) const { - return fc_0.write_parameters(stream) - && ac_0.write_parameters(stream) - && fc_1.write_parameters(stream) - && ac_1.write_parameters(stream) - && fc_2.write_parameters(stream); - } - - std::int32_t propagate(const TransformedFeatureType* transformedFeatures) - { - struct alignas(CacheLineSize) Buffer - { - alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; - alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; - alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; - alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; - alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; - alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; - - Buffer() - { - std::memset(this, 0, sizeof(*this)); - } - }; +constexpr IndexType PSQTBuckets = 8; +constexpr IndexType LayerStacks = 8; + +struct Network { + static constexpr int FC_0_OUTPUTS = 15; + static constexpr int FC_1_OUTPUTS = 32; + + Layers::AffineTransformSparseInput fc_0; + Layers::SqrClippedReLU ac_sqr_0; + Layers::ClippedReLU ac_0; + Layers::AffineTransform fc_1; + Layers::ClippedReLU ac_1; + Layers::AffineTransform fc_2; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value() { + // input slice hash + std::uint32_t hashValue = 0xEC42E90Du; + hashValue ^= TransformedFeatureDimensions * 2; + + hashValue = decltype(fc_0)::get_hash_value(hashValue); + hashValue = decltype(ac_0)::get_hash_value(hashValue); + hashValue = decltype(fc_1)::get_hash_value(hashValue); + hashValue = decltype(ac_1)::get_hash_value(hashValue); + hashValue = decltype(fc_2)::get_hash_value(hashValue); + + return hashValue; + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + return fc_0.read_parameters(stream) && ac_0.read_parameters(stream) + && fc_1.read_parameters(stream) && ac_1.read_parameters(stream) + && fc_2.read_parameters(stream); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + return fc_0.write_parameters(stream) && ac_0.write_parameters(stream) + && fc_1.write_parameters(stream) && ac_1.write_parameters(stream) + && fc_2.write_parameters(stream); + } + + std::int32_t propagate(const TransformedFeatureType* transformedFeatures) { + struct alignas(CacheLineSize) Buffer { + alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; + alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType + ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; + alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; + alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; + alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; + alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; + + Buffer() { std::memset(this, 0, sizeof(*this)); } + }; #if defined(__clang__) && (__APPLE__) - // workaround for a bug reported with xcode 12 - static thread_local auto tlsBuffer = std::make_unique(); - // Access TLS only once, cache result. - Buffer& buffer = *tlsBuffer; + // workaround for a bug reported with xcode 12 + static thread_local auto tlsBuffer = std::make_unique(); + // Access TLS only once, cache result. + Buffer& buffer = *tlsBuffer; #else - alignas(CacheLineSize) static thread_local Buffer buffer; + alignas(CacheLineSize) static thread_local Buffer buffer; #endif - fc_0.propagate(transformedFeatures, buffer.fc_0_out); - ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); - ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); - std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, FC_0_OUTPUTS * sizeof(decltype(ac_0)::OutputType)); - fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); - ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); - fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); - - // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1< + #include #elif defined(USE_SSE41) -#include + #include #elif defined(USE_SSSE3) -#include + #include #elif defined(USE_SSE2) -#include + #include #elif defined(USE_NEON) -#include + #include #endif namespace Stockfish::Eval::NNUE { - // Version of the evaluation file - constexpr std::uint32_t Version = 0x7AF32F20u; - - // Constant used in evaluation value calculation - constexpr int OutputScale = 16; - constexpr int WeightScaleBits = 6; - - // Size of cache line (in bytes) - constexpr std::size_t CacheLineSize = 64; - - constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; - constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; - - // SIMD width (in bytes) - #if defined(USE_AVX2) - constexpr std::size_t SimdWidth = 32; - - #elif defined(USE_SSE2) - constexpr std::size_t SimdWidth = 16; - - #elif defined(USE_NEON) - constexpr std::size_t SimdWidth = 16; - #endif - - constexpr std::size_t MaxSimdWidth = 32; - - // Type of input feature after conversion - using TransformedFeatureType = std::uint8_t; - using IndexType = std::uint32_t; - - // Round n up to be a multiple of base - template - constexpr IntType ceil_to_multiple(IntType n, IntType base) { - return (n + base - 1) / base * base; - } - - - // read_little_endian() is our utility to read an integer (signed or unsigned, any size) - // from a stream in little-endian order. We swap the byte order after the read if - // necessary to return a result with the byte ordering of the compiling machine. - template - inline IntType read_little_endian(std::istream& stream) { - IntType result; - - if (IsLittleEndian) - stream.read(reinterpret_cast(&result), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - std::make_unsigned_t v = 0; - - stream.read(reinterpret_cast(u), sizeof(IntType)); - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; - - std::memcpy(&result, &v, sizeof(IntType)); - } - - return result; - } - - - // write_little_endian() is our utility to write an integer (signed or unsigned, any size) - // to a stream in little-endian order. We swap the byte order before the write if - // necessary to always write in little endian order, independently of the byte - // ordering of the compiling machine. - template - inline void write_little_endian(std::ostream& stream, IntType value) { - - if (IsLittleEndian) - stream.write(reinterpret_cast(&value), sizeof(IntType)); - else - { - std::uint8_t u[sizeof(IntType)]; - std::make_unsigned_t v = value; - - std::size_t i = 0; - // if constexpr to silence the warning about shift by 8 - if constexpr (sizeof(IntType) > 1) - { - for (; i + 1 < sizeof(IntType); ++i) - { - u[i] = (std::uint8_t)v; +// Version of the evaluation file +constexpr std::uint32_t Version = 0x7AF32F20u; + +// Constant used in evaluation value calculation +constexpr int OutputScale = 16; +constexpr int WeightScaleBits = 6; + +// Size of cache line (in bytes) +constexpr std::size_t CacheLineSize = 64; + +constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; +constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; + +// SIMD width (in bytes) +#if defined(USE_AVX2) +constexpr std::size_t SimdWidth = 32; + +#elif defined(USE_SSE2) +constexpr std::size_t SimdWidth = 16; + +#elif defined(USE_NEON) +constexpr std::size_t SimdWidth = 16; +#endif + +constexpr std::size_t MaxSimdWidth = 32; + +// Type of input feature after conversion +using TransformedFeatureType = std::uint8_t; +using IndexType = std::uint32_t; + +// Round n up to be a multiple of base +template +constexpr IntType ceil_to_multiple(IntType n, IntType base) { + return (n + base - 1) / base * base; +} + + +// read_little_endian() is our utility to read an integer (signed or unsigned, any size) +// from a stream in little-endian order. We swap the byte order after the read if +// necessary to return a result with the byte ordering of the compiling machine. +template +inline IntType read_little_endian(std::istream& stream) { + IntType result; + + if (IsLittleEndian) + stream.read(reinterpret_cast(&result), sizeof(IntType)); + else { + std::uint8_t u[sizeof(IntType)]; + std::make_unsigned_t v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + } + + return result; +} + + +// write_little_endian() is our utility to write an integer (signed or unsigned, any size) +// to a stream in little-endian order. We swap the byte order before the write if +// necessary to always write in little endian order, independently of the byte +// ordering of the compiling machine. +template +inline void write_little_endian(std::ostream& stream, IntType value) { + + if (IsLittleEndian) + stream.write(reinterpret_cast(&value), sizeof(IntType)); + else { + std::uint8_t u[sizeof(IntType)]; + std::make_unsigned_t v = value; + + std::size_t i = 0; + // if constexpr to silence the warning about shift by 8 + if constexpr (sizeof(IntType) > 1) { + for (; i + 1 < sizeof(IntType); ++i) { + u[i] = (std::uint8_t) v; v >>= 8; } - } - u[i] = (std::uint8_t)v; - - stream.write(reinterpret_cast(u), sizeof(IntType)); - } - } - - - // read_little_endian(s, out, N) : read integers in bulk from a little indian stream. - // This reads N integers from stream s and put them in array out. - template - inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { - if (IsLittleEndian) - stream.read(reinterpret_cast(out), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - out[i] = read_little_endian(stream); - } - - - // write_little_endian(s, values, N) : write integers in bulk to a little indian stream. - // This takes N integers from array values and writes them on stream s. - template - inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { - if (IsLittleEndian) - stream.write(reinterpret_cast(values), sizeof(IntType) * count); - else - for (std::size_t i = 0; i < count; ++i) - write_little_endian(stream, values[i]); - } - - - // read_leb_128(s, out, N) : read N signed integers from the stream s, putting them in - // the array out. The stream is assumed to be compressed using the signed LEB128 format. - // See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. - template - inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { - - // Check the presence of our LEB128 magic string - char leb128MagicString[Leb128MagicStringSize]; - stream.read(leb128MagicString, Leb128MagicStringSize); - assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); - - static_assert(std::is_signed_v, "Not implemented for unsigned types"); - - const std::uint32_t BUF_SIZE = 4096; - std::uint8_t buf[BUF_SIZE]; - - auto bytes_left = read_little_endian(stream); - - std::uint32_t buf_pos = BUF_SIZE; - for (std::size_t i = 0; i < count; ++i) - { - IntType result = 0; - size_t shift = 0; - do - { - if (buf_pos == BUF_SIZE) - { - stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); - buf_pos = 0; - } - - std::uint8_t byte = buf[buf_pos++]; - --bytes_left; - result |= (byte & 0x7f) << shift; - shift += 7; - - if ((byte & 0x80) == 0) - { - out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0) ? result - : result | ~((1 << shift) - 1); - break; - } - } - while (shift < sizeof(IntType) * 8); - } - - assert(bytes_left == 0); - } - - - // write_leb_128(s, values, N) : write signed integers to a stream with LEB128 compression. - // This takes N integers from array values, compress them with the LEB128 algorithm and - // writes the result on the stream s. - // See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. - template - inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { - - // Write our LEB128 magic string - stream.write(Leb128MagicString, Leb128MagicStringSize); - - static_assert(std::is_signed_v, "Not implemented for unsigned types"); - - std::uint32_t byte_count = 0; - for (std::size_t i = 0; i < count; ++i) - { - IntType value = values[i]; - std::uint8_t byte; - do - { - byte = value & 0x7f; - value >>= 7; - ++byte_count; - } - while ((byte & 0x40) == 0 ? value != 0 : value != -1); - } - - write_little_endian(stream, byte_count); - - const std::uint32_t BUF_SIZE = 4096; - std::uint8_t buf[BUF_SIZE]; - std::uint32_t buf_pos = 0; - - auto flush = [&]() { - if (buf_pos > 0) - { - stream.write(reinterpret_cast(buf), buf_pos); - buf_pos = 0; - } - }; - - auto write = [&](std::uint8_t byte) { - buf[buf_pos++] = byte; - if (buf_pos == BUF_SIZE) - flush(); - }; - - for (std::size_t i = 0; i < count; ++i) - { - IntType value = values[i]; - while (true) - { - std::uint8_t byte = value & 0x7f; - value >>= 7; - if ((byte & 0x40) == 0 ? value == 0 : value == -1) - { - write(byte); - break; - } - write(byte | 0x80); - } - } - - flush(); - } + } + u[i] = (std::uint8_t) v; + + stream.write(reinterpret_cast(u), sizeof(IntType)); + } +} + + +// read_little_endian(s, out, N) : read integers in bulk from a little indian stream. +// This reads N integers from stream s and put them in array out. +template +inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) { + if (IsLittleEndian) + stream.read(reinterpret_cast(out), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + out[i] = read_little_endian(stream); +} + + +// write_little_endian(s, values, N) : write integers in bulk to a little indian stream. +// This takes N integers from array values and writes them on stream s. +template +inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) { + if (IsLittleEndian) + stream.write(reinterpret_cast(values), sizeof(IntType) * count); + else + for (std::size_t i = 0; i < count; ++i) + write_little_endian(stream, values[i]); +} + + +// read_leb_128(s, out, N) : read N signed integers from the stream s, putting them in +// the array out. The stream is assumed to be compressed using the signed LEB128 format. +// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. +template +inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { + + // Check the presence of our LEB128 magic string + char leb128MagicString[Leb128MagicStringSize]; + stream.read(leb128MagicString, Leb128MagicStringSize); + assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + + auto bytes_left = read_little_endian(stream); + + std::uint32_t buf_pos = BUF_SIZE; + for (std::size_t i = 0; i < count; ++i) { + IntType result = 0; + size_t shift = 0; + do { + if (buf_pos == BUF_SIZE) { + stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); + buf_pos = 0; + } + + std::uint8_t byte = buf[buf_pos++]; + --bytes_left; + result |= (byte & 0x7f) << shift; + shift += 7; + + if ((byte & 0x80) == 0) { + out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0) + ? result + : result | ~((1 << shift) - 1); + break; + } + } while (shift < sizeof(IntType) * 8); + } + + assert(bytes_left == 0); +} + + +// write_leb_128(s, values, N) : write signed integers to a stream with LEB128 compression. +// This takes N integers from array values, compress them with the LEB128 algorithm and +// writes the result on the stream s. +// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme. +template +inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { + + // Write our LEB128 magic string + stream.write(Leb128MagicString, Leb128MagicStringSize); + + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + + std::uint32_t byte_count = 0; + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + std::uint8_t byte; + do { + byte = value & 0x7f; + value >>= 7; + ++byte_count; + } while ((byte & 0x40) == 0 ? value != 0 : value != -1); + } + + write_little_endian(stream, byte_count); + + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + std::uint32_t buf_pos = 0; + + auto flush = [&]() { + if (buf_pos > 0) { + stream.write(reinterpret_cast(buf), buf_pos); + buf_pos = 0; + } + }; + + auto write = [&](std::uint8_t byte) { + buf[buf_pos++] = byte; + if (buf_pos == BUF_SIZE) + flush(); + }; + + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + while (true) { + std::uint8_t byte = value & 0x7f; + value >>= 7; + if ((byte & 0x40) == 0 ? value == 0 : value == -1) { + write(byte); + break; + } + write(byte | 0x80); + } + } + + flush(); +} } // namespace Stockfish::Eval::NNUE -#endif // #ifndef NNUE_COMMON_H_INCLUDED +#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 9f02830a63e..823a32bd459 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -36,303 +36,300 @@ namespace Stockfish::Eval::NNUE { - using BiasType = std::int16_t; - using WeightType = std::int16_t; - using PSQTWeightType = std::int32_t; - - // If vector instructions are enabled, we update and refresh the - // accumulator tile by tile such that each tile fits in the CPU's - // vector registers. - #define VECTOR - - static_assert(PSQTBuckets % 8 == 0, - "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); - - #ifdef USE_AVX512 - using vec_t = __m512i; - using psqt_vec_t = __m256i; - #define vec_load(a) _mm512_load_si512(a) - #define vec_store(a,b) _mm512_store_si512(a,b) - #define vec_add_16(a,b) _mm512_add_epi16(a,b) - #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) - #define vec_mul_16(a,b) _mm512_mullo_epi16(a,b) - #define vec_zero() _mm512_setzero_epi32() - #define vec_set_16(a) _mm512_set1_epi16(a) - #define vec_max_16(a,b) _mm512_max_epi16(a,b) - #define vec_min_16(a,b) _mm512_min_epi16(a,b) - inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ - vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a,7),_mm512_srli_epi16(b,7)); +using BiasType = std::int16_t; +using WeightType = std::int16_t; +using PSQTWeightType = std::int32_t; + +// If vector instructions are enabled, we update and refresh the +// accumulator tile by tile such that each tile fits in the CPU's +// vector registers. +#define VECTOR + +static_assert(PSQTBuckets % 8 == 0, + "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); + +#ifdef USE_AVX512 +using vec_t = __m512i; +using psqt_vec_t = __m256i; + #define vec_load(a) _mm512_load_si512(a) + #define vec_store(a, b) _mm512_store_si512(a, b) + #define vec_add_16(a, b) _mm512_add_epi16(a, b) + #define vec_sub_16(a, b) _mm512_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm512_mullo_epi16(a, b) + #define vec_zero() _mm512_setzero_epi32() + #define vec_set_16(a) _mm512_set1_epi16(a) + #define vec_max_16(a, b) _mm512_max_epi16(a, b) + #define vec_min_16(a, b) _mm512_min_epi16(a, b) +inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { + vec_t compacted = _mm512_packs_epi16(_mm512_srli_epi16(a, 7), _mm512_srli_epi16(b, 7)); return _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7), compacted); - } - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a,b) _mm256_store_si256(a,b) - #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) - #define vec_zero_psqt() _mm256_setzero_si256() - #define NumRegistersSIMD 16 - #define MaxChunkSize 64 - - #elif USE_AVX2 - using vec_t = __m256i; - using psqt_vec_t = __m256i; - #define vec_load(a) _mm256_load_si256(a) - #define vec_store(a,b) _mm256_store_si256(a,b) - #define vec_add_16(a,b) _mm256_add_epi16(a,b) - #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) - #define vec_mul_16(a,b) _mm256_mullo_epi16(a,b) - #define vec_zero() _mm256_setzero_si256() - #define vec_set_16(a) _mm256_set1_epi16(a) - #define vec_max_16(a,b) _mm256_max_epi16(a,b) - #define vec_min_16(a,b) _mm256_min_epi16(a,b) - inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ - vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a,7), _mm256_srli_epi16(b,7)); +} + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a, b) _mm256_store_si256(a, b) + #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 + #define MaxChunkSize 64 + +#elif USE_AVX2 +using vec_t = __m256i; +using psqt_vec_t = __m256i; + #define vec_load(a) _mm256_load_si256(a) + #define vec_store(a, b) _mm256_store_si256(a, b) + #define vec_add_16(a, b) _mm256_add_epi16(a, b) + #define vec_sub_16(a, b) _mm256_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm256_mullo_epi16(a, b) + #define vec_zero() _mm256_setzero_si256() + #define vec_set_16(a) _mm256_set1_epi16(a) + #define vec_max_16(a, b) _mm256_max_epi16(a, b) + #define vec_min_16(a, b) _mm256_min_epi16(a, b) +inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { + vec_t compacted = _mm256_packs_epi16(_mm256_srli_epi16(a, 7), _mm256_srli_epi16(b, 7)); return _mm256_permute4x64_epi64(compacted, 0b11011000); - } - #define vec_load_psqt(a) _mm256_load_si256(a) - #define vec_store_psqt(a,b) _mm256_store_si256(a,b) - #define vec_add_psqt_32(a,b) _mm256_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm256_sub_epi32(a,b) - #define vec_zero_psqt() _mm256_setzero_si256() - #define NumRegistersSIMD 16 - #define MaxChunkSize 32 - - #elif USE_SSE2 - using vec_t = __m128i; - using psqt_vec_t = __m128i; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) _mm_add_epi16(a,b) - #define vec_sub_16(a,b) _mm_sub_epi16(a,b) - #define vec_mul_16(a,b) _mm_mullo_epi16(a,b) - #define vec_zero() _mm_setzero_si128() - #define vec_set_16(a) _mm_set1_epi16(a) - #define vec_max_16(a,b) _mm_max_epi16(a,b) - #define vec_min_16(a,b) _mm_min_epi16(a,b) - #define vec_msb_pack_16(a,b) _mm_packs_epi16(_mm_srli_epi16(a,7),_mm_srli_epi16(b,7)) - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) _mm_add_epi32(a,b) - #define vec_sub_psqt_32(a,b) _mm_sub_epi32(a,b) - #define vec_zero_psqt() _mm_setzero_si128() - #define NumRegistersSIMD (Is64Bit ? 16 : 8) - #define MaxChunkSize 16 - - #elif USE_NEON - using vec_t = int16x8_t; - using psqt_vec_t = int32x4_t; - #define vec_load(a) (*(a)) - #define vec_store(a,b) *(a)=(b) - #define vec_add_16(a,b) vaddq_s16(a,b) - #define vec_sub_16(a,b) vsubq_s16(a,b) - #define vec_mul_16(a,b) vmulq_s16(a,b) - #define vec_zero() vec_t{0} - #define vec_set_16(a) vdupq_n_s16(a) - #define vec_max_16(a,b) vmaxq_s16(a,b) - #define vec_min_16(a,b) vminq_s16(a,b) - inline vec_t vec_msb_pack_16(vec_t a, vec_t b){ - const int8x8_t shifta = vshrn_n_s16(a, 7); - const int8x8_t shiftb = vshrn_n_s16(b, 7); - const int8x16_t compacted = vcombine_s8(shifta,shiftb); - return *reinterpret_cast (&compacted); - } - #define vec_load_psqt(a) (*(a)) - #define vec_store_psqt(a,b) *(a)=(b) - #define vec_add_psqt_32(a,b) vaddq_s32(a,b) - #define vec_sub_psqt_32(a,b) vsubq_s32(a,b) - #define vec_zero_psqt() psqt_vec_t{0} - #define NumRegistersSIMD 16 - #define MaxChunkSize 16 - - #else - #undef VECTOR - - #endif - - - #ifdef VECTOR - - // Compute optimal SIMD register count for feature transformer accumulation. - - // We use __m* types as template arguments, which causes GCC to emit warnings - // about losing some attribute information. This is irrelevant to us as we - // only take their size, so the following pragma are harmless. - #if defined(__GNUC__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wignored-attributes" - #endif - - template - static constexpr int BestRegisterCount() - { - #define RegisterSize sizeof(SIMDRegisterType) - #define LaneSize sizeof(LaneType) - - static_assert(RegisterSize >= LaneSize); - static_assert(MaxRegisters <= NumRegistersSIMD); - static_assert(MaxRegisters > 0); - static_assert(NumRegistersSIMD > 0); - static_assert(RegisterSize % LaneSize == 0); - static_assert((NumLanes * LaneSize) % RegisterSize == 0); - - const int ideal = (NumLanes * LaneSize) / RegisterSize; - if (ideal <= MaxRegisters) - return ideal; - - // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters - for (int divisor = MaxRegisters; divisor > 1; --divisor) - if (ideal % divisor == 0) - return divisor; - - return 1; - } - - static constexpr int NumRegs = BestRegisterCount(); - static constexpr int NumPsqtRegs = BestRegisterCount(); - #if defined(__GNUC__) - #pragma GCC diagnostic pop - #endif - #endif - - - - // Input feature converter - class FeatureTransformer { +} + #define vec_load_psqt(a) _mm256_load_si256(a) + #define vec_store_psqt(a, b) _mm256_store_si256(a, b) + #define vec_add_psqt_32(a, b) _mm256_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm256_sub_epi32(a, b) + #define vec_zero_psqt() _mm256_setzero_si256() + #define NumRegistersSIMD 16 + #define MaxChunkSize 32 + +#elif USE_SSE2 +using vec_t = __m128i; +using psqt_vec_t = __m128i; + #define vec_load(a) (*(a)) + #define vec_store(a, b) *(a) = (b) + #define vec_add_16(a, b) _mm_add_epi16(a, b) + #define vec_sub_16(a, b) _mm_sub_epi16(a, b) + #define vec_mul_16(a, b) _mm_mullo_epi16(a, b) + #define vec_zero() _mm_setzero_si128() + #define vec_set_16(a) _mm_set1_epi16(a) + #define vec_max_16(a, b) _mm_max_epi16(a, b) + #define vec_min_16(a, b) _mm_min_epi16(a, b) + #define vec_msb_pack_16(a, b) _mm_packs_epi16(_mm_srli_epi16(a, 7), _mm_srli_epi16(b, 7)) + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a, b) *(a) = (b) + #define vec_add_psqt_32(a, b) _mm_add_epi32(a, b) + #define vec_sub_psqt_32(a, b) _mm_sub_epi32(a, b) + #define vec_zero_psqt() _mm_setzero_si128() + #define NumRegistersSIMD (Is64Bit ? 16 : 8) + #define MaxChunkSize 16 + +#elif USE_NEON +using vec_t = int16x8_t; +using psqt_vec_t = int32x4_t; + #define vec_load(a) (*(a)) + #define vec_store(a, b) *(a) = (b) + #define vec_add_16(a, b) vaddq_s16(a, b) + #define vec_sub_16(a, b) vsubq_s16(a, b) + #define vec_mul_16(a, b) vmulq_s16(a, b) + #define vec_zero() \ + vec_t { 0 } + #define vec_set_16(a) vdupq_n_s16(a) + #define vec_max_16(a, b) vmaxq_s16(a, b) + #define vec_min_16(a, b) vminq_s16(a, b) +inline vec_t vec_msb_pack_16(vec_t a, vec_t b) { + const int8x8_t shifta = vshrn_n_s16(a, 7); + const int8x8_t shiftb = vshrn_n_s16(b, 7); + const int8x16_t compacted = vcombine_s8(shifta, shiftb); + return *reinterpret_cast(&compacted); +} + #define vec_load_psqt(a) (*(a)) + #define vec_store_psqt(a, b) *(a) = (b) + #define vec_add_psqt_32(a, b) vaddq_s32(a, b) + #define vec_sub_psqt_32(a, b) vsubq_s32(a, b) + #define vec_zero_psqt() \ + psqt_vec_t { 0 } + #define NumRegistersSIMD 16 + #define MaxChunkSize 16 + +#else + #undef VECTOR + +#endif + + +#ifdef VECTOR + + // Compute optimal SIMD register count for feature transformer accumulation. + + // We use __m* types as template arguments, which causes GCC to emit warnings + // about losing some attribute information. This is irrelevant to us as we + // only take their size, so the following pragma are harmless. + #if defined(__GNUC__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wignored-attributes" + #endif + +template +static constexpr int BestRegisterCount() { + #define RegisterSize sizeof(SIMDRegisterType) + #define LaneSize sizeof(LaneType) + + static_assert(RegisterSize >= LaneSize); + static_assert(MaxRegisters <= NumRegistersSIMD); + static_assert(MaxRegisters > 0); + static_assert(NumRegistersSIMD > 0); + static_assert(RegisterSize % LaneSize == 0); + static_assert((NumLanes * LaneSize) % RegisterSize == 0); + + const int ideal = (NumLanes * LaneSize) / RegisterSize; + if (ideal <= MaxRegisters) + return ideal; + + // Look for the largest divisor of the ideal register count that is smaller than MaxRegisters + for (int divisor = MaxRegisters; divisor > 1; --divisor) + if (ideal % divisor == 0) + return divisor; + + return 1; +} + +static constexpr int NumRegs = + BestRegisterCount(); +static constexpr int NumPsqtRegs = + BestRegisterCount(); + #if defined(__GNUC__) + #pragma GCC diagnostic pop + #endif +#endif + + +// Input feature converter +class FeatureTransformer { private: // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; - #ifdef VECTOR - static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; +#ifdef VECTOR + static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); static_assert(PSQTBuckets % PsqtTileHeight == 0, "PsqtTileHeight must divide PSQTBuckets"); - #endif +#endif public: // Output type using OutputType = TransformedFeatureType; // Number of input/output dimensions - static constexpr IndexType InputDimensions = FeatureSet::Dimensions; + static constexpr IndexType InputDimensions = FeatureSet::Dimensions; static constexpr IndexType OutputDimensions = HalfDimensions; // Size of forward propagation buffer - static constexpr std::size_t BufferSize = - OutputDimensions * sizeof(OutputType); + static constexpr std::size_t BufferSize = OutputDimensions * sizeof(OutputType); // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value() { - return FeatureSet::HashValue ^ (OutputDimensions * 2); + return FeatureSet::HashValue ^ (OutputDimensions * 2); } // Read network parameters bool read_parameters(std::istream& stream) { - read_leb_128(stream, biases , HalfDimensions ); - read_leb_128(stream, weights , HalfDimensions * InputDimensions); - read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); + read_leb_128(stream, biases, HalfDimensions); + read_leb_128(stream, weights, HalfDimensions * InputDimensions); + read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); - return !stream.fail(); + return !stream.fail(); } // Write network parameters bool write_parameters(std::ostream& stream) const { - write_leb_128(stream, biases , HalfDimensions ); - write_leb_128(stream, weights , HalfDimensions * InputDimensions); - write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); + write_leb_128(stream, biases, HalfDimensions); + write_leb_128(stream, weights, HalfDimensions * InputDimensions); + write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); - return !stream.fail(); + return !stream.fail(); } // Convert input features std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { - update_accumulator(pos); - update_accumulator(pos); + update_accumulator(pos); + update_accumulator(pos); - const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - const auto& accumulation = pos.state()->accumulator.accumulation; - const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + const auto& accumulation = pos.state()->accumulator.accumulation; + const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; - const auto psqt = ( - psqtAccumulation[perspectives[0]][bucket] - - psqtAccumulation[perspectives[1]][bucket] - ) / 2; + const auto psqt = + (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]) + / 2; - for (IndexType p = 0; p < 2; ++p) - { - const IndexType offset = (HalfDimensions / 2) * p; + for (IndexType p = 0; p < 2; ++p) { + const IndexType offset = (HalfDimensions / 2) * p; #if defined(VECTOR) - constexpr IndexType OutputChunkSize = MaxChunkSize; - static_assert((HalfDimensions / 2) % OutputChunkSize == 0); - constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; + constexpr IndexType OutputChunkSize = MaxChunkSize; + static_assert((HalfDimensions / 2) % OutputChunkSize == 0); + constexpr IndexType NumOutputChunks = HalfDimensions / 2 / OutputChunkSize; - vec_t Zero = vec_zero(); - vec_t One = vec_set_16(127); + vec_t Zero = vec_zero(); + vec_t One = vec_set_16(127); - const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); - const vec_t* in1 = reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); - vec_t* out = reinterpret_cast< vec_t*>(output + offset); + const vec_t* in0 = reinterpret_cast(&(accumulation[perspectives[p]][0])); + const vec_t* in1 = + reinterpret_cast(&(accumulation[perspectives[p]][HalfDimensions / 2])); + vec_t* out = reinterpret_cast(output + offset); - for (IndexType j = 0; j < NumOutputChunks; j += 1) - { - const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); - const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); - const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); - const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); + for (IndexType j = 0; j < NumOutputChunks; j += 1) { + const vec_t sum0a = vec_max_16(vec_min_16(in0[j * 2 + 0], One), Zero); + const vec_t sum0b = vec_max_16(vec_min_16(in0[j * 2 + 1], One), Zero); + const vec_t sum1a = vec_max_16(vec_min_16(in1[j * 2 + 0], One), Zero); + const vec_t sum1b = vec_max_16(vec_min_16(in1[j * 2 + 1], One), Zero); - const vec_t pa = vec_mul_16(sum0a, sum1a); - const vec_t pb = vec_mul_16(sum0b, sum1b); + const vec_t pa = vec_mul_16(sum0a, sum1a); + const vec_t pb = vec_mul_16(sum0b, sum1b); - out[j] = vec_msb_pack_16(pa, pb); - } + out[j] = vec_msb_pack_16(pa, pb); + } #else - for (IndexType j = 0; j < HalfDimensions / 2; ++j) { - BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; - BiasType sum1 = accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; - sum0 = std::clamp(sum0, 0, 127); - sum1 = std::clamp(sum1, 0, 127); - output[offset + j] = static_cast(unsigned(sum0 * sum1) / 128); - } + for (IndexType j = 0; j < HalfDimensions / 2; ++j) { + BiasType sum0 = accumulation[static_cast(perspectives[p])][j + 0]; + BiasType sum1 = + accumulation[static_cast(perspectives[p])][j + HalfDimensions / 2]; + sum0 = std::clamp(sum0, 0, 127); + sum1 = std::clamp(sum1, 0, 127); + output[offset + j] = static_cast(unsigned(sum0 * sum1) / 128); + } #endif - } + } - return psqt; - } // end of function transform() + return psqt; + } // end of function transform() void hint_common_access(const Position& pos) const { - hint_common_access_for_perspective(pos); - hint_common_access_for_perspective(pos); + hint_common_access_for_perspective(pos); + hint_common_access_for_perspective(pos); } private: template - [[nodiscard]] std::pair try_find_computed_accumulator(const Position& pos) const { - // Look for a usable accumulator of an earlier position. We keep track - // of the estimated gain in terms of features to be added/subtracted. - StateInfo *st = pos.state(), *next = nullptr; - int gain = FeatureSet::refresh_cost(pos); - while (st->previous && !st->accumulator.computed[Perspective]) - { - // This governs when a full feature refresh is needed and how many - // updates are better than just one full refresh. - if ( FeatureSet::requires_refresh(st, Perspective) - || (gain -= FeatureSet::update_cost(st) + 1) < 0) - break; - next = st; - st = st->previous; - } - return { st, next }; + [[nodiscard]] std::pair + try_find_computed_accumulator(const Position& pos) const { + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + StateInfo *st = pos.state(), *next = nullptr; + int gain = FeatureSet::refresh_cost(pos); + while (st->previous && !st->accumulator.computed[Perspective]) { + // This governs when a full feature refresh is needed and how many + // updates are better than just one full refresh. + if (FeatureSet::requires_refresh(st, Perspective) + || (gain -= FeatureSet::update_cost(st) + 1) < 0) + break; + next = st; + st = st->previous; + } + return {st, next}; } // NOTE: The parameter states_to_update is an array of position states, ending with nullptr. @@ -340,364 +337,342 @@ namespace Stockfish::Eval::NNUE { // by repeatedly applying ->previous from states_to_update[i+1] or states_to_update[i] == nullptr. // computed_st must be reachable by repeatedly applying ->previous on states_to_update[0], if not nullptr. template - void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N]) const { - static_assert(N > 0); - assert(states_to_update[N-1] == nullptr); + void update_accumulator_incremental(const Position& pos, + StateInfo* computed_st, + StateInfo* states_to_update[N]) const { + static_assert(N > 0); + assert(states_to_update[N - 1] == nullptr); - #ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - #endif +#ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; +#endif - if (states_to_update[0] == nullptr) - return; + if (states_to_update[0] == nullptr) + return; - // Update incrementally going back through states_to_update. + // Update incrementally going back through states_to_update. - // Gather all features to be updated. - const Square ksq = pos.square(Perspective); + // Gather all features to be updated. + const Square ksq = pos.square(Perspective); - // The size must be enough to contain the largest possible update. - // That might depend on the feature set and generally relies on the - // feature set's update cost calculation to be correct and never - // allow updates with more added/removed features than MaxActiveDimensions. - FeatureSet::IndexList removed[N-1], added[N-1]; + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + FeatureSet::IndexList removed[N - 1], added[N - 1]; - { - int i = N-2; // last potential state to update. Skip last element because it must be nullptr. - while (states_to_update[i] == nullptr) - --i; + { + int i = + N + - 2; // last potential state to update. Skip last element because it must be nullptr. + while (states_to_update[i] == nullptr) + --i; - StateInfo* st2 = states_to_update[i]; + StateInfo* st2 = states_to_update[i]; - for (; i >= 0; --i) - { - states_to_update[i]->accumulator.computed[Perspective] = true; + for (; i >= 0; --i) { + states_to_update[i]->accumulator.computed[Perspective] = true; - const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; + const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; - for (; st2 != end_state; st2 = st2->previous) - FeatureSet::append_changed_indices( - ksq, st2->dirtyPiece, removed[i], added[i]); + for (; st2 != end_state; st2 = st2->previous) + FeatureSet::append_changed_indices(ksq, st2->dirtyPiece, + removed[i], added[i]); + } } - } - StateInfo* st = computed_st; + StateInfo* st = computed_st; - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. #ifdef VECTOR - if ( states_to_update[1] == nullptr - && (removed[0].size() == 1 || removed[0].size() == 2) - && added[0].size() == 1) - { - assert(states_to_update[0]); + if (states_to_update[1] == nullptr && (removed[0].size() == 1 || removed[0].size() == 2) + && added[0].size() == 1) { + assert(states_to_update[0]); - auto accIn = reinterpret_cast( - &st->accumulator.accumulation[Perspective][0]); - auto accOut = reinterpret_cast( + auto accIn = + reinterpret_cast(&st->accumulator.accumulation[Perspective][0]); + auto accOut = reinterpret_cast( &states_to_update[0]->accumulator.accumulation[Perspective][0]); - const IndexType offsetR0 = HalfDimensions * removed[0][0]; - auto columnR0 = reinterpret_cast(&weights[offsetR0]); - const IndexType offsetA = HalfDimensions * added[0][0]; - auto columnA = reinterpret_cast(&weights[offsetA]); - - if (removed[0].size() == 1) - { - for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k) - accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]); - } - else - { - const IndexType offsetR1 = HalfDimensions * removed[0][1]; - auto columnR1 = reinterpret_cast(&weights[offsetR1]); - - for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); ++k) - accOut[k] = vec_sub_16( - vec_add_16(accIn[k], columnA[k]), - vec_add_16(columnR0[k], columnR1[k])); - } - - auto accPsqtIn = reinterpret_cast( + const IndexType offsetR0 = HalfDimensions * removed[0][0]; + auto columnR0 = reinterpret_cast(&weights[offsetR0]); + const IndexType offsetA = HalfDimensions * added[0][0]; + auto columnA = reinterpret_cast(&weights[offsetA]); + + if (removed[0].size() == 1) { + for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); + ++k) + accOut[k] = vec_add_16(vec_sub_16(accIn[k], columnR0[k]), columnA[k]); + } else { + const IndexType offsetR1 = HalfDimensions * removed[0][1]; + auto columnR1 = reinterpret_cast(&weights[offsetR1]); + + for (IndexType k = 0; k < HalfDimensions * sizeof(std::int16_t) / sizeof(vec_t); + ++k) + accOut[k] = vec_sub_16(vec_add_16(accIn[k], columnA[k]), + vec_add_16(columnR0[k], columnR1[k])); + } + + auto accPsqtIn = reinterpret_cast( &st->accumulator.psqtAccumulation[Perspective][0]); - auto accPsqtOut = reinterpret_cast( + auto accPsqtOut = reinterpret_cast( &states_to_update[0]->accumulator.psqtAccumulation[Perspective][0]); - const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; - auto columnPsqtR0 = reinterpret_cast(&psqtWeights[offsetPsqtR0]); - const IndexType offsetPsqtA = PSQTBuckets * added[0][0]; - auto columnPsqtA = reinterpret_cast(&psqtWeights[offsetPsqtA]); - - if (removed[0].size() == 1) - { - for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k) - accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32( - accPsqtIn[k], columnPsqtR0[k]), columnPsqtA[k]); - } - else - { - const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1]; - auto columnPsqtR1 = reinterpret_cast(&psqtWeights[offsetPsqtR1]); - - for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); ++k) - accPsqtOut[k] = vec_sub_psqt_32( - vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]), + const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; + auto columnPsqtR0 = reinterpret_cast(&psqtWeights[offsetPsqtR0]); + const IndexType offsetPsqtA = PSQTBuckets * added[0][0]; + auto columnPsqtA = reinterpret_cast(&psqtWeights[offsetPsqtA]); + + if (removed[0].size() == 1) { + for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); + ++k) + accPsqtOut[k] = vec_add_psqt_32(vec_sub_psqt_32(accPsqtIn[k], columnPsqtR0[k]), + columnPsqtA[k]); + } else { + const IndexType offsetPsqtR1 = PSQTBuckets * removed[0][1]; + auto columnPsqtR1 = reinterpret_cast(&psqtWeights[offsetPsqtR1]); + + for (std::size_t k = 0; k < PSQTBuckets * sizeof(std::int32_t) / sizeof(psqt_vec_t); + ++k) + accPsqtOut[k] = + vec_sub_psqt_32(vec_add_psqt_32(accPsqtIn[k], columnPsqtA[k]), vec_add_psqt_32(columnPsqtR0[k], columnPsqtR1[k])); - } - } - else - { - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - // Load accumulator - auto accTileIn = reinterpret_cast( - &st->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_load(&accTileIn[k]); - - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); + } + } else { + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { + // Load accumulator + auto accTileIn = reinterpret_cast( + &st->accumulator.accumulation[Perspective][j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - // Store accumulator - auto accTileOut = reinterpret_cast( - &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_store(&accTileOut[k], acc[k]); + acc[k] = vec_load(&accTileIn[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + // Store accumulator + auto accTileOut = reinterpret_cast( + &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_store(&accTileOut[k], acc[k]); + } } - } - - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - // Load accumulator - auto accTilePsqtIn = reinterpret_cast( - &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } - - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { + // Load accumulator + auto accTilePsqtIn = reinterpret_cast( + &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - // Store accumulator - auto accTilePsqtOut = reinterpret_cast( - &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqtOut[k], psqt[k]); + psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) { + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + + // Difference calculation for the activated features + for (const auto index : added[i]) { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + // Store accumulator + auto accTilePsqtOut = reinterpret_cast( + &states_to_update[i] + ->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqtOut[k], psqt[k]); + } } - } - } + } #else - for (IndexType i = 0; states_to_update[i]; ++i) - { - std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], - st->accumulator.accumulation[Perspective], - HalfDimensions * sizeof(BiasType)); + for (IndexType i = 0; states_to_update[i]; ++i) { + std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], + st->accumulator.accumulation[Perspective], + HalfDimensions * sizeof(BiasType)); - for (std::size_t k = 0; k < PSQTBuckets; ++k) - states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; + for (std::size_t k = 0; k < PSQTBuckets; ++k) + states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = + st->accumulator.psqtAccumulation[Perspective][k]; - st = states_to_update[i]; + st = states_to_update[i]; - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index; + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; - } + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] -= + psqtWeights[index * PSQTBuckets + k]; + } - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index; + // Difference calculation for the activated features + for (const auto index : added[i]) { + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] += weights[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] += weights[offset + j]; - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] += + psqtWeights[index * PSQTBuckets + k]; + } } - } #endif } template void update_accumulator_refresh(const Position& pos) const { - #ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - #endif - - // Refresh the accumulator - // Could be extracted to a separate function because it's done in 2 places, - // but it's unclear if compilers would correctly handle register allocation. - auto& accumulator = pos.state()->accumulator; - accumulator.computed[Perspective] = true; - FeatureSet::IndexList active; - FeatureSet::append_active_indices(pos, active); +#ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; +#endif + + // Refresh the accumulator + // Could be extracted to a separate function because it's done in 2 places, + // but it's unclear if compilers would correctly handle register allocation. + auto& accumulator = pos.state()->accumulator; + accumulator.computed[Perspective] = true; + FeatureSet::IndexList active; + FeatureSet::append_active_indices(pos, active); #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - auto biasesTile = reinterpret_cast( - &biases[j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasesTile[k]; - - for (const auto index : active) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { + auto biasesTile = reinterpret_cast(&biases[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active) { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); - for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + auto accTile = + reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); + for (unsigned k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); } - auto accTile = reinterpret_cast( - &accumulator.accumulation[Perspective][j * TileHeight]); - for (unsigned k = 0; k < NumRegs; k++) - vec_store(&accTile[k], acc[k]); - } + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); + for (const auto index : active) { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (const auto index : active) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); } - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - #else - std::memcpy(accumulator.accumulation[Perspective], biases, - HalfDimensions * sizeof(BiasType)); + std::memcpy(accumulator.accumulation[Perspective], biases, + HalfDimensions * sizeof(BiasType)); - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] = 0; + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] = 0; - for (const auto index : active) - { - const IndexType offset = HalfDimensions * index; + for (const auto index : active) { + const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[Perspective][j] += weights[offset + j]; + for (IndexType j = 0; j < HalfDimensions; ++j) + accumulator.accumulation[Perspective][j] += weights[offset + j]; - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] += + psqtWeights[index * PSQTBuckets + k]; + } #endif } template void hint_common_access_for_perspective(const Position& pos) const { - // Works like update_accumulator, but performs less work. - // Updates ONLY the accumulator for pos. - - // Look for a usable accumulator of an earlier position. We keep track - // of the estimated gain in terms of features to be added/subtracted. - // Fast early exit. - if (pos.state()->accumulator.computed[Perspective]) - return; - - auto [oldest_st, _] = try_find_computed_accumulator(pos); - - if (oldest_st->accumulator.computed[Perspective]) - { - // Only update current position accumulator to minimize work. - StateInfo* states_to_update[2] = { pos.state(), nullptr }; - update_accumulator_incremental(pos, oldest_st, states_to_update); - } - else - { - update_accumulator_refresh(pos); - } + // Works like update_accumulator, but performs less work. + // Updates ONLY the accumulator for pos. + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + // Fast early exit. + if (pos.state()->accumulator.computed[Perspective]) + return; + + auto [oldest_st, _] = try_find_computed_accumulator(pos); + + if (oldest_st->accumulator.computed[Perspective]) { + // Only update current position accumulator to minimize work. + StateInfo* states_to_update[2] = {pos.state(), nullptr}; + update_accumulator_incremental(pos, oldest_st, states_to_update); + } else { + update_accumulator_refresh(pos); + } } template void update_accumulator(const Position& pos) const { - auto [oldest_st, next] = try_find_computed_accumulator(pos); + auto [oldest_st, next] = try_find_computed_accumulator(pos); - if (oldest_st->accumulator.computed[Perspective]) - { - if (next == nullptr) - return; + if (oldest_st->accumulator.computed[Perspective]) { + if (next == nullptr) + return; - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. - // Currently we update 2 accumulators. - // 1. for the current position - // 2. the next accumulator after the computed one - // The heuristic may change in the future. - StateInfo *states_to_update[3] = - { next, next == pos.state() ? nullptr : pos.state(), nullptr }; - - update_accumulator_incremental(pos, oldest_st, states_to_update); - } - else - { - update_accumulator_refresh(pos); - } + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Currently we update 2 accumulators. + // 1. for the current position + // 2. the next accumulator after the computed one + // The heuristic may change in the future. + StateInfo* states_to_update[3] = {next, next == pos.state() ? nullptr : pos.state(), + nullptr}; + + update_accumulator_incremental(pos, oldest_st, states_to_update); + } else { + update_accumulator_refresh(pos); + } } alignas(CacheLineSize) BiasType biases[HalfDimensions]; alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; - }; +}; } // namespace Stockfish::Eval::NNUE -#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/position.cpp b/src/position.cpp index ada371eb951..b5d6676ca49 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -46,59 +46,56 @@ namespace Stockfish { namespace Zobrist { - Key psq[PIECE_NB][SQUARE_NB]; - Key enpassant[FILE_NB]; - Key castling[CASTLING_RIGHT_NB]; - Key side; +Key psq[PIECE_NB][SQUARE_NB]; +Key enpassant[FILE_NB]; +Key castling[CASTLING_RIGHT_NB]; +Key side; } namespace { constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); -constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING }; -} // namespace +constexpr Piece Pieces[] = {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, + B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING}; +} // namespace // operator<<(Position) returns an ASCII representation of the position std::ostream& operator<<(std::ostream& os, const Position& pos) { - os << "\n +---+---+---+---+---+---+---+---+\n"; + os << "\n +---+---+---+---+---+---+---+---+\n"; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; + for (Rank r = RANK_8; r >= RANK_1; --r) { + for (File f = FILE_A; f <= FILE_H; ++f) + os << " | " << PieceToChar[pos.piece_on(make_square(f, r))]; - os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; - } - - os << " a b c d e f g h\n" - << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase - << std::setfill('0') << std::setw(16) << pos.key() - << std::setfill(' ') << std::dec << "\nCheckers: "; - - for (Bitboard b = pos.checkers(); b; ) - os << UCI::square(pop_lsb(b)) << " "; - - if ( int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) - && !pos.can_castle(ANY_CASTLING)) - { - StateInfo st; - ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + os << " | " << (1 + r) << "\n +---+---+---+---+---+---+---+---+\n"; + } - Position p; - p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); - Tablebases::ProbeState s1, s2; - Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); - int dtz = Tablebases::probe_dtz(p, &s2); - os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" - << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; - } + os << " a b c d e f g h\n" + << "\nFen: " << pos.fen() << "\nKey: " << std::hex << std::uppercase << std::setfill('0') + << std::setw(16) << pos.key() << std::setfill(' ') << std::dec << "\nCheckers: "; + + for (Bitboard b = pos.checkers(); b;) + os << UCI::square(pop_lsb(b)) << " "; + + if (int(Tablebases::MaxCardinality) >= popcount(pos.pieces()) + && !pos.can_castle(ANY_CASTLING)) { + StateInfo st; + ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); + + Position p; + p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread()); + Tablebases::ProbeState s1, s2; + Tablebases::WDLScore wdl = Tablebases::probe_wdl(p, &s1); + int dtz = Tablebases::probe_dtz(p, &s2); + os << "\nTablebases WDL: " << std::setw(4) << wdl << " (" << s1 << ")" + << "\nTablebases DTZ: " << std::setw(4) << dtz << " (" << s2 << ")"; + } - return os; + return os; } @@ -112,7 +109,7 @@ inline int H1(Key h) { return h & 0x1fff; } inline int H2(Key h) { return (h >> 16) & 0x1fff; } // Cuckoo tables with Zobrist hashes of valid reversible moves, and the moves themselves -Key cuckoo[8192]; +Key cuckoo[8192]; Move cuckooMove[8192]; @@ -120,43 +117,41 @@ Move cuckooMove[8192]; void Position::init() { - PRNG rng(1070372); - - for (Piece pc : Pieces) - for (Square s = SQ_A1; s <= SQ_H8; ++s) - Zobrist::psq[pc][s] = rng.rand(); - - for (File f = FILE_A; f <= FILE_H; ++f) - Zobrist::enpassant[f] = rng.rand(); - - for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) - Zobrist::castling[cr] = rng.rand(); - - Zobrist::side = rng.rand(); - - // Prepare the cuckoo tables - std::memset(cuckoo, 0, sizeof(cuckoo)); - std::memset(cuckooMove, 0, sizeof(cuckooMove)); - [[maybe_unused]] int count = 0; - for (Piece pc : Pieces) - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) - for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) - if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) - { - Move move = make_move(s1, s2); - Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; - int i = H1(key); - while (true) - { - std::swap(cuckoo[i], key); - std::swap(cuckooMove[i], move); - if (move == MOVE_NONE) // Arrived at empty slot? - break; - i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot - } - count++; - } - assert(count == 3668); + PRNG rng(1070372); + + for (Piece pc : Pieces) + for (Square s = SQ_A1; s <= SQ_H8; ++s) + Zobrist::psq[pc][s] = rng.rand(); + + for (File f = FILE_A; f <= FILE_H; ++f) + Zobrist::enpassant[f] = rng.rand(); + + for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) + Zobrist::castling[cr] = rng.rand(); + + Zobrist::side = rng.rand(); + + // Prepare the cuckoo tables + std::memset(cuckoo, 0, sizeof(cuckoo)); + std::memset(cuckooMove, 0, sizeof(cuckooMove)); + [[maybe_unused]] int count = 0; + for (Piece pc : Pieces) + for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) + for (Square s2 = Square(s1 + 1); s2 <= SQ_H8; ++s2) + if ((type_of(pc) != PAWN) && (attacks_bb(type_of(pc), s1, 0) & s2)) { + Move move = make_move(s1, s2); + Key key = Zobrist::psq[pc][s1] ^ Zobrist::psq[pc][s2] ^ Zobrist::side; + int i = H1(key); + while (true) { + std::swap(cuckoo[i], key); + std::swap(cuckooMove[i], move); + if (move == MOVE_NONE) // Arrived at empty slot? + break; + i = (i == H1(key)) ? H2(key) : H1(key); // Push victim to alternative slot + } + count++; + } + assert(count == 3668); } @@ -165,7 +160,7 @@ void Position::init() { // this is assumed to be the responsibility of the GUI. Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Thread* th) { -/* + /* A FEN string defines a particular position using only the ASCII character set. A FEN string contains six fields separated by a space. The fields are: @@ -200,100 +195,97 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th incremented after Black's move. */ - unsigned char col, row, token; - size_t idx; - Square sq = SQ_A8; - std::istringstream ss(fenStr); + unsigned char col, row, token; + size_t idx; + Square sq = SQ_A8; + std::istringstream ss(fenStr); - std::memset(this, 0, sizeof(Position)); - std::memset(si, 0, sizeof(StateInfo)); - st = si; + std::memset(this, 0, sizeof(Position)); + std::memset(si, 0, sizeof(StateInfo)); + st = si; - ss >> std::noskipws; + ss >> std::noskipws; - // 1. Piece placement - while ((ss >> token) && !isspace(token)) - { - if (isdigit(token)) - sq += (token - '0') * EAST; // Advance the given number of files + // 1. Piece placement + while ((ss >> token) && !isspace(token)) { + if (isdigit(token)) + sq += (token - '0') * EAST; // Advance the given number of files - else if (token == '/') - sq += 2 * SOUTH; + else if (token == '/') + sq += 2 * SOUTH; - else if ((idx = PieceToChar.find(token)) != string::npos) { - put_piece(Piece(idx), sq); - ++sq; - } - } - - // 2. Active color - ss >> token; - sideToMove = (token == 'w' ? WHITE : BLACK); - ss >> token; - - // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, - // Shredder-FEN that uses the letters of the columns on which the rooks began - // the game instead of KQkq and also X-FEN standard that, in case of Chess960, - // if an inner rook is associated with the castling right, the castling tag is - // replaced by the file letter of the involved rook, as for the Shredder-FEN. - while ((ss >> token) && !isspace(token)) - { - Square rsq; - Color c = islower(token) ? BLACK : WHITE; - Piece rook = make_piece(c, ROOK); + else if ((idx = PieceToChar.find(token)) != string::npos) { + put_piece(Piece(idx), sq); + ++sq; + } + } - token = char(toupper(token)); + // 2. Active color + ss >> token; + sideToMove = (token == 'w' ? WHITE : BLACK); + ss >> token; - if (token == 'K') - for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) {} + // 3. Castling availability. Compatible with 3 standards: Normal FEN standard, + // Shredder-FEN that uses the letters of the columns on which the rooks began + // the game instead of KQkq and also X-FEN standard that, in case of Chess960, + // if an inner rook is associated with the castling right, the castling tag is + // replaced by the file letter of the involved rook, as for the Shredder-FEN. + while ((ss >> token) && !isspace(token)) { + Square rsq; + Color c = islower(token) ? BLACK : WHITE; + Piece rook = make_piece(c, ROOK); - else if (token == 'Q') - for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) {} + token = char(toupper(token)); - else if (token >= 'A' && token <= 'H') - rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); + if (token == 'K') + for (rsq = relative_square(c, SQ_H1); piece_on(rsq) != rook; --rsq) {} - else - continue; + else if (token == 'Q') + for (rsq = relative_square(c, SQ_A1); piece_on(rsq) != rook; ++rsq) {} - set_castling_right(c, rsq); - } + else if (token >= 'A' && token <= 'H') + rsq = make_square(File(token - 'A'), relative_rank(c, RANK_1)); - // 4. En passant square. - // Ignore if square is invalid or not on side to move relative rank 6. - bool enpassant = false; + else + continue; - if ( ((ss >> col) && (col >= 'a' && col <= 'h')) - && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) - { - st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); + set_castling_right(c, rsq); + } - // En passant square will be considered only if - // a) side to move have a pawn threatening epSquare - // b) there is an enemy pawn in front of epSquare - // c) there is no piece on epSquare or behind epSquare - enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) - && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) - && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); - } + // 4. En passant square. + // Ignore if square is invalid or not on side to move relative rank 6. + bool enpassant = false; + + if (((ss >> col) && (col >= 'a' && col <= 'h')) + && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) { + st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); + + // En passant square will be considered only if + // a) side to move have a pawn threatening epSquare + // b) there is an enemy pawn in front of epSquare + // c) there is no piece on epSquare or behind epSquare + enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) + && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) + && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); + } - if (!enpassant) - st->epSquare = SQ_NONE; + if (!enpassant) + st->epSquare = SQ_NONE; - // 5-6. Halfmove clock and fullmove number - ss >> std::skipws >> st->rule50 >> gamePly; + // 5-6. Halfmove clock and fullmove number + ss >> std::skipws >> st->rule50 >> gamePly; - // Convert from fullmove starting from 1 to gamePly starting from 0, - // handle also common incorrect FEN with fullmove = 0. - gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); + // Convert from fullmove starting from 1 to gamePly starting from 0, + // handle also common incorrect FEN with fullmove = 0. + gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK); - chess960 = isChess960; - thisThread = th; - set_state(); + chess960 = isChess960; + thisThread = th; + set_state(); - assert(pos_is_ok()); + assert(pos_is_ok()); - return *this; + return *this; } @@ -302,19 +294,18 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th void Position::set_castling_right(Color c, Square rfrom) { - Square kfrom = square(c); - CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE: QUEEN_SIDE); + Square kfrom = square(c); + CastlingRights cr = c & (kfrom < rfrom ? KING_SIDE : QUEEN_SIDE); - st->castlingRights |= cr; - castlingRightsMask[kfrom] |= cr; - castlingRightsMask[rfrom] |= cr; - castlingRookSquare[cr] = rfrom; + st->castlingRights |= cr; + castlingRightsMask[kfrom] |= cr; + castlingRightsMask[rfrom] |= cr; + castlingRookSquare[cr] = rfrom; - Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); - Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); + Square kto = relative_square(c, cr & KING_SIDE ? SQ_G1 : SQ_C1); + Square rto = relative_square(c, cr & KING_SIDE ? SQ_F1 : SQ_D1); - castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) - & ~(kfrom | rfrom); + castlingPath[cr] = (between_bb(rfrom, rto) | between_bb(kfrom, kto)) & ~(kfrom | rfrom); } @@ -322,17 +313,17 @@ void Position::set_castling_right(Color c, Square rfrom) { void Position::set_check_info() const { - update_slider_blockers(WHITE); - update_slider_blockers(BLACK); + update_slider_blockers(WHITE); + update_slider_blockers(BLACK); - Square ksq = square(~sideToMove); + Square ksq = square(~sideToMove); - st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); - st->checkSquares[KNIGHT] = attacks_bb(ksq); - st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); - st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); - st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; - st->checkSquares[KING] = 0; + st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); + st->checkSquares[KNIGHT] = attacks_bb(ksq); + st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); + st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); + st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; + st->checkSquares[KING] = 0; } @@ -342,33 +333,32 @@ void Position::set_check_info() const { void Position::set_state() const { - st->key = st->materialKey = 0; - st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; - st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); + st->key = st->materialKey = 0; + st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; + st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); - set_check_info(); + set_check_info(); - for (Bitboard b = pieces(); b; ) - { - Square s = pop_lsb(b); - Piece pc = piece_on(s); - st->key ^= Zobrist::psq[pc][s]; + for (Bitboard b = pieces(); b;) { + Square s = pop_lsb(b); + Piece pc = piece_on(s); + st->key ^= Zobrist::psq[pc][s]; - if (type_of(pc) != KING && type_of(pc) != PAWN) - st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; - } + if (type_of(pc) != KING && type_of(pc) != PAWN) + st->nonPawnMaterial[color_of(pc)] += PieceValue[pc]; + } - if (st->epSquare != SQ_NONE) - st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; + if (st->epSquare != SQ_NONE) + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - if (sideToMove == BLACK) - st->key ^= Zobrist::side; + if (sideToMove == BLACK) + st->key ^= Zobrist::side; - st->key ^= Zobrist::castling[st->castlingRights]; + st->key ^= Zobrist::castling[st->castlingRights]; - for (Piece pc : Pieces) - for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) - st->materialKey ^= Zobrist::psq[pc][cnt]; + for (Piece pc : Pieces) + for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) + st->materialKey ^= Zobrist::psq[pc][cnt]; } @@ -378,20 +368,20 @@ void Position::set_state() const { Position& Position::set(const string& code, Color c, StateInfo* si) { - assert(code[0] == 'K'); + assert(code[0] == 'K'); - string sides[] = { code.substr(code.find('K', 1)), // Weak - code.substr(0, std::min(code.find('v'), code.find('K', 1))) }; // Strong + string sides[] = {code.substr(code.find('K', 1)), // Weak + code.substr(0, std::min(code.find('v'), code.find('K', 1)))}; // Strong - assert(sides[0].length() > 0 && sides[0].length() < 8); - assert(sides[1].length() > 0 && sides[1].length() < 8); + assert(sides[0].length() > 0 && sides[0].length() < 8); + assert(sides[1].length() > 0 && sides[1].length() < 8); - std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); + std::transform(sides[c].begin(), sides[c].end(), sides[c].begin(), tolower); - string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" - + sides[1] + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; + string fenStr = "8/" + sides[0] + char(8 - sides[0].length() + '0') + "/8/8/8/8/" + sides[1] + + char(8 - sides[1].length() + '0') + "/8 w - - 0 10"; - return set(fenStr, false, si, nullptr); + return set(fenStr, false, si, nullptr); } @@ -400,48 +390,46 @@ Position& Position::set(const string& code, Color c, StateInfo* si) { string Position::fen() const { - int emptyCnt; - std::ostringstream ss; + int emptyCnt; + std::ostringstream ss; - for (Rank r = RANK_8; r >= RANK_1; --r) - { - for (File f = FILE_A; f <= FILE_H; ++f) - { - for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) - ++emptyCnt; + for (Rank r = RANK_8; r >= RANK_1; --r) { + for (File f = FILE_A; f <= FILE_H; ++f) { + for (emptyCnt = 0; f <= FILE_H && empty(make_square(f, r)); ++f) + ++emptyCnt; - if (emptyCnt) - ss << emptyCnt; + if (emptyCnt) + ss << emptyCnt; - if (f <= FILE_H) - ss << PieceToChar[piece_on(make_square(f, r))]; - } + if (f <= FILE_H) + ss << PieceToChar[piece_on(make_square(f, r))]; + } - if (r > RANK_1) - ss << '/'; - } + if (r > RANK_1) + ss << '/'; + } - ss << (sideToMove == WHITE ? " w " : " b "); + ss << (sideToMove == WHITE ? " w " : " b "); - if (can_castle(WHITE_OO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO ))) : 'K'); + if (can_castle(WHITE_OO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OO))) : 'K'); - if (can_castle(WHITE_OOO)) - ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); + if (can_castle(WHITE_OOO)) + ss << (chess960 ? char('A' + file_of(castling_rook_square(WHITE_OOO))) : 'Q'); - if (can_castle(BLACK_OO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO ))) : 'k'); + if (can_castle(BLACK_OO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OO))) : 'k'); - if (can_castle(BLACK_OOO)) - ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); + if (can_castle(BLACK_OOO)) + ss << (chess960 ? char('a' + file_of(castling_rook_square(BLACK_OOO))) : 'q'); - if (!can_castle(ANY_CASTLING)) - ss << '-'; + if (!can_castle(ANY_CASTLING)) + ss << '-'; - ss << (ep_square() == SQ_NONE ? " - " : " " + UCI::square(ep_square()) + " ") - << st->rule50 << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; + ss << (ep_square() == SQ_NONE ? " - " : " " + UCI::square(ep_square()) + " ") << st->rule50 + << " " << 1 + (gamePly - (sideToMove == BLACK)) / 2; - return ss.str(); + return ss.str(); } // update_slider_blockers() calculates st->blockersForKing[c] and st->pinners[~c], @@ -449,28 +437,27 @@ string Position::fen() const { // and the slider pieces of color ~c pinning pieces of color c to the king. void Position::update_slider_blockers(Color c) const { - Square ksq = square(c); + Square ksq = square(c); - st->blockersForKing[c] = 0; - st->pinners[~c] = 0; + st->blockersForKing[c] = 0; + st->pinners[~c] = 0; - // Snipers are sliders that attack 's' when a piece and other snipers are removed - Bitboard snipers = ( (attacks_bb< ROOK>(ksq) & pieces(QUEEN, ROOK)) - | (attacks_bb(ksq) & pieces(QUEEN, BISHOP))) & pieces(~c); - Bitboard occupancy = pieces() ^ snipers; + // Snipers are sliders that attack 's' when a piece and other snipers are removed + Bitboard snipers = ((attacks_bb(ksq) & pieces(QUEEN, ROOK)) + | (attacks_bb(ksq) & pieces(QUEEN, BISHOP))) + & pieces(~c); + Bitboard occupancy = pieces() ^ snipers; - while (snipers) - { - Square sniperSq = pop_lsb(snipers); - Bitboard b = between_bb(ksq, sniperSq) & occupancy; + while (snipers) { + Square sniperSq = pop_lsb(snipers); + Bitboard b = between_bb(ksq, sniperSq) & occupancy; - if (b && !more_than_one(b)) - { - st->blockersForKing[c] |= b; - if (b & pieces(c)) - st->pinners[~c] |= sniperSq; + if (b && !more_than_one(b)) { + st->blockersForKing[c] |= b; + if (b & pieces(c)) + st->pinners[~c] |= sniperSq; + } } - } } @@ -479,12 +466,12 @@ void Position::update_slider_blockers(Color c) const { Bitboard Position::attackers_to(Square s, Bitboard occupied) const { - return (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN)) - | (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN)) - | (attacks_bb(s) & pieces(KNIGHT)) - | (attacks_bb< ROOK>(s, occupied) & pieces( ROOK, QUEEN)) - | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(s) & pieces(KING)); + return (pawn_attacks_bb(BLACK, s) & pieces(WHITE, PAWN)) + | (pawn_attacks_bb(WHITE, s) & pieces(BLACK, PAWN)) + | (attacks_bb(s) & pieces(KNIGHT)) + | (attacks_bb(s, occupied) & pieces(ROOK, QUEEN)) + | (attacks_bb(s, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(s) & pieces(KING)); } @@ -492,60 +479,57 @@ Bitboard Position::attackers_to(Square s, Bitboard occupied) const { bool Position::legal(Move m) const { - assert(is_ok(m)); + assert(is_ok(m)); - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); - assert(color_of(moved_piece(m)) == us); - assert(piece_on(square(us)) == make_piece(us, KING)); + assert(color_of(moved_piece(m)) == us); + assert(piece_on(square(us)) == make_piece(us, KING)); - // En passant captures are a tricky special case. Because they are rather - // uncommon, we do it simply by testing whether the king is attacked after - // the move is made. - if (type_of(m) == EN_PASSANT) - { - Square ksq = square(us); - Square capsq = to - pawn_push(us); - Bitboard occupied = (pieces() ^ from ^ capsq) | to; + // En passant captures are a tricky special case. Because they are rather + // uncommon, we do it simply by testing whether the king is attacked after + // the move is made. + if (type_of(m) == EN_PASSANT) { + Square ksq = square(us); + Square capsq = to - pawn_push(us); + Bitboard occupied = (pieces() ^ from ^ capsq) | to; - assert(to == ep_square()); - assert(moved_piece(m) == make_piece(us, PAWN)); - assert(piece_on(capsq) == make_piece(~us, PAWN)); - assert(piece_on(to) == NO_PIECE); + assert(to == ep_square()); + assert(moved_piece(m) == make_piece(us, PAWN)); + assert(piece_on(capsq) == make_piece(~us, PAWN)); + assert(piece_on(to) == NO_PIECE); - return !(attacks_bb< ROOK>(ksq, occupied) & pieces(~us, QUEEN, ROOK)) + return !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, ROOK)) && !(attacks_bb(ksq, occupied) & pieces(~us, QUEEN, BISHOP)); - } - - // Castling moves generation does not check if the castling path is clear of - // enemy attacks, it is delayed at a later time: now! - if (type_of(m) == CASTLING) - { - // After castling, the rook and king final positions are the same in - // Chess960 as they would be in standard chess. - to = relative_square(us, to > from ? SQ_G1 : SQ_C1); - Direction step = to > from ? WEST : EAST; - - for (Square s = to; s != from; s += step) - if (attackers_to(s) & pieces(~us)) - return false; - - // In case of Chess960, verify if the Rook blocks some checks. - // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. - return !chess960 || !(blockers_for_king(us) & to_sq(m)); - } - - // If the moving piece is a king, check whether the destination square is - // attacked by the opponent. - if (type_of(piece_on(from)) == KING) - return !(attackers_to(to, pieces() ^ from) & pieces(~us)); - - // A non-king move is legal if and only if it is not pinned or it - // is moving along the ray towards or away from the king. - return !(blockers_for_king(us) & from) - || aligned(from, to, square(us)); + } + + // Castling moves generation does not check if the castling path is clear of + // enemy attacks, it is delayed at a later time: now! + if (type_of(m) == CASTLING) { + // After castling, the rook and king final positions are the same in + // Chess960 as they would be in standard chess. + to = relative_square(us, to > from ? SQ_G1 : SQ_C1); + Direction step = to > from ? WEST : EAST; + + for (Square s = to; s != from; s += step) + if (attackers_to(s) & pieces(~us)) + return false; + + // In case of Chess960, verify if the Rook blocks some checks. + // For instance an enemy queen in SQ_A1 when castling rook is in SQ_B1. + return !chess960 || !(blockers_for_king(us) & to_sq(m)); + } + + // If the moving piece is a king, check whether the destination square is + // attacked by the opponent. + if (type_of(piece_on(from)) == KING) + return !(attackers_to(to, pieces() ^ from) & pieces(~us)); + + // A non-king move is legal if and only if it is not pinned or it + // is moving along the ray towards or away from the king. + return !(blockers_for_king(us) & from) || aligned(from, to, square(us)); } @@ -555,70 +539,64 @@ bool Position::legal(Move m) const { bool Position::pseudo_legal(const Move m) const { - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = moved_piece(m); - - // Use a slower but simpler function for uncommon cases - // yet we skip the legality check of MoveList(). - if (type_of(m) != NORMAL) - return checkers() ? MoveList< EVASIONS>(*this).contains(m) - : MoveList(*this).contains(m); - - // Is not a promotion, so the promotion piece must be empty - assert(promotion_type(m) - KNIGHT == NO_PIECE_TYPE); - - // If the 'from' square is not occupied by a piece belonging to the side to - // move, the move is obviously not legal. - if (pc == NO_PIECE || color_of(pc) != us) - return false; - - // The destination square cannot be occupied by a friendly piece - if (pieces(us) & to) - return false; - - // Handle the special case of a pawn move - if (type_of(pc) == PAWN) - { - // We have already handled promotion moves, so destination - // cannot be on the 8th/1st rank. - if ((Rank8BB | Rank1BB) & to) - return false; - - if ( !(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture - && !((from + pawn_push(us) == to) && empty(to)) // Not a single push - && !( (from + 2 * pawn_push(us) == to) // Not a double push - && (relative_rank(us, from) == RANK_2) - && empty(to) - && empty(to - pawn_push(us)))) - return false; - } - else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) - return false; - - // Evasions generator already takes care to avoid some kind of illegal moves - // and legal() relies on this. We therefore have to take care that the same - // kind of moves are filtered out here. - if (checkers()) - { - if (type_of(pc) != KING) - { - // Double check? In this case, a king move is required - if (more_than_one(checkers())) - return false; - - // Our move must be a blocking interposition or a capture of the checking piece - if (!(between_bb(square(us), lsb(checkers())) & to)) - return false; - } - // In case of king moves under check we have to remove the king so as to catch - // invalid moves like b1a1 when opposite queen is on c1. - else if (attackers_to(to, pieces() ^ from) & pieces(~us)) - return false; - } - - return true; + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = moved_piece(m); + + // Use a slower but simpler function for uncommon cases + // yet we skip the legality check of MoveList(). + if (type_of(m) != NORMAL) + return checkers() ? MoveList(*this).contains(m) + : MoveList(*this).contains(m); + + // Is not a promotion, so the promotion piece must be empty + assert(promotion_type(m) - KNIGHT == NO_PIECE_TYPE); + + // If the 'from' square is not occupied by a piece belonging to the side to + // move, the move is obviously not legal. + if (pc == NO_PIECE || color_of(pc) != us) + return false; + + // The destination square cannot be occupied by a friendly piece + if (pieces(us) & to) + return false; + + // Handle the special case of a pawn move + if (type_of(pc) == PAWN) { + // We have already handled promotion moves, so destination + // cannot be on the 8th/1st rank. + if ((Rank8BB | Rank1BB) & to) + return false; + + if (!(pawn_attacks_bb(us, from) & pieces(~us) & to) // Not a capture + && !((from + pawn_push(us) == to) && empty(to)) // Not a single push + && !((from + 2 * pawn_push(us) == to) // Not a double push + && (relative_rank(us, from) == RANK_2) && empty(to) && empty(to - pawn_push(us)))) + return false; + } else if (!(attacks_bb(type_of(pc), from, pieces()) & to)) + return false; + + // Evasions generator already takes care to avoid some kind of illegal moves + // and legal() relies on this. We therefore have to take care that the same + // kind of moves are filtered out here. + if (checkers()) { + if (type_of(pc) != KING) { + // Double check? In this case, a king move is required + if (more_than_one(checkers())) + return false; + + // Our move must be a blocking interposition or a capture of the checking piece + if (!(between_bb(square(us), lsb(checkers())) & to)) + return false; + } + // In case of king moves under check we have to remove the king so as to catch + // invalid moves like b1a1 when opposite queen is on c1. + else if (attackers_to(to, pieces() ^ from) & pieces(~us)) + return false; + } + + return true; } @@ -626,49 +604,46 @@ bool Position::pseudo_legal(const Move m) const { bool Position::gives_check(Move m) const { - assert(is_ok(m)); - assert(color_of(moved_piece(m)) == sideToMove); - - Square from = from_sq(m); - Square to = to_sq(m); - - // Is there a direct check? - if (check_squares(type_of(piece_on(from))) & to) - return true; - - // Is there a discovered check? - if (blockers_for_king(~sideToMove) & from) - return !aligned(from, to, square(~sideToMove)) - || type_of(m) == CASTLING; - - switch (type_of(m)) - { - case NORMAL: - return false; - - case PROMOTION: - return attacks_bb(promotion_type(m), to, pieces() ^ from) & square(~sideToMove); - - // En passant capture with check? We have already handled the case - // of direct checks and ordinary discovered check, so the only case we - // need to handle is the unusual case of a discovered check through - // the captured pawn. - case EN_PASSANT: - { - Square capsq = make_square(file_of(to), rank_of(from)); - Bitboard b = (pieces() ^ from ^ capsq) | to; - - return (attacks_bb< ROOK>(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) - | (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, BISHOP)); - } - default: //CASTLING - { - // Castling is encoded as 'king captures the rook' - Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); - - return check_squares(ROOK) & rto; - } - } + assert(is_ok(m)); + assert(color_of(moved_piece(m)) == sideToMove); + + Square from = from_sq(m); + Square to = to_sq(m); + + // Is there a direct check? + if (check_squares(type_of(piece_on(from))) & to) + return true; + + // Is there a discovered check? + if (blockers_for_king(~sideToMove) & from) + return !aligned(from, to, square(~sideToMove)) || type_of(m) == CASTLING; + + switch (type_of(m)) { + case NORMAL : return false; + + case PROMOTION : + return attacks_bb(promotion_type(m), to, pieces() ^ from) & square(~sideToMove); + + // En passant capture with check? We have already handled the case + // of direct checks and ordinary discovered check, so the only case we + // need to handle is the unusual case of a discovered check through + // the captured pawn. + case EN_PASSANT : { + Square capsq = make_square(file_of(to), rank_of(from)); + Bitboard b = (pieces() ^ from ^ capsq) | to; + + return (attacks_bb(square(~sideToMove), b) & pieces(sideToMove, QUEEN, ROOK)) + | (attacks_bb(square(~sideToMove), b) + & pieces(sideToMove, QUEEN, BISHOP)); + } + default : //CASTLING + { + // Castling is encoded as 'king captures the rook' + Square rto = relative_square(sideToMove, to > from ? SQ_F1 : SQ_D1); + + return check_squares(ROOK) & rto; + } + } } @@ -678,195 +653,181 @@ bool Position::gives_check(Move m) const { void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { - assert(is_ok(m)); - assert(&newSt != st); - - thisThread->nodes.fetch_add(1, std::memory_order_relaxed); - Key k = st->key ^ Zobrist::side; - - // Copy some fields of the old state to our new StateInfo object except the - // ones which are going to be recalculated from scratch anyway and then switch - // our state pointer to point to the new (ready to be updated) state. - std::memcpy(&newSt, st, offsetof(StateInfo, key)); - newSt.previous = st; - st = &newSt; - - // Increment ply counters. In particular, rule50 will be reset to zero later on - // in case of a capture or a pawn move. - ++gamePly; - ++st->rule50; - ++st->pliesFromNull; - - // Used by NNUE - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; - auto& dp = st->dirtyPiece; - dp.dirty_num = 1; - - Color us = sideToMove; - Color them = ~us; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(from); - Piece captured = type_of(m) == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); - - assert(color_of(pc) == us); - assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us)); - assert(type_of(captured) != KING); - - if (type_of(m) == CASTLING) - { - assert(pc == make_piece(us, KING)); - assert(captured == make_piece(us, ROOK)); - - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto); - - k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; - captured = NO_PIECE; - } - - if (captured) - { - Square capsq = to; - - // If the captured piece is a pawn, update pawn hash key, otherwise - // update non-pawn material. - if (type_of(captured) == PAWN) - { - if (type_of(m) == EN_PASSANT) - { - capsq -= pawn_push(us); - - assert(pc == make_piece(us, PAWN)); - assert(to == st->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(to) == NO_PIECE); - assert(piece_on(capsq) == make_piece(them, PAWN)); - } - } - else - st->nonPawnMaterial[them] -= PieceValue[captured]; - - dp.dirty_num = 2; // 1 piece moved, 1 piece captured - dp.piece[1] = captured; - dp.from[1] = capsq; - dp.to[1] = SQ_NONE; - - // Update board and piece lists - remove_piece(capsq); - - // Update material hash key and prefetch access to materialTable - k ^= Zobrist::psq[captured][capsq]; - st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; - - // Reset rule 50 counter - st->rule50 = 0; - } - - // Update hash key - k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; - - // Reset en passant square - if (st->epSquare != SQ_NONE) - { - k ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } - - // Update castling rights if needed - if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) - { - k ^= Zobrist::castling[st->castlingRights]; - st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); - k ^= Zobrist::castling[st->castlingRights]; - } - - // Move the piece. The tricky Chess960 castling is handled earlier - if (type_of(m) != CASTLING) - { - dp.piece[0] = pc; - dp.from[0] = from; - dp.to[0] = to; - - move_piece(from, to); - } - - // If the moving piece is a pawn do some special extra work - if (type_of(pc) == PAWN) - { - // Set en passant square if the moved pawn can be captured - if ( (int(to) ^ int(from)) == 16 - && (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN))) - { - st->epSquare = to - pawn_push(us); - k ^= Zobrist::enpassant[file_of(st->epSquare)]; - } - - else if (type_of(m) == PROMOTION) - { - Piece promotion = make_piece(us, promotion_type(m)); - - assert(relative_rank(us, to) == RANK_8); - assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); - - remove_piece(to); - put_piece(promotion, to); - - // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE - dp.to[0] = SQ_NONE; - dp.piece[dp.dirty_num] = promotion; - dp.from[dp.dirty_num] = SQ_NONE; - dp.to[dp.dirty_num] = to; - dp.dirty_num++; - - // Update hash keys - k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; - st->materialKey ^= Zobrist::psq[promotion][pieceCount[promotion]-1] - ^ Zobrist::psq[pc][pieceCount[pc]]; - - // Update material - st->nonPawnMaterial[us] += PieceValue[promotion]; - } - - // Reset rule 50 draw counter - st->rule50 = 0; - } - - // Set capture piece - st->capturedPiece = captured; - - // Update the key with the final value - st->key = k; - - // Calculate checkers bitboard (if move gives check) - st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; - - sideToMove = ~sideToMove; - - // Update king attacks used for fast check detection - set_check_info(); - - // Calculate the repetition info. It is the ply distance from the previous - // occurrence of the same position, negative in the 3-fold case, or zero - // if the position was not repeated. - st->repetition = 0; - int end = std::min(st->rule50, st->pliesFromNull); - if (end >= 4) - { - StateInfo* stp = st->previous->previous; - for (int i = 4; i <= end; i += 2) - { - stp = stp->previous->previous; - if (stp->key == st->key) - { - st->repetition = stp->repetition ? -i : i; - break; - } - } - } - - assert(pos_is_ok()); + assert(is_ok(m)); + assert(&newSt != st); + + thisThread->nodes.fetch_add(1, std::memory_order_relaxed); + Key k = st->key ^ Zobrist::side; + + // Copy some fields of the old state to our new StateInfo object except the + // ones which are going to be recalculated from scratch anyway and then switch + // our state pointer to point to the new (ready to be updated) state. + std::memcpy(&newSt, st, offsetof(StateInfo, key)); + newSt.previous = st; + st = &newSt; + + // Increment ply counters. In particular, rule50 will be reset to zero later on + // in case of a capture or a pawn move. + ++gamePly; + ++st->rule50; + ++st->pliesFromNull; + + // Used by NNUE + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; + auto& dp = st->dirtyPiece; + dp.dirty_num = 1; + + Color us = sideToMove; + Color them = ~us; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(from); + Piece captured = type_of(m) == EN_PASSANT ? make_piece(them, PAWN) : piece_on(to); + + assert(color_of(pc) == us); + assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us)); + assert(type_of(captured) != KING); + + if (type_of(m) == CASTLING) { + assert(pc == make_piece(us, KING)); + assert(captured == make_piece(us, ROOK)); + + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); + + k ^= Zobrist::psq[captured][rfrom] ^ Zobrist::psq[captured][rto]; + captured = NO_PIECE; + } + + if (captured) { + Square capsq = to; + + // If the captured piece is a pawn, update pawn hash key, otherwise + // update non-pawn material. + if (type_of(captured) == PAWN) { + if (type_of(m) == EN_PASSANT) { + capsq -= pawn_push(us); + + assert(pc == make_piece(us, PAWN)); + assert(to == st->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(to) == NO_PIECE); + assert(piece_on(capsq) == make_piece(them, PAWN)); + } + } else + st->nonPawnMaterial[them] -= PieceValue[captured]; + + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; + + // Update board and piece lists + remove_piece(capsq); + + // Update material hash key and prefetch access to materialTable + k ^= Zobrist::psq[captured][capsq]; + st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; + + // Reset rule 50 counter + st->rule50 = 0; + } + + // Update hash key + k ^= Zobrist::psq[pc][from] ^ Zobrist::psq[pc][to]; + + // Reset en passant square + if (st->epSquare != SQ_NONE) { + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } + + // Update castling rights if needed + if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) { + k ^= Zobrist::castling[st->castlingRights]; + st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); + k ^= Zobrist::castling[st->castlingRights]; + } + + // Move the piece. The tricky Chess960 castling is handled earlier + if (type_of(m) != CASTLING) { + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; + + move_piece(from, to); + } + + // If the moving piece is a pawn do some special extra work + if (type_of(pc) == PAWN) { + // Set en passant square if the moved pawn can be captured + if ((int(to) ^ int(from)) == 16 + && (pawn_attacks_bb(us, to - pawn_push(us)) & pieces(them, PAWN))) { + st->epSquare = to - pawn_push(us); + k ^= Zobrist::enpassant[file_of(st->epSquare)]; + } + + else if (type_of(m) == PROMOTION) { + Piece promotion = make_piece(us, promotion_type(m)); + + assert(relative_rank(us, to) == RANK_8); + assert(type_of(promotion) >= KNIGHT && type_of(promotion) <= QUEEN); + + remove_piece(to); + put_piece(promotion, to); + + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; + + // Update hash keys + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; + st->materialKey ^= + Zobrist::psq[promotion][pieceCount[promotion] - 1] ^ Zobrist::psq[pc][pieceCount[pc]]; + + // Update material + st->nonPawnMaterial[us] += PieceValue[promotion]; + } + + // Reset rule 50 draw counter + st->rule50 = 0; + } + + // Set capture piece + st->capturedPiece = captured; + + // Update the key with the final value + st->key = k; + + // Calculate checkers bitboard (if move gives check) + st->checkersBB = givesCheck ? attackers_to(square(them)) & pieces(us) : 0; + + sideToMove = ~sideToMove; + + // Update king attacks used for fast check detection + set_check_info(); + + // Calculate the repetition info. It is the ply distance from the previous + // occurrence of the same position, negative in the 3-fold case, or zero + // if the position was not repeated. + st->repetition = 0; + int end = std::min(st->rule50, st->pliesFromNull); + if (end >= 4) { + StateInfo* stp = st->previous->previous; + for (int i = 4; i <= end; i += 2) { + stp = stp->previous->previous; + if (stp->key == st->key) { + st->repetition = stp->repetition ? -i : i; + break; + } + } + } + + assert(pos_is_ok()); } @@ -875,62 +836,56 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { void Position::undo_move(Move m) { - assert(is_ok(m)); - - sideToMove = ~sideToMove; - - Color us = sideToMove; - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(to); - - assert(empty(from) || type_of(m) == CASTLING); - assert(type_of(st->capturedPiece) != KING); - - if (type_of(m) == PROMOTION) - { - assert(relative_rank(us, to) == RANK_8); - assert(type_of(pc) == promotion_type(m)); - assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); - - remove_piece(to); - pc = make_piece(us, PAWN); - put_piece(pc, to); - } - - if (type_of(m) == CASTLING) - { - Square rfrom, rto; - do_castling(us, from, to, rfrom, rto); - } - else - { - move_piece(to, from); // Put the piece back at the source square - - if (st->capturedPiece) - { - Square capsq = to; - - if (type_of(m) == EN_PASSANT) - { - capsq -= pawn_push(us); - - assert(type_of(pc) == PAWN); - assert(to == st->previous->epSquare); - assert(relative_rank(us, to) == RANK_6); - assert(piece_on(capsq) == NO_PIECE); - assert(st->capturedPiece == make_piece(~us, PAWN)); - } - - put_piece(st->capturedPiece, capsq); // Restore the captured piece - } - } - - // Finally point our state pointer back to the previous state - st = st->previous; - --gamePly; - - assert(pos_is_ok()); + assert(is_ok(m)); + + sideToMove = ~sideToMove; + + Color us = sideToMove; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(to); + + assert(empty(from) || type_of(m) == CASTLING); + assert(type_of(st->capturedPiece) != KING); + + if (type_of(m) == PROMOTION) { + assert(relative_rank(us, to) == RANK_8); + assert(type_of(pc) == promotion_type(m)); + assert(type_of(pc) >= KNIGHT && type_of(pc) <= QUEEN); + + remove_piece(to); + pc = make_piece(us, PAWN); + put_piece(pc, to); + } + + if (type_of(m) == CASTLING) { + Square rfrom, rto; + do_castling(us, from, to, rfrom, rto); + } else { + move_piece(to, from); // Put the piece back at the source square + + if (st->capturedPiece) { + Square capsq = to; + + if (type_of(m) == EN_PASSANT) { + capsq -= pawn_push(us); + + assert(type_of(pc) == PAWN); + assert(to == st->previous->epSquare); + assert(relative_rank(us, to) == RANK_6); + assert(piece_on(capsq) == NO_PIECE); + assert(st->capturedPiece == make_piece(~us, PAWN)); + } + + put_piece(st->capturedPiece, capsq); // Restore the captured piece + } + } + + // Finally point our state pointer back to the previous state + st = st->previous; + --gamePly; + + assert(pos_is_ok()); } @@ -939,29 +894,29 @@ void Position::undo_move(Move m) { template void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) { - bool kingSide = to > from; - rfrom = to; // Castling is encoded as "king captures friendly rook" - rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); - to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - - if (Do) - { - auto& dp = st->dirtyPiece; - dp.piece[0] = make_piece(us, KING); - dp.from[0] = from; - dp.to[0] = to; - dp.piece[1] = make_piece(us, ROOK); - dp.from[1] = rfrom; - dp.to[1] = rto; - dp.dirty_num = 2; - } - - // Remove both pieces first since squares could overlap in Chess960 - remove_piece(Do ? from : to); - remove_piece(Do ? rfrom : rto); - board[Do ? from : to] = board[Do ? rfrom : rto] = NO_PIECE; // remove_piece does not do this for us - put_piece(make_piece(us, KING), Do ? to : from); - put_piece(make_piece(us, ROOK), Do ? rto : rfrom); + bool kingSide = to > from; + rfrom = to; // Castling is encoded as "king captures friendly rook" + rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); + to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); + + if (Do) { + auto& dp = st->dirtyPiece; + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; + } + + // Remove both pieces first since squares could overlap in Chess960 + remove_piece(Do ? from : to); + remove_piece(Do ? rfrom : rto); + board[Do ? from : to] = board[Do ? rfrom : rto] = + NO_PIECE; // remove_piece does not do this for us + put_piece(make_piece(us, KING), Do ? to : from); + put_piece(make_piece(us, ROOK), Do ? rto : rfrom); } @@ -970,38 +925,37 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ void Position::do_null_move(StateInfo& newSt) { - assert(!checkers()); - assert(&newSt != st); + assert(!checkers()); + assert(&newSt != st); - std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); - newSt.previous = st; - st = &newSt; + newSt.previous = st; + st = &newSt; - st->dirtyPiece.dirty_num = 0; - st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + st->dirtyPiece.dirty_num = 0; + st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() + st->accumulator.computed[WHITE] = false; + st->accumulator.computed[BLACK] = false; - if (st->epSquare != SQ_NONE) - { - st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; - st->epSquare = SQ_NONE; - } + if (st->epSquare != SQ_NONE) { + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; + st->epSquare = SQ_NONE; + } - st->key ^= Zobrist::side; - ++st->rule50; - prefetch(TT.first_entry(key())); + st->key ^= Zobrist::side; + ++st->rule50; + prefetch(TT.first_entry(key())); - st->pliesFromNull = 0; + st->pliesFromNull = 0; - sideToMove = ~sideToMove; + sideToMove = ~sideToMove; - set_check_info(); + set_check_info(); - st->repetition = 0; + st->repetition = 0; - assert(pos_is_ok()); + assert(pos_is_ok()); } @@ -1009,10 +963,10 @@ void Position::do_null_move(StateInfo& newSt) { void Position::undo_null_move() { - assert(!checkers()); + assert(!checkers()); - st = st->previous; - sideToMove = ~sideToMove; + st = st->previous; + sideToMove = ~sideToMove; } @@ -1022,19 +976,18 @@ void Position::undo_null_move() { Key Position::key_after(Move m) const { - Square from = from_sq(m); - Square to = to_sq(m); - Piece pc = piece_on(from); - Piece captured = piece_on(to); - Key k = st->key ^ Zobrist::side; + Square from = from_sq(m); + Square to = to_sq(m); + Piece pc = piece_on(from); + Piece captured = piece_on(to); + Key k = st->key ^ Zobrist::side; - if (captured) - k ^= Zobrist::psq[captured][to]; + if (captured) + k ^= Zobrist::psq[captured][to]; - k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[pc][from]; + k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[pc][from]; - return (captured || type_of(pc) == PAWN) - ? k : adjust_key50(k); + return (captured || type_of(pc) == PAWN) ? k : adjust_key50(k); } @@ -1044,103 +997,96 @@ Key Position::key_after(Move m) const { bool Position::see_ge(Move m, Value threshold) const { - assert(is_ok(m)); - - // Only deal with normal moves, assume others pass a simple SEE - if (type_of(m) != NORMAL) - return VALUE_ZERO >= threshold; - - Square from = from_sq(m), to = to_sq(m); - - int swap = PieceValue[piece_on(to)] - threshold; - if (swap < 0) - return false; - - swap = PieceValue[piece_on(from)] - swap; - if (swap <= 0) - return true; - - assert(color_of(piece_on(from)) == sideToMove); - Bitboard occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic - Color stm = sideToMove; - Bitboard attackers = attackers_to(to, occupied); - Bitboard stmAttackers, bb; - int res = 1; - - while (true) - { - stm = ~stm; - attackers &= occupied; - - // If stm has no more attackers then give up: stm loses - if (!(stmAttackers = attackers & pieces(stm))) - break; - - // Don't allow pinned pieces to attack as long as there are - // pinners on their original square. - if (pinners(~stm) & occupied) - { - stmAttackers &= ~blockers_for_king(stm); - - if (!stmAttackers) - break; - } - - res ^= 1; - - // Locate and remove the next least valuable attacker, and add to - // the bitboard 'attackers' any X-ray attackers behind it. - if ((bb = stmAttackers & pieces(PAWN))) - { - if ((swap = PawnValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } - - else if ((bb = stmAttackers & pieces(KNIGHT))) - { - if ((swap = KnightValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - } - - else if ((bb = stmAttackers & pieces(BISHOP))) - { - if ((swap = BishopValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); - } - - else if ((bb = stmAttackers & pieces(ROOK))) - { - if ((swap = RookValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); - } - - else if ((bb = stmAttackers & pieces(QUEEN))) - { - if ((swap = QueenValue - swap) < res) - break; - occupied ^= least_significant_square_bb(bb); - - attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) - | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); - } - - else // KING - // If we "capture" with the king but the opponent still has attackers, - // reverse the result. - return (attackers & ~pieces(stm)) ? res ^ 1 : res; - } - - return bool(res); + assert(is_ok(m)); + + // Only deal with normal moves, assume others pass a simple SEE + if (type_of(m) != NORMAL) + return VALUE_ZERO >= threshold; + + Square from = from_sq(m), to = to_sq(m); + + int swap = PieceValue[piece_on(to)] - threshold; + if (swap < 0) + return false; + + swap = PieceValue[piece_on(from)] - swap; + if (swap <= 0) + return true; + + assert(color_of(piece_on(from)) == sideToMove); + Bitboard occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic + Color stm = sideToMove; + Bitboard attackers = attackers_to(to, occupied); + Bitboard stmAttackers, bb; + int res = 1; + + while (true) { + stm = ~stm; + attackers &= occupied; + + // If stm has no more attackers then give up: stm loses + if (!(stmAttackers = attackers & pieces(stm))) + break; + + // Don't allow pinned pieces to attack as long as there are + // pinners on their original square. + if (pinners(~stm) & occupied) { + stmAttackers &= ~blockers_for_king(stm); + + if (!stmAttackers) + break; + } + + res ^= 1; + + // Locate and remove the next least valuable attacker, and add to + // the bitboard 'attackers' any X-ray attackers behind it. + if ((bb = stmAttackers & pieces(PAWN))) { + if ((swap = PawnValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } + + else if ((bb = stmAttackers & pieces(KNIGHT))) { + if ((swap = KnightValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + } + + else if ((bb = stmAttackers & pieces(BISHOP))) { + if ((swap = BishopValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + + attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); + } + + else if ((bb = stmAttackers & pieces(ROOK))) { + if ((swap = RookValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + + attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); + } + + else if ((bb = stmAttackers & pieces(QUEEN))) { + if ((swap = QueenValue - swap) < res) + break; + occupied ^= least_significant_square_bb(bb); + + attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) + | (attacks_bb(to, occupied) & pieces(ROOK, QUEEN)); + } + + else // KING + // If we "capture" with the king but the opponent still has attackers, + // reverse the result. + return (attackers & ~pieces(stm)) ? res ^ 1 : res; + } + + return bool(res); } // Position::is_draw() tests whether the position is drawn by 50-move rule @@ -1148,12 +1094,12 @@ bool Position::see_ge(Move m, Value threshold) const { bool Position::is_draw(int ply) const { - if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) - return true; + if (st->rule50 > 99 && (!checkers() || MoveList(*this).size())) + return true; - // Return a draw score if a position repeats once earlier but strictly - // after the root, or repeats twice before or at the root. - return st->repetition && st->repetition < ply; + // Return a draw score if a position repeats once earlier but strictly + // after the root, or repeats twice before or at the root. + return st->repetition && st->repetition < ply; } @@ -1163,9 +1109,8 @@ bool Position::is_draw(int ply) const { bool Position::has_repeated() const { StateInfo* stc = st; - int end = std::min(st->rule50, st->pliesFromNull); - while (end-- >= 4) - { + int end = std::min(st->rule50, st->pliesFromNull); + while (end-- >= 4) { if (stc->repetition) return true; @@ -1180,47 +1125,43 @@ bool Position::has_repeated() const { bool Position::has_game_cycle(int ply) const { - int j; + int j; - int end = std::min(st->rule50, st->pliesFromNull); + int end = std::min(st->rule50, st->pliesFromNull); - if (end < 3) + if (end < 3) + return false; + + Key originalKey = st->key; + StateInfo* stp = st->previous; + + for (int i = 3; i <= end; i += 2) { + stp = stp->previous->previous; + + Key moveKey = originalKey ^ stp->key; + if ((j = H1(moveKey), cuckoo[j] == moveKey) || (j = H2(moveKey), cuckoo[j] == moveKey)) { + Move move = cuckooMove[j]; + Square s1 = from_sq(move); + Square s2 = to_sq(move); + + if (!((between_bb(s1, s2) ^ s2) & pieces())) { + if (ply > i) + return true; + + // For nodes before or at the root, check that the move is a + // repetition rather than a move to the current position. + // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in + // the same location, so we have to select which square to check. + if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) + continue; + + // For repetitions before or at the root, require one more + if (stp->repetition) + return true; + } + } + } return false; - - Key originalKey = st->key; - StateInfo* stp = st->previous; - - for (int i = 3; i <= end; i += 2) - { - stp = stp->previous->previous; - - Key moveKey = originalKey ^ stp->key; - if ( (j = H1(moveKey), cuckoo[j] == moveKey) - || (j = H2(moveKey), cuckoo[j] == moveKey)) - { - Move move = cuckooMove[j]; - Square s1 = from_sq(move); - Square s2 = to_sq(move); - - if (!((between_bb(s1, s2) ^ s2) & pieces())) - { - if (ply > i) - return true; - - // For nodes before or at the root, check that the move is a - // repetition rather than a move to the current position. - // In the cuckoo table, both moves Rc1c5 and Rc5c1 are stored in - // the same location, so we have to select which square to check. - if (color_of(piece_on(empty(s1) ? s2 : s1)) != side_to_move()) - continue; - - // For repetitions before or at the root, require one more - if (stp->repetition) - return true; - } - } - } - return false; } @@ -1229,33 +1170,33 @@ bool Position::has_game_cycle(int ply) const { void Position::flip() { - string f, token; - std::stringstream ss(fen()); + string f, token; + std::stringstream ss(fen()); - for (Rank r = RANK_8; r >= RANK_1; --r) // Piece placement - { - std::getline(ss, token, r > RANK_1 ? '/' : ' '); - f.insert(0, token + (f.empty() ? " " : "/")); - } + for (Rank r = RANK_8; r >= RANK_1; --r) // Piece placement + { + std::getline(ss, token, r > RANK_1 ? '/' : ' '); + f.insert(0, token + (f.empty() ? " " : "/")); + } - ss >> token; // Active color - f += (token == "w" ? "B " : "W "); // Will be lowercased later + ss >> token; // Active color + f += (token == "w" ? "B " : "W "); // Will be lowercased later - ss >> token; // Castling availability - f += token + " "; + ss >> token; // Castling availability + f += token + " "; - std::transform(f.begin(), f.end(), f.begin(), - [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); + std::transform(f.begin(), f.end(), f.begin(), + [](char c) { return char(islower(c) ? toupper(c) : tolower(c)); }); - ss >> token; // En passant square - f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); + ss >> token; // En passant square + f += (token == "-" ? token : token.replace(1, 1, token[1] == '3' ? "6" : "3")); - std::getline(ss, token); // Half and full moves - f += token; + std::getline(ss, token); // Half and full moves + f += token; - set(f, is_chess960(), st, this_thread()); + set(f, is_chess960(), st, this_thread()); - assert(pos_is_ok()); + assert(pos_is_ok()); } @@ -1265,58 +1206,50 @@ void Position::flip() { bool Position::pos_is_ok() const { - constexpr bool Fast = true; // Quick (default) or full check? - - if ( (sideToMove != WHITE && sideToMove != BLACK) - || piece_on(square(WHITE)) != W_KING - || piece_on(square(BLACK)) != B_KING - || ( ep_square() != SQ_NONE - && relative_rank(sideToMove, ep_square()) != RANK_6)) - assert(0 && "pos_is_ok: Default"); - - if (Fast) - return true; - - if ( pieceCount[W_KING] != 1 - || pieceCount[B_KING] != 1 - || attackers_to(square(~sideToMove)) & pieces(sideToMove)) - assert(0 && "pos_is_ok: Kings"); - - if ( (pieces(PAWN) & (Rank1BB | Rank8BB)) - || pieceCount[W_PAWN] > 8 - || pieceCount[B_PAWN] > 8) - assert(0 && "pos_is_ok: Pawns"); - - if ( (pieces(WHITE) & pieces(BLACK)) - || (pieces(WHITE) | pieces(BLACK)) != pieces() - || popcount(pieces(WHITE)) > 16 - || popcount(pieces(BLACK)) > 16) - assert(0 && "pos_is_ok: Bitboards"); - - for (PieceType p1 = PAWN; p1 <= KING; ++p1) - for (PieceType p2 = PAWN; p2 <= KING; ++p2) - if (p1 != p2 && (pieces(p1) & pieces(p2))) - assert(0 && "pos_is_ok: Bitboards"); - - - for (Piece pc : Pieces) - if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) - || pieceCount[pc] != std::count(board, board + SQUARE_NB, pc)) - assert(0 && "pos_is_ok: Pieces"); - - for (Color c : { WHITE, BLACK }) - for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) - { - if (!can_castle(cr)) - continue; - - if ( piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) - || castlingRightsMask[castlingRookSquare[cr]] != cr - || (castlingRightsMask[square(c)] & cr) != cr) - assert(0 && "pos_is_ok: Castling"); - } - - return true; + constexpr bool Fast = true; // Quick (default) or full check? + + if ((sideToMove != WHITE && sideToMove != BLACK) || piece_on(square(WHITE)) != W_KING + || piece_on(square(BLACK)) != B_KING + || (ep_square() != SQ_NONE && relative_rank(sideToMove, ep_square()) != RANK_6)) + assert(0 && "pos_is_ok: Default"); + + if (Fast) + return true; + + if (pieceCount[W_KING] != 1 || pieceCount[B_KING] != 1 + || attackers_to(square(~sideToMove)) & pieces(sideToMove)) + assert(0 && "pos_is_ok: Kings"); + + if ((pieces(PAWN) & (Rank1BB | Rank8BB)) || pieceCount[W_PAWN] > 8 || pieceCount[B_PAWN] > 8) + assert(0 && "pos_is_ok: Pawns"); + + if ((pieces(WHITE) & pieces(BLACK)) || (pieces(WHITE) | pieces(BLACK)) != pieces() + || popcount(pieces(WHITE)) > 16 || popcount(pieces(BLACK)) > 16) + assert(0 && "pos_is_ok: Bitboards"); + + for (PieceType p1 = PAWN; p1 <= KING; ++p1) + for (PieceType p2 = PAWN; p2 <= KING; ++p2) + if (p1 != p2 && (pieces(p1) & pieces(p2))) + assert(0 && "pos_is_ok: Bitboards"); + + + for (Piece pc : Pieces) + if (pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) + || pieceCount[pc] != std::count(board, board + SQUARE_NB, pc)) + assert(0 && "pos_is_ok: Pieces"); + + for (Color c : {WHITE, BLACK}) + for (CastlingRights cr : {c & KING_SIDE, c & QUEEN_SIDE}) { + if (!can_castle(cr)) + continue; + + if (piece_on(castlingRookSquare[cr]) != make_piece(c, ROOK) + || castlingRightsMask[castlingRookSquare[cr]] != cr + || (castlingRightsMask[square(c)] & cr) != cr) + assert(0 && "pos_is_ok: Castling"); + } + + return true; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/position.h b/src/position.h index 23fd5bf5688..2651a0af68f 100644 --- a/src/position.h +++ b/src/position.h @@ -37,27 +37,27 @@ namespace Stockfish { struct StateInfo { - // Copied when making a move - Key materialKey; - Value nonPawnMaterial[COLOR_NB]; - int castlingRights; - int rule50; - int pliesFromNull; - Square epSquare; - - // Not copied when making a move (will be recomputed anyhow) - Key key; - Bitboard checkersBB; - StateInfo* previous; - Bitboard blockersForKing[COLOR_NB]; - Bitboard pinners[COLOR_NB]; - Bitboard checkSquares[PIECE_TYPE_NB]; - Piece capturedPiece; - int repetition; - - // Used by NNUE - Eval::NNUE::Accumulator accumulator; - DirtyPiece dirtyPiece; + // Copied when making a move + Key materialKey; + Value nonPawnMaterial[COLOR_NB]; + int castlingRights; + int rule50; + int pliesFromNull; + Square epSquare; + + // Not copied when making a move (will be recomputed anyhow) + Key key; + Bitboard checkersBB; + StateInfo* previous; + Bitboard blockersForKing[COLOR_NB]; + Bitboard pinners[COLOR_NB]; + Bitboard checkSquares[PIECE_TYPE_NB]; + Piece capturedPiece; + int repetition; + + // Used by NNUE + Eval::NNUE::Accumulator accumulator; + DirtyPiece dirtyPiece; }; @@ -75,329 +75,289 @@ using StateListPtr = std::unique_ptr>; class Thread; class Position { -public: - static void init(); - - Position() = default; - Position(const Position&) = delete; - Position& operator=(const Position&) = delete; - - // FEN string input/output - Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); - Position& set(const std::string& code, Color c, StateInfo* si); - std::string fen() const; - - // Position representation - Bitboard pieces(PieceType pt = ALL_PIECES) const; - template Bitboard pieces(PieceType pt, PieceTypes... pts) const; - Bitboard pieces(Color c) const; - template Bitboard pieces(Color c, PieceTypes... pts) const; - Piece piece_on(Square s) const; - Square ep_square() const; - bool empty(Square s) const; - template int count(Color c) const; - template int count() const; - template Square square(Color c) const; - - // Castling - CastlingRights castling_rights(Color c) const; - bool can_castle(CastlingRights cr) const; - bool castling_impeded(CastlingRights cr) const; - Square castling_rook_square(CastlingRights cr) const; - - // Checking - Bitboard checkers() const; - Bitboard blockers_for_king(Color c) const; - Bitboard check_squares(PieceType pt) const; - Bitboard pinners(Color c) const; - - // Attacks to/from a given square - Bitboard attackers_to(Square s) const; - Bitboard attackers_to(Square s, Bitboard occupied) const; - void update_slider_blockers(Color c) const; - template Bitboard attacks_by(Color c) const; - - // Properties of moves - bool legal(Move m) const; - bool pseudo_legal(const Move m) const; - bool capture(Move m) const; - bool capture_stage(Move m) const; - bool gives_check(Move m) const; - Piece moved_piece(Move m) const; - Piece captured_piece() const; - - // Doing and undoing moves - void do_move(Move m, StateInfo& newSt); - void do_move(Move m, StateInfo& newSt, bool givesCheck); - void undo_move(Move m); - void do_null_move(StateInfo& newSt); - void undo_null_move(); - - // Static Exchange Evaluation - bool see_ge(Move m, Value threshold = VALUE_ZERO) const; - - // Accessing hash keys - Key key() const; - Key key_after(Move m) const; - Key material_key() const; - - // Other properties of the position - Color side_to_move() const; - int game_ply() const; - bool is_chess960() const; - Thread* this_thread() const; - bool is_draw(int ply) const; - bool has_game_cycle(int ply) const; - bool has_repeated() const; - int rule50_count() const; - Value non_pawn_material(Color c) const; - Value non_pawn_material() const; - - // Position consistency check, for debugging - bool pos_is_ok() const; - void flip(); - - // Used by NNUE - StateInfo* state() const; - - void put_piece(Piece pc, Square s); - void remove_piece(Square s); - -private: - // Initialization helpers (used while setting up a position) - void set_castling_right(Color c, Square rfrom); - void set_state() const; - void set_check_info() const; - - // Other helpers - void move_piece(Square from, Square to); - template - void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); - template - Key adjust_key50(Key k) const; - - // Data members - Piece board[SQUARE_NB]; - Bitboard byTypeBB[PIECE_TYPE_NB]; - Bitboard byColorBB[COLOR_NB]; - int pieceCount[PIECE_NB]; - int castlingRightsMask[SQUARE_NB]; - Square castlingRookSquare[CASTLING_RIGHT_NB]; - Bitboard castlingPath[CASTLING_RIGHT_NB]; - Thread* thisThread; - StateInfo* st; - int gamePly; - Color sideToMove; - bool chess960; + public: + static void init(); + + Position() = default; + Position(const Position&) = delete; + Position& operator=(const Position&) = delete; + + // FEN string input/output + Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th); + Position& set(const std::string& code, Color c, StateInfo* si); + std::string fen() const; + + // Position representation + Bitboard pieces(PieceType pt = ALL_PIECES) const; + template + Bitboard pieces(PieceType pt, PieceTypes... pts) const; + Bitboard pieces(Color c) const; + template + Bitboard pieces(Color c, PieceTypes... pts) const; + Piece piece_on(Square s) const; + Square ep_square() const; + bool empty(Square s) const; + template + int count(Color c) const; + template + int count() const; + template + Square square(Color c) const; + + // Castling + CastlingRights castling_rights(Color c) const; + bool can_castle(CastlingRights cr) const; + bool castling_impeded(CastlingRights cr) const; + Square castling_rook_square(CastlingRights cr) const; + + // Checking + Bitboard checkers() const; + Bitboard blockers_for_king(Color c) const; + Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; + + // Attacks to/from a given square + Bitboard attackers_to(Square s) const; + Bitboard attackers_to(Square s, Bitboard occupied) const; + void update_slider_blockers(Color c) const; + template + Bitboard attacks_by(Color c) const; + + // Properties of moves + bool legal(Move m) const; + bool pseudo_legal(const Move m) const; + bool capture(Move m) const; + bool capture_stage(Move m) const; + bool gives_check(Move m) const; + Piece moved_piece(Move m) const; + Piece captured_piece() const; + + // Doing and undoing moves + void do_move(Move m, StateInfo& newSt); + void do_move(Move m, StateInfo& newSt, bool givesCheck); + void undo_move(Move m); + void do_null_move(StateInfo& newSt); + void undo_null_move(); + + // Static Exchange Evaluation + bool see_ge(Move m, Value threshold = VALUE_ZERO) const; + + // Accessing hash keys + Key key() const; + Key key_after(Move m) const; + Key material_key() const; + + // Other properties of the position + Color side_to_move() const; + int game_ply() const; + bool is_chess960() const; + Thread* this_thread() const; + bool is_draw(int ply) const; + bool has_game_cycle(int ply) const; + bool has_repeated() const; + int rule50_count() const; + Value non_pawn_material(Color c) const; + Value non_pawn_material() const; + + // Position consistency check, for debugging + bool pos_is_ok() const; + void flip(); + + // Used by NNUE + StateInfo* state() const; + + void put_piece(Piece pc, Square s); + void remove_piece(Square s); + + private: + // Initialization helpers (used while setting up a position) + void set_castling_right(Color c, Square rfrom); + void set_state() const; + void set_check_info() const; + + // Other helpers + void move_piece(Square from, Square to); + template + void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); + template + Key adjust_key50(Key k) const; + + // Data members + Piece board[SQUARE_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; + Bitboard byColorBB[COLOR_NB]; + int pieceCount[PIECE_NB]; + int castlingRightsMask[SQUARE_NB]; + Square castlingRookSquare[CASTLING_RIGHT_NB]; + Bitboard castlingPath[CASTLING_RIGHT_NB]; + Thread* thisThread; + StateInfo* st; + int gamePly; + Color sideToMove; + bool chess960; }; std::ostream& operator<<(std::ostream& os, const Position& pos); -inline Color Position::side_to_move() const { - return sideToMove; -} +inline Color Position::side_to_move() const { return sideToMove; } inline Piece Position::piece_on(Square s) const { - assert(is_ok(s)); - return board[s]; + assert(is_ok(s)); + return board[s]; } -inline bool Position::empty(Square s) const { - return piece_on(s) == NO_PIECE; -} +inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; } -inline Piece Position::moved_piece(Move m) const { - return piece_on(from_sq(m)); -} +inline Piece Position::moved_piece(Move m) const { return piece_on(from_sq(m)); } -inline Bitboard Position::pieces(PieceType pt) const { - return byTypeBB[pt]; -} +inline Bitboard Position::pieces(PieceType pt) const { return byTypeBB[pt]; } -template +template inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const { - return pieces(pt) | pieces(pts...); + return pieces(pt) | pieces(pts...); } -inline Bitboard Position::pieces(Color c) const { - return byColorBB[c]; -} +inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; } -template +template inline Bitboard Position::pieces(Color c, PieceTypes... pts) const { - return pieces(c) & pieces(pts...); + return pieces(c) & pieces(pts...); } -template inline int Position::count(Color c) const { - return pieceCount[make_piece(c, Pt)]; +template +inline int Position::count(Color c) const { + return pieceCount[make_piece(c, Pt)]; } -template inline int Position::count() const { - return count(WHITE) + count(BLACK); +template +inline int Position::count() const { + return count(WHITE) + count(BLACK); } -template inline Square Position::square(Color c) const { - assert(count(c) == 1); - return lsb(pieces(c, Pt)); +template +inline Square Position::square(Color c) const { + assert(count(c) == 1); + return lsb(pieces(c, Pt)); } -inline Square Position::ep_square() const { - return st->epSquare; -} +inline Square Position::ep_square() const { return st->epSquare; } -inline bool Position::can_castle(CastlingRights cr) const { - return st->castlingRights & cr; -} +inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; } inline CastlingRights Position::castling_rights(Color c) const { - return c & CastlingRights(st->castlingRights); + return c & CastlingRights(st->castlingRights); } inline bool Position::castling_impeded(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - return pieces() & castlingPath[cr]; + return pieces() & castlingPath[cr]; } inline Square Position::castling_rook_square(CastlingRights cr) const { - assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); + assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO); - return castlingRookSquare[cr]; + return castlingRookSquare[cr]; } -inline Bitboard Position::attackers_to(Square s) const { - return attackers_to(s, pieces()); -} +inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); } template inline Bitboard Position::attacks_by(Color c) const { - if constexpr (Pt == PAWN) - return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN)) - : pawn_attacks_bb(pieces(BLACK, PAWN)); - else - { - Bitboard threats = 0; - Bitboard attackers = pieces(c, Pt); - while (attackers) - threats |= attacks_bb(pop_lsb(attackers), pieces()); - return threats; - } + if constexpr (Pt == PAWN) + return c == WHITE ? pawn_attacks_bb(pieces(WHITE, PAWN)) + : pawn_attacks_bb(pieces(BLACK, PAWN)); + else { + Bitboard threats = 0; + Bitboard attackers = pieces(c, Pt); + while (attackers) + threats |= attacks_bb(pop_lsb(attackers), pieces()); + return threats; + } } -inline Bitboard Position::checkers() const { - return st->checkersBB; -} +inline Bitboard Position::checkers() const { return st->checkersBB; } -inline Bitboard Position::blockers_for_king(Color c) const { - return st->blockersForKing[c]; -} +inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } -inline Bitboard Position::pinners(Color c) const { - return st->pinners[c]; -} +inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; } -inline Bitboard Position::check_squares(PieceType pt) const { - return st->checkSquares[pt]; -} +inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } -inline Key Position::key() const { - return adjust_key50(st->key); -} +inline Key Position::key() const { return adjust_key50(st->key); } template -inline Key Position::adjust_key50(Key k) const -{ - return st->rule50 < 14 - AfterMove - ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8); +inline Key Position::adjust_key50(Key k) const { + return st->rule50 < 14 - AfterMove ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8); } -inline Key Position::material_key() const { - return st->materialKey; -} +inline Key Position::material_key() const { return st->materialKey; } -inline Value Position::non_pawn_material(Color c) const { - return st->nonPawnMaterial[c]; -} +inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; } inline Value Position::non_pawn_material() const { - return non_pawn_material(WHITE) + non_pawn_material(BLACK); + return non_pawn_material(WHITE) + non_pawn_material(BLACK); } -inline int Position::game_ply() const { - return gamePly; -} +inline int Position::game_ply() const { return gamePly; } -inline int Position::rule50_count() const { - return st->rule50; -} +inline int Position::rule50_count() const { return st->rule50; } -inline bool Position::is_chess960() const { - return chess960; -} +inline bool Position::is_chess960() const { return chess960; } inline bool Position::capture(Move m) const { - assert(is_ok(m)); - return (!empty(to_sq(m)) && type_of(m) != CASTLING) - || type_of(m) == EN_PASSANT; + assert(is_ok(m)); + return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT; } // Returns true if a move is generated from the capture stage, having also // queen promotions covered, i.e. consistency with the capture stage move generation // is needed to avoid the generation of duplicate moves. inline bool Position::capture_stage(Move m) const { - assert(is_ok(m)); - return capture(m) || promotion_type(m) == QUEEN; + assert(is_ok(m)); + return capture(m) || promotion_type(m) == QUEEN; } -inline Piece Position::captured_piece() const { - return st->capturedPiece; -} +inline Piece Position::captured_piece() const { return st->capturedPiece; } -inline Thread* Position::this_thread() const { - return thisThread; -} +inline Thread* Position::this_thread() const { return thisThread; } inline void Position::put_piece(Piece pc, Square s) { - board[s] = pc; - byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; - byColorBB[color_of(pc)] |= s; - pieceCount[pc]++; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; + board[s] = pc; + byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s; + byColorBB[color_of(pc)] |= s; + pieceCount[pc]++; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]++; } inline void Position::remove_piece(Square s) { - Piece pc = board[s]; - byTypeBB[ALL_PIECES] ^= s; - byTypeBB[type_of(pc)] ^= s; - byColorBB[color_of(pc)] ^= s; - board[s] = NO_PIECE; - pieceCount[pc]--; - pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; + Piece pc = board[s]; + byTypeBB[ALL_PIECES] ^= s; + byTypeBB[type_of(pc)] ^= s; + byColorBB[color_of(pc)] ^= s; + board[s] = NO_PIECE; + pieceCount[pc]--; + pieceCount[make_piece(color_of(pc), ALL_PIECES)]--; } inline void Position::move_piece(Square from, Square to) { - Piece pc = board[from]; - Bitboard fromTo = from | to; - byTypeBB[ALL_PIECES] ^= fromTo; - byTypeBB[type_of(pc)] ^= fromTo; - byColorBB[color_of(pc)] ^= fromTo; - board[from] = NO_PIECE; - board[to] = pc; + Piece pc = board[from]; + Bitboard fromTo = from | to; + byTypeBB[ALL_PIECES] ^= fromTo; + byTypeBB[type_of(pc)] ^= fromTo; + byColorBB[color_of(pc)] ^= fromTo; + board[from] = NO_PIECE; + board[to] = pc; } -inline void Position::do_move(Move m, StateInfo& newSt) { - do_move(m, newSt, gives_check(m)); -} - -inline StateInfo* Position::state() const { +inline void Position::do_move(Move m, StateInfo& newSt) { do_move(m, newSt, gives_check(m)); } - return st; -} +inline StateInfo* Position::state() const { return st; } -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef POSITION_H_INCLUDED +#endif // #ifndef POSITION_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp index baf819687c5..8efbfa5b8e6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -49,15 +49,15 @@ namespace Stockfish { namespace Search { - LimitsType Limits; +LimitsType Limits; } namespace Tablebases { - int Cardinality; - bool RootInTB; - bool UseRule50; - Depth ProbeDepth; +int Cardinality; +bool RootInTB; +bool UseRule50; +Depth ProbeDepth; } namespace TB = Tablebases; @@ -68,52 +68,51 @@ using namespace Search; namespace { - // Different node types, used as a template parameter - enum NodeType { NonPV, PV, Root }; +// Different node types, used as a template parameter +enum NodeType { + NonPV, + PV, + Root +}; - // Futility margin - Value futility_margin(Depth d, bool noTtCutNode, bool improving) { +// Futility margin +Value futility_margin(Depth d, bool noTtCutNode, bool improving) { return Value((126 - 42 * noTtCutNode) * (d - improving)); - } +} - // Reductions lookup table initialized at startup - int Reductions[MAX_MOVES]; // [depth or moveNumber] +// Reductions lookup table initialized at startup +int Reductions[MAX_MOVES]; // [depth or moveNumber] - Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { +Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int reductionScale = Reductions[d] * Reductions[mn]; - return (reductionScale + 1560 - int(delta) * 945 / int(rootDelta)) / 1024 - + (!i && reductionScale > 791); - } - - constexpr int futility_move_count(bool improving, Depth depth) { - return improving ? (3 + depth * depth) - : (3 + depth * depth) / 2; - } - - // History and stats update bonus, based on depth - int stat_bonus(Depth d) { - return std::min(334 * d - 531, 1538); - } - - // Add a small random component to draw evaluations to avoid 3-fold blindness - Value value_draw(const Thread* thisThread) { + return (reductionScale + 1560 - int(delta) * 945 / int(rootDelta)) / 1024 + + (!i && reductionScale > 791); +} + +constexpr int futility_move_count(bool improving, Depth depth) { + return improving ? (3 + depth * depth) : (3 + depth * depth) / 2; +} + +// History and stats update bonus, based on depth +int stat_bonus(Depth d) { return std::min(334 * d - 531, 1538); } + +// Add a small random component to draw evaluations to avoid 3-fold blindness +Value value_draw(const Thread* thisThread) { return VALUE_DRAW - 1 + Value(thisThread->nodes & 0x2); - } - - // Skill structure is used to implement strength limit. If we have a UCI_Elo, - // we convert it to an appropriate skill level, anchored to the Stash engine. - // This method is based on a fit of the Elo results for games played between - // Stockfish at various skill levels and various versions of the Stash engine. - // Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately - // Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 - struct Skill { +} + +// Skill structure is used to implement strength limit. If we have a UCI_Elo, +// we convert it to an appropriate skill level, anchored to the Stash engine. +// This method is based on a fit of the Elo results for games played between +// Stockfish at various skill levels and various versions of the Stash engine. +// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately +// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2 +struct Skill { Skill(int skill_level, int uci_elo) { - if (uci_elo) - { + if (uci_elo) { double e = double(uci_elo - 1320) / (3190 - 1320); level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); - } - else + } else level = double(skill_level); } bool enabled() const { return level < 20.0; } @@ -121,40 +120,47 @@ namespace { Move pick_best(size_t multiPV); double level; - Move best = MOVE_NONE; - }; - - template - Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); - - template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); - - Value value_to_tt(Value v, int ply); - Value value_from_tt(Value v, int ply, int r50c); - void update_pv(Move* pv, Move move, const Move* childPv); - void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus); - void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, - Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth); - - // perft() is our utility to verify move generation. All the leaf nodes up - // to the given depth are generated and counted, and the sum is returned. - template - uint64_t perft(Position& pos, Depth depth) { + Move best = MOVE_NONE; +}; + +template +Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode); + +template +Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth = 0); + +Value value_to_tt(Value v, int ply); +Value value_from_tt(Value v, int ply, int r50c); +void update_pv(Move* pv, Move move, const Move* childPv); +void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus); +void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus); +void update_all_stats(const Position& pos, + Stack* ss, + Move bestMove, + Value bestValue, + Value beta, + Square prevSq, + Move* quietsSearched, + int quietCount, + Move* capturesSearched, + int captureCount, + Depth depth); + +// perft() is our utility to verify move generation. All the leaf nodes up +// to the given depth are generated and counted, and the sum is returned. +template +uint64_t perft(Position& pos, Depth depth) { StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); - uint64_t cnt, nodes = 0; + uint64_t cnt, nodes = 0; const bool leaf = (depth == 2); - for (const auto& m : MoveList(pos)) - { + for (const auto& m : MoveList(pos)) { if (Root && depth <= 1) cnt = 1, nodes++; - else - { + else { pos.do_move(m, st); cnt = leaf ? MoveList(pos).size() : perft(pos, depth - 1); nodes += cnt; @@ -164,17 +170,17 @@ namespace { sync_cout << UCI::move(m, pos.is_chess960()) << ": " << cnt << sync_endl; } return nodes; - } +} -} // namespace +} // namespace // Search::init() is called at startup to initialize various lookup tables void Search::init() { - for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((20.37 + std::log(Threads.size()) / 2) * std::log(i)); + for (int i = 1; i < MAX_MOVES; ++i) + Reductions[i] = int((20.37 + std::log(Threads.size()) / 2) * std::log(i)); } @@ -182,12 +188,12 @@ void Search::init() { void Search::clear() { - Threads.main()->wait_for_search_finished(); + Threads.main()->wait_for_search_finished(); - Time.availableNodes = 0; - TT.clear(); - Threads.clear(); - Tablebases::init(Options["SyzygyPath"]); // Free mapped files + Time.availableNodes = 0; + TT.clear(); + Threads.clear(); + Tablebases::init(Options["SyzygyPath"]); // Free mapped files } @@ -196,75 +202,70 @@ void Search::clear() { void MainThread::search() { - if (Limits.perft) - { - nodes = perft(rootPos, Limits.perft); - sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; - return; - } - - Color us = rootPos.side_to_move(); - Time.init(Limits, us, rootPos.game_ply()); - TT.new_search(); - - Eval::NNUE::verify(); - - if (rootMoves.empty()) - { - rootMoves.emplace_back(MOVE_NONE); - sync_cout << "info depth 0 score " - << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) - << sync_endl; - } - else - { - Threads.start_searching(); // start non-main threads - Thread::search(); // main thread start searching - } - - // When we reach the maximum depth, we can arrive here without a raise of - // Threads.stop. However, if we are pondering or in an infinite search, - // the UCI protocol states that we shouldn't print the best move before the - // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here - // until the GUI sends one of those commands. - - while (!Threads.stop && (ponder || Limits.infinite)) - {} // Busy wait for a stop or a ponder reset - - // Stop the threads if not already stopped (also raise the stop if - // "ponderhit" just reset Threads.ponder). - Threads.stop = true; - - // Wait until all threads have finished - Threads.wait_for_search_finished(); - - // When playing in 'nodes as time' mode, subtract the searched nodes from - // the available ones before exiting. - if (Limits.npmsec) - Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); - - Thread* bestThread = this; - Skill skill = Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); - - if ( int(Options["MultiPV"]) == 1 - && !Limits.depth - && !skill.enabled() - && rootMoves[0].pv[0] != MOVE_NONE) - bestThread = Threads.get_best_thread(); - - bestPreviousScore = bestThread->rootMoves[0].score; - bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; - - // Send again PV info if we have a new best thread - if (bestThread != this) - sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth) << sync_endl; - - sync_cout << "bestmove " << UCI::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); - - if (bestThread->rootMoves[0].pv.size() > 1 || bestThread->rootMoves[0].extract_ponder_from_tt(rootPos)) - std::cout << " ponder " << UCI::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); - - std::cout << sync_endl; + if (Limits.perft) { + nodes = perft(rootPos, Limits.perft); + sync_cout << "\nNodes searched: " << nodes << "\n" << sync_endl; + return; + } + + Color us = rootPos.side_to_move(); + Time.init(Limits, us, rootPos.game_ply()); + TT.new_search(); + + Eval::NNUE::verify(); + + if (rootMoves.empty()) { + rootMoves.emplace_back(MOVE_NONE); + sync_cout << "info depth 0 score " + << UCI::value(rootPos.checkers() ? -VALUE_MATE : VALUE_DRAW) << sync_endl; + } else { + Threads.start_searching(); // start non-main threads + Thread::search(); // main thread start searching + } + + // When we reach the maximum depth, we can arrive here without a raise of + // Threads.stop. However, if we are pondering or in an infinite search, + // the UCI protocol states that we shouldn't print the best move before the + // GUI sends a "stop" or "ponderhit" command. We therefore simply wait here + // until the GUI sends one of those commands. + + while (!Threads.stop && (ponder || Limits.infinite)) { + } // Busy wait for a stop or a ponder reset + + // Stop the threads if not already stopped (also raise the stop if + // "ponderhit" just reset Threads.ponder). + Threads.stop = true; + + // Wait until all threads have finished + Threads.wait_for_search_finished(); + + // When playing in 'nodes as time' mode, subtract the searched nodes from + // the available ones before exiting. + if (Limits.npmsec) + Time.availableNodes += Limits.inc[us] - Threads.nodes_searched(); + + Thread* bestThread = this; + Skill skill = + Skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); + + if (int(Options["MultiPV"]) == 1 && !Limits.depth && !skill.enabled() + && rootMoves[0].pv[0] != MOVE_NONE) + bestThread = Threads.get_best_thread(); + + bestPreviousScore = bestThread->rootMoves[0].score; + bestPreviousAverageScore = bestThread->rootMoves[0].averageScore; + + // Send again PV info if we have a new best thread + if (bestThread != this) + sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth) << sync_endl; + + sync_cout << "bestmove " << UCI::move(bestThread->rootMoves[0].pv[0], rootPos.is_chess960()); + + if (bestThread->rootMoves[0].pv.size() > 1 + || bestThread->rootMoves[0].extract_ponder_from_tt(rootPos)) + std::cout << " ponder " << UCI::move(bestThread->rootMoves[0].pv[1], rootPos.is_chess960()); + + std::cout << sync_endl; } @@ -274,267 +275,244 @@ void MainThread::search() { void Thread::search() { - // Allocate stack with extra size to allow access from (ss-7) to (ss+2): - // (ss-7) is needed for update_continuation_histories(ss-1) which accesses (ss-6), - // (ss+2) is needed for initialization of statScore and killers. - Stack stack[MAX_PLY+10], *ss = stack+7; - Move pv[MAX_PLY+1]; - Value alpha, beta, delta; - Move lastBestMove = MOVE_NONE; - Depth lastBestMoveDepth = 0; - MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); - double timeReduction = 1, totBestMoveChanges = 0; - Color us = rootPos.side_to_move(); - int iterIdx = 0; - - std::memset(ss-7, 0, 10 * sizeof(Stack)); - for (int i = 7; i > 0; --i) - { - (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - (ss-i)->staticEval = VALUE_NONE; - } - - for (int i = 0; i <= MAX_PLY + 2; ++i) - (ss+i)->ply = i; - - ss->pv = pv; - - bestValue = -VALUE_INFINITE; - - if (mainThread) - { - if (mainThread->bestPreviousScore == VALUE_INFINITE) - for (int i = 0; i < 4; ++i) - mainThread->iterValue[i] = VALUE_ZERO; - else - for (int i = 0; i < 4; ++i) - mainThread->iterValue[i] = mainThread->bestPreviousScore; - } - - size_t multiPV = size_t(Options["MultiPV"]); - Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); - - // When playing with strength handicap enable MultiPV search that we will - // use behind-the-scenes to retrieve a set of possible moves. - if (skill.enabled()) - multiPV = std::max(multiPV, size_t(4)); - - multiPV = std::min(multiPV, rootMoves.size()); - - int searchAgainCounter = 0; - - // Iterative deepening loop until requested to stop or the target depth is reached - while ( ++rootDepth < MAX_PLY - && !Threads.stop - && !(Limits.depth && mainThread && rootDepth > Limits.depth)) - { - // Age out PV variability metric - if (mainThread) - totBestMoveChanges /= 2; - - // Save the last iteration's scores before the first PV line is searched and - // all the move scores except the (new) PV are set to -VALUE_INFINITE. - for (RootMove& rm : rootMoves) - rm.previousScore = rm.score; - - size_t pvFirst = 0; - pvLast = 0; - - if (!Threads.increaseDepth) - searchAgainCounter++; - - // MultiPV loop. We perform a full root search for each PV line - for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) - { - if (pvIdx == pvLast) - { - pvFirst = pvLast; - for (pvLast++; pvLast < rootMoves.size(); pvLast++) - if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) - break; - } - - // Reset UCI info selDepth for each depth and each PV line - selDepth = 0; - - // Reset aspiration window starting size - Value prev = rootMoves[pvIdx].averageScore; - delta = Value(10) + int(prev) * prev / 17470; - alpha = std::max(prev - delta,-VALUE_INFINITE); - beta = std::min(prev + delta, VALUE_INFINITE); - - // Adjust optimism based on root move's previousScore (~4 Elo) - int opt = 113 * prev / (std::abs(prev) + 109); - optimism[ us] = Value(opt); - optimism[~us] = -optimism[us]; - - // Start with a small aspiration window and, in the case of a fail - // high/low, re-search with a bigger window until we don't fail - // high/low anymore. - int failedHighCnt = 0; - while (true) - { - // Adjust the effective depth searched, but ensure at least one effective increment for every - // four searchAgain steps (see issue #2717). - Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); - bestValue = Stockfish::search(rootPos, ss, alpha, beta, adjustedDepth, false); - - // Bring the best move to the front. It is critical that sorting - // is done with a stable algorithm because all the values but the - // first and eventually the new best one is set to -VALUE_INFINITE - // and we want to keep the same order for all the moves except the - // new PV that goes to the front. Note that in the case of MultiPV - // search the already searched PV lines are preserved. - std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); - - // If search has been stopped, we break immediately. Sorting is - // safe because RootMoves is still valid, although it refers to - // the previous iteration. - if (Threads.stop) - break; - - // When failing high/low give some update (without cluttering - // the UI) before a re-search. - if ( mainThread - && multiPV == 1 - && (bestValue <= alpha || bestValue >= beta) - && Time.elapsed() > 3000) - sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; - - // In case of failing low/high increase aspiration window and - // re-search, otherwise exit the loop. - if (bestValue <= alpha) - { - beta = (alpha + beta) / 2; - alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - if (mainThread) - mainThread->stopOnPonderhit = false; - } - else if (bestValue >= beta) - { - beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; - } - else - break; - - delta += delta / 3; - - assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); - } - - // Sort the PV lines searched so far and update the GUI - std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); - - if ( mainThread - && (Threads.stop || pvIdx + 1 == multiPV || Time.elapsed() > 3000)) - sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; - } - - if (!Threads.stop) - completedDepth = rootDepth; - - if (rootMoves[0].pv[0] != lastBestMove) - { - lastBestMove = rootMoves[0].pv[0]; - lastBestMoveDepth = rootDepth; - } - - // Have we found a "mate in x"? - if ( Limits.mate - && bestValue >= VALUE_MATE_IN_MAX_PLY - && VALUE_MATE - bestValue <= 2 * Limits.mate) - Threads.stop = true; - - if (!mainThread) - continue; - - // If the skill level is enabled and time is up, pick a sub-optimal best move - if (skill.enabled() && skill.time_to_pick(rootDepth)) - skill.pick_best(multiPV); - - // Use part of the gained time from a previous stable move for the current move - for (Thread* th : Threads) - { - totBestMoveChanges += th->bestMoveChanges; - th->bestMoveChanges = 0; - } - - // Do we have time for the next iteration? Can we stop searching now? - if ( Limits.use_time_management() - && !Threads.stop - && !mainThread->stopOnPonderhit) - { - double fallingEval = (69 + 13 * (mainThread->bestPreviousAverageScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 619.6; - fallingEval = std::clamp(fallingEval, 0.5, 1.5); - - // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.57 : 0.65; - double reduction = (1.4 + mainThread->previousTimeReduction) / (2.08 * timeReduction); - double bestMoveInstability = 1 + 1.8 * totBestMoveChanges / Threads.size(); - - double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; - - // Cap used time in case of a single legal move for a better viewer experience - if (rootMoves.size() == 1) - totalTime = std::min(500.0, totalTime); - - // Stop the search if we have exceeded the totalTime - if (Time.elapsed() > totalTime) - { - // If we are allowed to ponder do not stop the search now but - // keep pondering until the GUI sends "ponderhit" or "stop". - if (mainThread->ponder) - mainThread->stopOnPonderhit = true; - else - Threads.stop = true; - } - else if ( !mainThread->ponder - && Time.elapsed() > totalTime * 0.50) - Threads.increaseDepth = false; - else - Threads.increaseDepth = true; - } - - mainThread->iterValue[iterIdx] = bestValue; - iterIdx = (iterIdx + 1) & 3; - } - - if (!mainThread) - return; - - mainThread->previousTimeReduction = timeReduction; - - // If the skill level is enabled, swap the best PV line with the sub-optimal one - if (skill.enabled()) - std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(), - skill.best ? skill.best : skill.pick_best(multiPV))); + // Allocate stack with extra size to allow access from (ss-7) to (ss+2): + // (ss-7) is needed for update_continuation_histories(ss-1) which accesses (ss-6), + // (ss+2) is needed for initialization of statScore and killers. + Stack stack[MAX_PLY + 10], *ss = stack + 7; + Move pv[MAX_PLY + 1]; + Value alpha, beta, delta; + Move lastBestMove = MOVE_NONE; + Depth lastBestMoveDepth = 0; + MainThread* mainThread = (this == Threads.main() ? Threads.main() : nullptr); + double timeReduction = 1, totBestMoveChanges = 0; + Color us = rootPos.side_to_move(); + int iterIdx = 0; + + std::memset(ss - 7, 0, 10 * sizeof(Stack)); + for (int i = 7; i > 0; --i) { + (ss - i)->continuationHistory = + &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + (ss - i)->staticEval = VALUE_NONE; + } + + for (int i = 0; i <= MAX_PLY + 2; ++i) + (ss + i)->ply = i; + + ss->pv = pv; + + bestValue = -VALUE_INFINITE; + + if (mainThread) { + if (mainThread->bestPreviousScore == VALUE_INFINITE) + for (int i = 0; i < 4; ++i) + mainThread->iterValue[i] = VALUE_ZERO; + else + for (int i = 0; i < 4; ++i) + mainThread->iterValue[i] = mainThread->bestPreviousScore; + } + + size_t multiPV = size_t(Options["MultiPV"]); + Skill skill(Options["Skill Level"], Options["UCI_LimitStrength"] ? int(Options["UCI_Elo"]) : 0); + + // When playing with strength handicap enable MultiPV search that we will + // use behind-the-scenes to retrieve a set of possible moves. + if (skill.enabled()) + multiPV = std::max(multiPV, size_t(4)); + + multiPV = std::min(multiPV, rootMoves.size()); + + int searchAgainCounter = 0; + + // Iterative deepening loop until requested to stop or the target depth is reached + while (++rootDepth < MAX_PLY && !Threads.stop + && !(Limits.depth && mainThread && rootDepth > Limits.depth)) { + // Age out PV variability metric + if (mainThread) + totBestMoveChanges /= 2; + + // Save the last iteration's scores before the first PV line is searched and + // all the move scores except the (new) PV are set to -VALUE_INFINITE. + for (RootMove& rm : rootMoves) + rm.previousScore = rm.score; + + size_t pvFirst = 0; + pvLast = 0; + + if (!Threads.increaseDepth) + searchAgainCounter++; + + // MultiPV loop. We perform a full root search for each PV line + for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) { + if (pvIdx == pvLast) { + pvFirst = pvLast; + for (pvLast++; pvLast < rootMoves.size(); pvLast++) + if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank) + break; + } + + // Reset UCI info selDepth for each depth and each PV line + selDepth = 0; + + // Reset aspiration window starting size + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(10) + int(prev) * prev / 17470; + alpha = std::max(prev - delta, -VALUE_INFINITE); + beta = std::min(prev + delta, VALUE_INFINITE); + + // Adjust optimism based on root move's previousScore (~4 Elo) + int opt = 113 * prev / (std::abs(prev) + 109); + optimism[us] = Value(opt); + optimism[~us] = -optimism[us]; + + // Start with a small aspiration window and, in the case of a fail + // high/low, re-search with a bigger window until we don't fail + // high/low anymore. + int failedHighCnt = 0; + while (true) { + // Adjust the effective depth searched, but ensure at least one effective increment for every + // four searchAgain steps (see issue #2717). + Depth adjustedDepth = + std::max(1, rootDepth - failedHighCnt - 3 * (searchAgainCounter + 1) / 4); + bestValue = Stockfish::search(rootPos, ss, alpha, beta, adjustedDepth, false); + + // Bring the best move to the front. It is critical that sorting + // is done with a stable algorithm because all the values but the + // first and eventually the new best one is set to -VALUE_INFINITE + // and we want to keep the same order for all the moves except the + // new PV that goes to the front. Note that in the case of MultiPV + // search the already searched PV lines are preserved. + std::stable_sort(rootMoves.begin() + pvIdx, rootMoves.begin() + pvLast); + + // If search has been stopped, we break immediately. Sorting is + // safe because RootMoves is still valid, although it refers to + // the previous iteration. + if (Threads.stop) + break; + + // When failing high/low give some update (without cluttering + // the UI) before a re-search. + if (mainThread && multiPV == 1 && (bestValue <= alpha || bestValue >= beta) + && Time.elapsed() > 3000) + sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; + + // In case of failing low/high increase aspiration window and + // re-search, otherwise exit the loop. + if (bestValue <= alpha) { + beta = (alpha + beta) / 2; + alpha = std::max(bestValue - delta, -VALUE_INFINITE); + + failedHighCnt = 0; + if (mainThread) + mainThread->stopOnPonderhit = false; + } else if (bestValue >= beta) { + beta = std::min(bestValue + delta, VALUE_INFINITE); + ++failedHighCnt; + } else + break; + + delta += delta / 3; + + assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); + } + + // Sort the PV lines searched so far and update the GUI + std::stable_sort(rootMoves.begin() + pvFirst, rootMoves.begin() + pvIdx + 1); + + if (mainThread && (Threads.stop || pvIdx + 1 == multiPV || Time.elapsed() > 3000)) + sync_cout << UCI::pv(rootPos, rootDepth) << sync_endl; + } + + if (!Threads.stop) + completedDepth = rootDepth; + + if (rootMoves[0].pv[0] != lastBestMove) { + lastBestMove = rootMoves[0].pv[0]; + lastBestMoveDepth = rootDepth; + } + + // Have we found a "mate in x"? + if (Limits.mate && bestValue >= VALUE_MATE_IN_MAX_PLY + && VALUE_MATE - bestValue <= 2 * Limits.mate) + Threads.stop = true; + + if (!mainThread) + continue; + + // If the skill level is enabled and time is up, pick a sub-optimal best move + if (skill.enabled() && skill.time_to_pick(rootDepth)) + skill.pick_best(multiPV); + + // Use part of the gained time from a previous stable move for the current move + for (Thread* th : Threads) { + totBestMoveChanges += th->bestMoveChanges; + th->bestMoveChanges = 0; + } + + // Do we have time for the next iteration? Can we stop searching now? + if (Limits.use_time_management() && !Threads.stop && !mainThread->stopOnPonderhit) { + double fallingEval = (69 + 13 * (mainThread->bestPreviousAverageScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) + / 619.6; + fallingEval = std::clamp(fallingEval, 0.5, 1.5); + + // If the bestMove is stable over several iterations, reduce time accordingly + timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.57 : 0.65; + double reduction = (1.4 + mainThread->previousTimeReduction) / (2.08 * timeReduction); + double bestMoveInstability = 1 + 1.8 * totBestMoveChanges / Threads.size(); + + double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; + + // Cap used time in case of a single legal move for a better viewer experience + if (rootMoves.size() == 1) + totalTime = std::min(500.0, totalTime); + + // Stop the search if we have exceeded the totalTime + if (Time.elapsed() > totalTime) { + // If we are allowed to ponder do not stop the search now but + // keep pondering until the GUI sends "ponderhit" or "stop". + if (mainThread->ponder) + mainThread->stopOnPonderhit = true; + else + Threads.stop = true; + } else if (!mainThread->ponder && Time.elapsed() > totalTime * 0.50) + Threads.increaseDepth = false; + else + Threads.increaseDepth = true; + } + + mainThread->iterValue[iterIdx] = bestValue; + iterIdx = (iterIdx + 1) & 3; + } + + if (!mainThread) + return; + + mainThread->previousTimeReduction = timeReduction; + + // If the skill level is enabled, swap the best PV line with the sub-optimal one + if (skill.enabled()) + std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(), + skill.best ? skill.best : skill.pick_best(multiPV))); } namespace { - // search<>() is the main search function for both PV and non-PV nodes +// search<>() is the main search function for both PV and non-PV nodes - template - Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { +template +Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode) { - constexpr bool PvNode = nodeType != NonPV; + constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; // Dive into quiescence search when the depth reaches zero if (depth <= 0) - return qsearch(pos, ss, alpha, beta); + return qsearch < PvNode ? PV : NonPV > (pos, ss, alpha, beta); // Check if we have an upcoming move that draws by repetition, or // if the opponent had an alternative move earlier to this position. - if ( !rootNode - && alpha < VALUE_DRAW - && pos.has_game_cycle(ss->ply)) - { + if (!rootNode && alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) { alpha = value_draw(pos.this_thread()); if (alpha >= beta) return alpha; @@ -545,43 +523,40 @@ namespace { assert(0 < depth && depth < MAX_PLY); assert(!(PvNode && cutNode)); - Move pv[MAX_PLY+1], capturesSearched[32], quietsSearched[32]; + Move pv[MAX_PLY + 1], capturesSearched[32], quietsSearched[32]; StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; - Key posKey; - Move ttMove, move, excludedMove, bestMove; - Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool givesCheck, improving, priorCapture, singularQuietLMR; - bool capture, moveCountPruning, ttCapture; - Piece movedPiece; - int moveCount, captureCount, quietCount; + Key posKey; + Move ttMove, move, excludedMove, bestMove; + Depth extension, newDepth; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta; + bool givesCheck, improving, priorCapture, singularQuietLMR; + bool capture, moveCountPruning, ttCapture; + Piece movedPiece; + int moveCount, captureCount, quietCount; // Step 1. Initialize node Thread* thisThread = pos.this_thread(); ss->inCheck = pos.checkers(); priorCapture = pos.captured_piece(); Color us = pos.side_to_move(); - moveCount = captureCount = quietCount = ss->moveCount = 0; - bestValue = -VALUE_INFINITE; - maxValue = VALUE_INFINITE; + moveCount = captureCount = quietCount = ss->moveCount = 0; + bestValue = -VALUE_INFINITE; + maxValue = VALUE_INFINITE; // Check for the available remaining time if (thisThread == Threads.main()) static_cast(thisThread)->check_time(); // Used to send selDepth info to GUI (selDepth counts from 1, ply from 0) - if ( PvNode - && thisThread->selDepth < ss->ply + 1) + if (PvNode && thisThread->selDepth < ss->ply + 1) thisThread->selDepth = ss->ply + 1; - if (!rootNode) - { + if (!rootNode) { // Step 2. Check for aborted search and immediate draw - if ( Threads.stop.load(std::memory_order_relaxed) - || pos.is_draw(ss->ply) + if (Threads.stop.load(std::memory_order_relaxed) || pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : value_draw(pos.this_thread()); @@ -593,29 +568,29 @@ namespace { // signs apply also in the opposite condition of being mated instead of giving // mate. In this case, return a fail-high score. alpha = std::max(mated_in(ss->ply), alpha); - beta = std::min(mate_in(ss->ply+1), beta); + beta = std::min(mate_in(ss->ply + 1), beta); if (alpha >= beta) return alpha; - } - else + } else thisThread->rootDelta = beta - alpha; assert(0 <= ss->ply && ss->ply < MAX_PLY); - (ss+1)->excludedMove = bestMove = MOVE_NONE; - (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; - (ss+2)->cutoffCnt = 0; - ss->doubleExtensions = (ss-1)->doubleExtensions; - Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; - ss->statScore = 0; + (ss + 1)->excludedMove = bestMove = MOVE_NONE; + (ss + 2)->killers[0] = (ss + 2)->killers[1] = MOVE_NONE; + (ss + 2)->cutoffCnt = 0; + ss->doubleExtensions = (ss - 1)->doubleExtensions; + Square prevSq = is_ok((ss - 1)->currentMove) ? to_sq((ss - 1)->currentMove) : SQ_NONE; + ss->statScore = 0; // Step 4. Transposition table lookup. excludedMove = ss->excludedMove; - posKey = pos.key(); - tte = TT.probe(posKey, ss->ttHit); - ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ss->ttHit ? tte->move() : MOVE_NONE; + posKey = pos.key(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] + : ss->ttHit ? tte->move() + : MOVE_NONE; ttCapture = ttMove && pos.capture_stage(ttMove); // At this point, if excluded, skip straight to step 6, static eval. However, @@ -624,30 +599,23 @@ namespace { ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff - if ( !PvNode - && !excludedMove - && tte->depth() > depth - && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit - && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) - { + if (!PvNode && !excludedMove && tte->depth() > depth + && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit + && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) { // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) - if (ttMove) - { - if (ttValue >= beta) - { + if (ttMove) { + if (ttValue >= beta) { // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); // Extra penalty for early quiet moves of the previous ply (~0 Elo on STC, ~2 Elo on LTC) - if ( prevSq != SQ_NONE - && (ss-1)->moveCount <= 2 - && !priorCapture) - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); + if (prevSq != SQ_NONE && (ss - 1)->moveCount <= 2 && !priorCapture) + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, + -stat_bonus(depth + 1)); } // Penalty for a quiet ttMove that fails low (~1 Elo) - else if (!ttCapture) - { + else if (!ttCapture) { int penalty = -stat_bonus(depth); thisThread->mainHistory[us][from_to(ttMove)] << penalty; update_continuation_histories(ss, pos.moved_piece(ttMove), to_sq(ttMove), penalty); @@ -661,48 +629,41 @@ namespace { } // Step 5. Tablebases probe - if (!rootNode && !excludedMove && TB::Cardinality) - { + if (!rootNode && !excludedMove && TB::Cardinality) { int piecesCount = pos.count(); - if ( piecesCount <= TB::Cardinality - && (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth) - && pos.rule50_count() == 0 - && !pos.can_castle(ANY_CASTLING)) - { + if (piecesCount <= TB::Cardinality + && (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth) && pos.rule50_count() == 0 + && !pos.can_castle(ANY_CASTLING)) { TB::ProbeState err; - TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); + TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); // Force check of time on the next occasion if (thisThread == Threads.main()) static_cast(thisThread)->callsCnt = 0; - if (err != TB::ProbeState::FAIL) - { + if (err != TB::ProbeState::FAIL) { thisThread->tbHits.fetch_add(1, std::memory_order_relaxed); int drawScore = TB::UseRule50 ? 1 : 0; // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score - value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1 - : wdl > drawScore ? VALUE_MATE_IN_MAX_PLY - ss->ply - 1 - : VALUE_DRAW + 2 * wdl * drawScore; + value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1 + : wdl > drawScore ? VALUE_MATE_IN_MAX_PLY - ss->ply - 1 + : VALUE_DRAW + 2 * wdl * drawScore; - Bound b = wdl < -drawScore ? BOUND_UPPER - : wdl > drawScore ? BOUND_LOWER : BOUND_EXACT; + Bound b = wdl < -drawScore ? BOUND_UPPER + : wdl > drawScore ? BOUND_LOWER + : BOUND_EXACT; - if ( b == BOUND_EXACT - || (b == BOUND_LOWER ? value >= beta : value <= alpha)) - { + if (b == BOUND_EXACT || (b == BOUND_LOWER ? value >= beta : value <= alpha)) { tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, - std::min(MAX_PLY - 1, depth + 6), - MOVE_NONE, VALUE_NONE); + std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); return value; } - if (PvNode) - { + if (PvNode) { if (b == BOUND_LOWER) bestValue = value, alpha = std::max(alpha, bestValue); else @@ -715,21 +676,16 @@ namespace { CapturePieceToHistory& captureHistory = thisThread->captureHistory; // Step 6. Static evaluation of the position - if (ss->inCheck) - { + if (ss->inCheck) { // Skip early pruning when in check ss->staticEval = eval = VALUE_NONE; - improving = false; + improving = false; goto moves_loop; - } - else if (excludedMove) - { + } else if (excludedMove) { // Providing the hint that this node's accumulator will be used often brings significant Elo gain (~13 Elo) Eval::NNUE::hint_common_parent_position(pos); eval = ss->staticEval; - } - else if (ss->ttHit) - { + } else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); if (eval == VALUE_NONE) @@ -738,24 +694,18 @@ namespace { Eval::NNUE::hint_common_parent_position(pos); // ttValue can be used as a better position evaluation (~7 Elo) - if ( ttValue != VALUE_NONE - && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) + if (ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttValue; - } - else - { + } else { ss->staticEval = eval = evaluate(pos); // Save static evaluation into the transposition table tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Use static evaluation difference to improve quiet move ordering (~4 Elo) - if ( is_ok((ss-1)->currentMove) - && !(ss-1)->inCheck - && !priorCapture) - { - int bonus = std::clamp(-18 * int((ss-1)->staticEval + ss->staticEval), -1812, 1812); - thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; + if (is_ok((ss - 1)->currentMove) && !(ss - 1)->inCheck && !priorCapture) { + int bonus = std::clamp(-18 * int((ss - 1)->staticEval + ss->staticEval), -1812, 1812); + thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] << bonus; } // Set up the improving flag, which is true if current static evaluation is @@ -763,16 +713,15 @@ namespace { // check at our previous move we look at static evaluation at move prior to it // and if we were in check at move prior to it flag is set to true) and is // false otherwise. The improving flag is used in various pruning heuristics. - improving = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval > (ss-2)->staticEval - : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval > (ss-4)->staticEval - : true; + improving = (ss - 2)->staticEval != VALUE_NONE ? ss->staticEval > (ss - 2)->staticEval + : (ss - 4)->staticEval != VALUE_NONE ? ss->staticEval > (ss - 4)->staticEval + : true; // Step 7. Razoring (~1 Elo) // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. // Adjust razor margin according to cutoffCnt. (~1 Elo) - if (eval < alpha - 492 - (257 - 200 * ((ss+1)->cutoffCnt > 3)) * depth * depth) - { + if (eval < alpha - 492 - (257 - 200 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) return value; @@ -780,54 +729,45 @@ namespace { // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if ( !ss->ttPv - && depth < 9 - && eval - futility_margin(depth, cutNode && !ss->ttHit, improving) - (ss-1)->statScore / 321 >= beta - && eval >= beta - && eval < 29462 // smaller than TB wins - && !( !ttCapture - && ttMove)) + if (!ss->ttPv && depth < 9 + && eval - futility_margin(depth, cutNode && !ss->ttHit, improving) + - (ss - 1)->statScore / 321 + >= beta + && eval >= beta && eval < 29462 // smaller than TB wins + && !(!ttCapture && ttMove)) return eval; // Step 9. Null move search with verification search (~35 Elo) - if ( !PvNode - && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 17257 - && eval >= beta - && eval >= ss->staticEval - && ss->staticEval >= beta - 24 * depth + 281 - && !excludedMove - && pos.non_pawn_material(us) - && ss->ply >= thisThread->nmpMinPly - && beta > VALUE_TB_LOSS_IN_MAX_PLY) - { + if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < 17257 && eval >= beta + && eval >= ss->staticEval && ss->staticEval >= beta - 24 * depth + 281 && !excludedMove + && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly + && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval Depth R = std::min(int(eval - beta) / 152, 6) + depth / 3 + 4; - ss->currentMove = MOVE_NULL; + ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; pos.do_null_move(st); - Value nullValue = -search(pos, ss+1, -beta, -beta+1, depth-R, !cutNode); + Value nullValue = -search(pos, ss + 1, -beta, -beta + 1, depth - R, !cutNode); pos.undo_null_move(); // Do not return unproven mate or TB scores - if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) - { + if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { if (thisThread->nmpMinPly || depth < 14) return nullValue; - assert(!thisThread->nmpMinPly); // Recursive verification is not allowed + assert(!thisThread->nmpMinPly); // Recursive verification is not allowed // Do verification search at high depths, with null move pruning disabled // until ply exceeds nmpMinPly. - thisThread->nmpMinPly = ss->ply + 3 * (depth-R) / 4; + thisThread->nmpMinPly = ss->ply + 3 * (depth - R) / 4; - Value v = search(pos, ss, beta-1, beta, depth-R, false); + Value v = search(pos, ss, beta - 1, beta, depth - R, false); thisThread->nmpMinPly = 0; @@ -839,16 +779,13 @@ namespace { // Step 10. If the position doesn't have a ttMove, decrease depth by 2 // (or by 4 if the TT entry for the current position was hit and the stored depth is greater than or equal to the current depth). // Use qsearch if depth is equal or below zero (~9 Elo) - if ( PvNode - && !ttMove) + if (PvNode && !ttMove) depth -= 2 + 2 * (ss->ttHit && tte->depth() >= depth); if (depth <= 0) return qsearch(pos, ss, alpha, beta); - if ( cutNode - && depth >= 8 - && !ttMove) + if (cutNode && depth >= 8 && !ttMove) depth -= 2; probCutBeta = beta + 168 - 70 * improving; @@ -856,47 +793,43 @@ namespace { // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. - if ( !PvNode - && depth > 3 - && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY - // If value from transposition table is lower than probCutBeta, don't attempt probCut - // there and in further interactions with transposition table cutoff depth is set to depth - 3 - // because probCut search has depth set to depth - 4 but we also do a move before it - // So effective depth is equal to depth - 3 - && !( tte->depth() >= depth - 3 - && ttValue != VALUE_NONE - && ttValue < probCutBeta)) - { + if ( + !PvNode && depth > 3 + && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + // If value from transposition table is lower than probCutBeta, don't attempt probCut + // there and in further interactions with transposition table cutoff depth is set to depth - 3 + // because probCut search has depth set to depth - 4 but we also do a move before it + // So effective depth is equal to depth - 3 + && !(tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); while ((move = mp.next_move()) != MOVE_NONE) - if (move != excludedMove && pos.legal(move)) - { + if (move != excludedMove && pos.legal(move)) { assert(pos.capture_stage(move)); ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [true] - [pos.moved_piece(move)] - [to_sq(move)]; + ss->continuationHistory = + &thisThread + ->continuationHistory[ss->inCheck][true][pos.moved_piece(move)][to_sq(move)]; pos.do_move(move, st); // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss+1, -probCutBeta, -probCutBeta+1); + value = -qsearch(pos, ss + 1, -probCutBeta, -probCutBeta + 1); // If the qsearch held, perform the regular search if (value >= probCutBeta) - value = -search(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode); + value = -search(pos, ss + 1, -probCutBeta, -probCutBeta + 1, depth - 4, + !cutNode); pos.undo_move(move); - if (value >= probCutBeta) - { + if (value >= probCutBeta) { // Save ProbCut data into transposition table - tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, move, ss->staticEval); + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, BOUND_LOWER, depth - 3, + move, ss->staticEval); return value - (probCutBeta - beta); } } @@ -904,447 +837,391 @@ namespace { Eval::NNUE::hint_common_parent_position(pos); } -moves_loop: // When in check, search starts here +moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) probCutBeta = beta + 416; - if ( ss->inCheck - && !PvNode - && ttCapture - && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 4 - && ttValue >= probCutBeta - && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY - && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) + if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 4 && ttValue >= probCutBeta + && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) return probCutBeta; - const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, - (ss-3)->continuationHistory, (ss-4)->continuationHistory, - nullptr , (ss-6)->continuationHistory }; + const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, + (ss - 2)->continuationHistory, + (ss - 3)->continuationHistory, + (ss - 4)->continuationHistory, + nullptr, + (ss - 6)->continuationHistory}; - Move countermove = prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : MOVE_NONE; + Move countermove = + prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : MOVE_NONE; - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, - &captureHistory, - contHist, - countermove, - ss->killers); + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &captureHistory, contHist, + countermove, ss->killers); - value = bestValue; + value = bestValue; moveCountPruning = singularQuietLMR = false; // Indicate PvNodes that will probably fail low if the node was searched // at a depth equal to or greater than the current depth, and the result // of this search was a fail low. - bool likelyFailLow = PvNode - && ttMove - && (tte->bound() & BOUND_UPPER) - && tte->depth() >= depth; + bool likelyFailLow = PvNode && ttMove && (tte->bound() & BOUND_UPPER) && tte->depth() >= depth; // Step 13. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. - while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) - { - assert(is_ok(move)); - - if (move == excludedMove) - continue; - - // Check for legality - if (!pos.legal(move)) - continue; - - // At root obey the "searchmoves" option and skip moves not listed in Root - // Move List. In MultiPV mode we also skip PV moves that have been already - // searched and those of lower "TB rank" if we are in a TB root position. - if (rootNode && !std::count(thisThread->rootMoves.begin() + thisThread->pvIdx, - thisThread->rootMoves.begin() + thisThread->pvLast, move)) - continue; - - ss->moveCount = ++moveCount; - - if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) - sync_cout << "info depth " << depth - << " currmove " << UCI::move(move, pos.is_chess960()) - << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl; - if (PvNode) - (ss+1)->pv = nullptr; - - extension = 0; - capture = pos.capture_stage(move); - movedPiece = pos.moved_piece(move); - givesCheck = pos.gives_check(move); - - // Calculate new depth for this move - newDepth = depth - 1; - - Value delta = beta - alpha; - - Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); - - // Step 14. Pruning at shallow depth (~120 Elo). - // Depth conditions are important for mate finding. - if ( !rootNode - && pos.non_pawn_material(us) - && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) - { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) - if (!moveCountPruning) - moveCountPruning = moveCount >= futility_move_count(improving, depth); - - // Reduced depth of the next LMR search - int lmrDepth = newDepth - r; - - if ( capture - || givesCheck) - { - // Futility pruning for captures (~2 Elo) - if ( !givesCheck - && lmrDepth < 7 - && !ss->inCheck - && ss->staticEval + 188 + 206 * lmrDepth + PieceValue[pos.piece_on(to_sq(move))] - + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 7 < alpha) - continue; - - // SEE based pruning for captures and checks (~11 Elo) - if (!pos.see_ge(move, Value(-185) * depth)) - continue; - } - else - { - int history = (*contHist[0])[movedPiece][to_sq(move)] + while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { + assert(is_ok(move)); + + if (move == excludedMove) + continue; + + // Check for legality + if (!pos.legal(move)) + continue; + + // At root obey the "searchmoves" option and skip moves not listed in Root + // Move List. In MultiPV mode we also skip PV moves that have been already + // searched and those of lower "TB rank" if we are in a TB root position. + if (rootNode + && !std::count(thisThread->rootMoves.begin() + thisThread->pvIdx, + thisThread->rootMoves.begin() + thisThread->pvLast, move)) + continue; + + ss->moveCount = ++moveCount; + + if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) + sync_cout << "info depth " << depth << " currmove " + << UCI::move(move, pos.is_chess960()) << " currmovenumber " + << moveCount + thisThread->pvIdx << sync_endl; + if (PvNode) + (ss + 1)->pv = nullptr; + + extension = 0; + capture = pos.capture_stage(move); + movedPiece = pos.moved_piece(move); + givesCheck = pos.gives_check(move); + + // Calculate new depth for this move + newDepth = depth - 1; + + Value delta = beta - alpha; + + Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); + + // Step 14. Pruning at shallow depth (~120 Elo). + // Depth conditions are important for mate finding. + if (!rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) + if (!moveCountPruning) + moveCountPruning = moveCount >= futility_move_count(improving, depth); + + // Reduced depth of the next LMR search + int lmrDepth = newDepth - r; + + if (capture || givesCheck) { + // Futility pruning for captures (~2 Elo) + if (!givesCheck && lmrDepth < 7 && !ss->inCheck + && ss->staticEval + 188 + 206 * lmrDepth + PieceValue[pos.piece_on(to_sq(move))] + + captureHistory[movedPiece][to_sq(move)] + [type_of(pos.piece_on(to_sq(move)))] + / 7 + < alpha) + continue; + + // SEE based pruning for captures and checks (~11 Elo) + if (!pos.see_ge(move, Value(-185) * depth)) + continue; + } else { + int history = (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)]; - // Continuation history based pruning (~2 Elo) - if ( lmrDepth < 6 - && history < -3232 * depth) - continue; - - history += 2 * thisThread->mainHistory[us][from_to(move)]; - - lmrDepth += history / 5793; - lmrDepth = std::max(lmrDepth, -2); - - // Futility pruning: parent node (~13 Elo) - if ( !ss->inCheck - && lmrDepth < 13 - && ss->staticEval + 115 + 122 * lmrDepth <= alpha) - continue; - - lmrDepth = std::max(lmrDepth, 0); - - // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, Value(-27 * lmrDepth * lmrDepth))) - continue; - } - } - - // Step 15. Extensions (~100 Elo) - // We take care to not overdo to avoid search getting stuck. - if (ss->ply < thisThread->rootDepth * 2) - { - // Singular extension search (~94 Elo). If all moves but one fail low on a - // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), - // then that move is singular and should be extended. To verify this we do - // a reduced search on all the other moves but the ttMove and if the result - // is lower than ttValue minus a margin, then we will extend the ttMove. Note - // that depth margin and singularBeta margin are known for having non-linear - // scaling. Their values are optimized to time controls of 180+1.8 and longer - // so changing them requires tests at this type of time controls. - if ( !rootNode - && depth >= 4 - (thisThread->completedDepth > 24) + 2 * (PvNode && tte->is_pv()) - && move == ttMove - && !excludedMove // Avoid recursive singular search - && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY - && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3) - { - Value singularBeta = ttValue - (64 + 57 * (ss->ttPv && !PvNode)) * depth / 64; - Depth singularDepth = (depth - 1) / 2; - - ss->excludedMove = move; - value = search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); - ss->excludedMove = MOVE_NONE; - - if (value < singularBeta) - { - extension = 1; - singularQuietLMR = !ttCapture; - - // Avoid search explosion by limiting the number of double extensions - if ( !PvNode - && value < singularBeta - 18 - && ss->doubleExtensions <= 11) - { - extension = 2; - depth += depth < 15; - } - } - - // Multi-cut pruning - // Our ttMove is assumed to fail high, and now we failed high also on a - // reduced search without the ttMove. So we assume this expected cut-node - // is not singular, that multiple moves fail high, and we can prune the - // whole subtree by returning a softbound. - else if (singularBeta >= beta) - return singularBeta; - - // If the eval of ttMove is greater than beta, we reduce it (negative extension) (~7 Elo) - else if (ttValue >= beta) - extension = -2 - !PvNode; - - // If we are on a cutNode, reduce it based on depth (negative extension) (~1 Elo) - else if (cutNode) - extension = depth < 19 ? -2 : -1; - - // If the eval of ttMove is less than value, we reduce it (negative extension) (~1 Elo) - else if (ttValue <= value) - extension = -1; - } - - // Check extensions (~1 Elo) - else if ( givesCheck - && depth > 9) - extension = 1; - - // Quiet ttMove extensions (~1 Elo) - else if ( PvNode - && move == ttMove - && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= 4194) - extension = 1; - } - - // Add extension to new depth - newDepth += extension; - ss->doubleExtensions = (ss-1)->doubleExtensions + (extension == 2); - - // Speculative prefetch as early as possible - prefetch(TT.first_entry(pos.key_after(move))); - - // Update the current move (this must be done after singular extension search) - ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [capture] - [movedPiece] - [to_sq(move)]; - - // Step 16. Make the move - pos.do_move(move, st, givesCheck); - - // Decrease reduction if position is or has been on the PV (~4 Elo) - if ( ss->ttPv - && !likelyFailLow) - r -= cutNode && tte->depth() >= depth ? 3 : 2; - - // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss-1)->moveCount > 7) - r--; - - // Increase reduction for cut nodes (~3 Elo) - if (cutNode) - r += 2; - - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) - r++; - - // Decrease reduction for PvNodes (~2 Elo) - if (PvNode) - r--; - - // Decrease reduction if ttMove has been singularly extended (~1 Elo) - if (singularQuietLMR) - r--; - - // Increase reduction on repetition (~1 Elo) - if ( move == (ss-4)->currentMove - && pos.has_repeated()) - r += 2; - - // Increase reduction if next ply has a lot of fail high (~5 Elo) - if ((ss+1)->cutoffCnt > 3) - r++; - - // Decrease reduction for first generated move (ttMove) - else if (move == ttMove) - r--; - - ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - - 3848; - - // Decrease/increase reduction for moves with a good/bad history (~25 Elo) - r -= ss->statScore / (10216 + 3855 * (depth > 5 && depth < 23)); - - // Step 17. Late moves reduction / extension (LMR, ~117 Elo) - // We use various heuristics for the sons of a node after the first son has - // been searched. In general, we would like to reduce them, but there are many - // cases where we extend a son if it has good chances to be "interesting". - if ( depth >= 2 - && moveCount > 1 + (PvNode && ss->ply <= 1) - && ( !ss->ttPv - || !capture - || (cutNode && (ss-1)->moveCount > 1))) - { - // In general we want to cap the LMR depth search at newDepth, but when - // reduction is negative, we allow this move a limited search extension - // beyond the first move depth. This may lead to hidden double extensions. - Depth d = std::clamp(newDepth - r, 1, newDepth + 1); - - value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); - - // Do a full-depth search when reduced LMR search fails high - if ( value > alpha - && d < newDepth) - { - // Adjust full-depth search based on LMR results - if the result - // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 51 + 10 * (newDepth - d)); - const bool doEvenDeeperSearch = value > alpha + 700 && ss->doubleExtensions <= 6; - const bool doShallowerSearch = value < bestValue + newDepth; - - ss->doubleExtensions = ss->doubleExtensions + doEvenDeeperSearch; - - newDepth += doDeeperSearch - doShallowerSearch + doEvenDeeperSearch; - - if (newDepth > d) - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); - - int bonus = value <= alpha ? -stat_bonus(newDepth) - : value >= beta ? stat_bonus(newDepth) - : 0; - - update_continuation_histories(ss, movedPiece, to_sq(move), bonus); - } - } - - // Step 18. Full-depth search when LMR is skipped - else if (!PvNode || moveCount > 1) - { - // Increase reduction for cut nodes and not ttMove (~1 Elo) - if ( !ttMove - && cutNode) - r += 2; - - // Note that if expected reduction is high, we reduce search depth by 1 here - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth - (r > 3), !cutNode); - } - - // For PV nodes only, do a full PV search on the first move or after a fail high, - // otherwise let the parent node fail low with value <= alpha and try another move. - if ( PvNode - && (moveCount == 1 || value > alpha)) - { - (ss+1)->pv = pv; - (ss+1)->pv[0] = MOVE_NONE; - - value = -search(pos, ss+1, -beta, -alpha, newDepth, false); - } - - // Step 19. Undo move - pos.undo_move(move); - - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - - // Step 20. Check for a new best move - // Finished searching the move. If a stop occurred, the return value of - // the search cannot be trusted, and we return immediately without - // updating best move, PV and TT. - if (Threads.stop.load(std::memory_order_relaxed)) - return VALUE_ZERO; - - if (rootNode) - { - RootMove& rm = *std::find(thisThread->rootMoves.begin(), - thisThread->rootMoves.end(), move); - - rm.averageScore = rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; - - // PV move or new best move? - if (moveCount == 1 || value > alpha) - { - rm.score = rm.uciScore = value; - rm.selDepth = thisThread->selDepth; - rm.scoreLowerbound = rm.scoreUpperbound = false; - - if (value >= beta) - { - rm.scoreLowerbound = true; - rm.uciScore = beta; - } - else if (value <= alpha) - { - rm.scoreUpperbound = true; - rm.uciScore = alpha; - } - - rm.pv.resize(1); - - assert((ss+1)->pv); - - for (Move* m = (ss+1)->pv; *m != MOVE_NONE; ++m) - rm.pv.push_back(*m); - - // We record how often the best move has been changed in each iteration. - // This information is used for time management. In MultiPV mode, - // we must take care to only do this for the first PV line. - if ( moveCount > 1 - && !thisThread->pvIdx) - ++thisThread->bestMoveChanges; - } - else - // All other moves but the PV, are set to the lowest value: this - // is not a problem when sorting because the sort is stable and the - // move position in the list is preserved - just the PV is pushed up. - rm.score = -VALUE_INFINITE; - } - - if (value > bestValue) - { - bestValue = value; - - if (value > alpha) - { - bestMove = move; - - if (PvNode && !rootNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); - - if (value >= beta) - { - ss->cutoffCnt += 1 + !ttMove; - assert(value >= beta); // Fail high - break; - } - else - { - // Reduce other moves if we have found at least one score improvement (~2 Elo) - if ( depth > 2 - && depth < 12 - && beta < 13828 - && value > -11369) - depth -= 2; - - assert(depth > 0); - alpha = value; // Update alpha! Always alpha < beta - } - } - } - - // If the move is worse than some previously searched move, - // remember it, to update its stats later. - if (move != bestMove && moveCount <= 32) - { - if (capture) - capturesSearched[captureCount++] = move; - - else - quietsSearched[quietCount++] = move; - } + // Continuation history based pruning (~2 Elo) + if (lmrDepth < 6 && history < -3232 * depth) + continue; + + history += 2 * thisThread->mainHistory[us][from_to(move)]; + + lmrDepth += history / 5793; + lmrDepth = std::max(lmrDepth, -2); + + // Futility pruning: parent node (~13 Elo) + if (!ss->inCheck && lmrDepth < 13 && ss->staticEval + 115 + 122 * lmrDepth <= alpha) + continue; + + lmrDepth = std::max(lmrDepth, 0); + + // Prune moves with negative SEE (~4 Elo) + if (!pos.see_ge(move, Value(-27 * lmrDepth * lmrDepth))) + continue; + } + } + + // Step 15. Extensions (~100 Elo) + // We take care to not overdo to avoid search getting stuck. + if (ss->ply < thisThread->rootDepth * 2) { + // Singular extension search (~94 Elo). If all moves but one fail low on a + // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), + // then that move is singular and should be extended. To verify this we do + // a reduced search on all the other moves but the ttMove and if the result + // is lower than ttValue minus a margin, then we will extend the ttMove. Note + // that depth margin and singularBeta margin are known for having non-linear + // scaling. Their values are optimized to time controls of 180+1.8 and longer + // so changing them requires tests at this type of time controls. + if (!rootNode + && depth >= 4 - (thisThread->completedDepth > 24) + 2 * (PvNode && tte->is_pv()) + && move == ttMove && !excludedMove // Avoid recursive singular search + && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) + && tte->depth() >= depth - 3) { + Value singularBeta = ttValue - (64 + 57 * (ss->ttPv && !PvNode)) * depth / 64; + Depth singularDepth = (depth - 1) / 2; + + ss->excludedMove = move; + value = + search(pos, ss, singularBeta - 1, singularBeta, singularDepth, cutNode); + ss->excludedMove = MOVE_NONE; + + if (value < singularBeta) { + extension = 1; + singularQuietLMR = !ttCapture; + + // Avoid search explosion by limiting the number of double extensions + if (!PvNode && value < singularBeta - 18 && ss->doubleExtensions <= 11) { + extension = 2; + depth += depth < 15; + } + } + + // Multi-cut pruning + // Our ttMove is assumed to fail high, and now we failed high also on a + // reduced search without the ttMove. So we assume this expected cut-node + // is not singular, that multiple moves fail high, and we can prune the + // whole subtree by returning a softbound. + else if (singularBeta >= beta) + return singularBeta; + + // If the eval of ttMove is greater than beta, we reduce it (negative extension) (~7 Elo) + else if (ttValue >= beta) + extension = -2 - !PvNode; + + // If we are on a cutNode, reduce it based on depth (negative extension) (~1 Elo) + else if (cutNode) + extension = depth < 19 ? -2 : -1; + + // If the eval of ttMove is less than value, we reduce it (negative extension) (~1 Elo) + else if (ttValue <= value) + extension = -1; + } + + // Check extensions (~1 Elo) + else if (givesCheck && depth > 9) + extension = 1; + + // Quiet ttMove extensions (~1 Elo) + else if (PvNode && move == ttMove && move == ss->killers[0] + && (*contHist[0])[movedPiece][to_sq(move)] >= 4194) + extension = 1; + } + + // Add extension to new depth + newDepth += extension; + ss->doubleExtensions = (ss - 1)->doubleExtensions + (extension == 2); + + // Speculative prefetch as early as possible + prefetch(TT.first_entry(pos.key_after(move))); + + // Update the current move (this must be done after singular extension search) + ss->currentMove = move; + ss->continuationHistory = + &thisThread->continuationHistory[ss->inCheck][capture][movedPiece][to_sq(move)]; + + // Step 16. Make the move + pos.do_move(move, st, givesCheck); + + // Decrease reduction if position is or has been on the PV (~4 Elo) + if (ss->ttPv && !likelyFailLow) + r -= cutNode && tte->depth() >= depth ? 3 : 2; + + // Decrease reduction if opponent's move count is high (~1 Elo) + if ((ss - 1)->moveCount > 7) + r--; + + // Increase reduction for cut nodes (~3 Elo) + if (cutNode) + r += 2; + + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; + + // Decrease reduction for PvNodes (~2 Elo) + if (PvNode) + r--; + + // Decrease reduction if ttMove has been singularly extended (~1 Elo) + if (singularQuietLMR) + r--; + + // Increase reduction on repetition (~1 Elo) + if (move == (ss - 4)->currentMove && pos.has_repeated()) + r += 2; + + // Increase reduction if next ply has a lot of fail high (~5 Elo) + if ((ss + 1)->cutoffCnt > 3) + r++; + + // Decrease reduction for first generated move (ttMove) + else if (move == ttMove) + r--; + + ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + + (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)] - 3848; + + // Decrease/increase reduction for moves with a good/bad history (~25 Elo) + r -= ss->statScore / (10216 + 3855 * (depth > 5 && depth < 23)); + + // Step 17. Late moves reduction / extension (LMR, ~117 Elo) + // We use various heuristics for the sons of a node after the first son has + // been searched. In general, we would like to reduce them, but there are many + // cases where we extend a son if it has good chances to be "interesting". + if (depth >= 2 && moveCount > 1 + (PvNode && ss->ply <= 1) + && (!ss->ttPv || !capture || (cutNode && (ss - 1)->moveCount > 1))) { + // In general we want to cap the LMR depth search at newDepth, but when + // reduction is negative, we allow this move a limited search extension + // beyond the first move depth. This may lead to hidden double extensions. + Depth d = std::clamp(newDepth - r, 1, newDepth + 1); + + value = -search(pos, ss + 1, -(alpha + 1), -alpha, d, true); + + // Do a full-depth search when reduced LMR search fails high + if (value > alpha && d < newDepth) { + // Adjust full-depth search based on LMR results - if the result + // was good enough search deeper, if it was bad enough search shallower. + const bool doDeeperSearch = value > (bestValue + 51 + 10 * (newDepth - d)); + const bool doEvenDeeperSearch = value > alpha + 700 && ss->doubleExtensions <= 6; + const bool doShallowerSearch = value < bestValue + newDepth; + + ss->doubleExtensions = ss->doubleExtensions + doEvenDeeperSearch; + + newDepth += doDeeperSearch - doShallowerSearch + doEvenDeeperSearch; + + if (newDepth > d) + value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth, !cutNode); + + int bonus = value <= alpha ? -stat_bonus(newDepth) + : value >= beta ? stat_bonus(newDepth) + : 0; + + update_continuation_histories(ss, movedPiece, to_sq(move), bonus); + } + } + + // Step 18. Full-depth search when LMR is skipped + else if (!PvNode || moveCount > 1) { + // Increase reduction for cut nodes and not ttMove (~1 Elo) + if (!ttMove && cutNode) + r += 2; + + // Note that if expected reduction is high, we reduce search depth by 1 here + value = -search(pos, ss + 1, -(alpha + 1), -alpha, newDepth - (r > 3), !cutNode); + } + + // For PV nodes only, do a full PV search on the first move or after a fail high, + // otherwise let the parent node fail low with value <= alpha and try another move. + if (PvNode && (moveCount == 1 || value > alpha)) { + (ss + 1)->pv = pv; + (ss + 1)->pv[0] = MOVE_NONE; + + value = -search(pos, ss + 1, -beta, -alpha, newDepth, false); + } + + // Step 19. Undo move + pos.undo_move(move); + + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + + // Step 20. Check for a new best move + // Finished searching the move. If a stop occurred, the return value of + // the search cannot be trusted, and we return immediately without + // updating best move, PV and TT. + if (Threads.stop.load(std::memory_order_relaxed)) + return VALUE_ZERO; + + if (rootNode) { + RootMove& rm = + *std::find(thisThread->rootMoves.begin(), thisThread->rootMoves.end(), move); + + rm.averageScore = + rm.averageScore != -VALUE_INFINITE ? (2 * value + rm.averageScore) / 3 : value; + + // PV move or new best move? + if (moveCount == 1 || value > alpha) { + rm.score = rm.uciScore = value; + rm.selDepth = thisThread->selDepth; + rm.scoreLowerbound = rm.scoreUpperbound = false; + + if (value >= beta) { + rm.scoreLowerbound = true; + rm.uciScore = beta; + } else if (value <= alpha) { + rm.scoreUpperbound = true; + rm.uciScore = alpha; + } + + rm.pv.resize(1); + + assert((ss + 1)->pv); + + for (Move* m = (ss + 1)->pv; *m != MOVE_NONE; ++m) + rm.pv.push_back(*m); + + // We record how often the best move has been changed in each iteration. + // This information is used for time management. In MultiPV mode, + // we must take care to only do this for the first PV line. + if (moveCount > 1 && !thisThread->pvIdx) + ++thisThread->bestMoveChanges; + } else + // All other moves but the PV, are set to the lowest value: this + // is not a problem when sorting because the sort is stable and the + // move position in the list is preserved - just the PV is pushed up. + rm.score = -VALUE_INFINITE; + } + + if (value > bestValue) { + bestValue = value; + + if (value > alpha) { + bestMove = move; + + if (PvNode && !rootNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss + 1)->pv); + + if (value >= beta) { + ss->cutoffCnt += 1 + !ttMove; + assert(value >= beta); // Fail high + break; + } else { + // Reduce other moves if we have found at least one score improvement (~2 Elo) + if (depth > 2 && depth < 12 && beta < 13828 && value > -11369) + depth -= 2; + + assert(depth > 0); + alpha = value; // Update alpha! Always alpha < beta + } + } + } + + // If the move is worse than some previously searched move, + // remember it, to update its stats later. + if (move != bestMove && moveCount <= 32) { + if (capture) + capturesSearched[captureCount++] = move; + + else + quietsSearched[quietCount++] = move; + } } // Step 21. Check for mate and stalemate @@ -1355,21 +1232,21 @@ namespace { assert(moveCount || !ss->inCheck || excludedMove || !MoveList(pos).size()); if (!moveCount) - bestValue = excludedMove ? alpha : - ss->inCheck ? mated_in(ss->ply) - : VALUE_DRAW; + bestValue = excludedMove ? alpha : ss->inCheck ? mated_in(ss->ply) : VALUE_DRAW; // If there is a move that produces search value greater than alpha we update the stats of searched moves else if (bestMove) - update_all_stats(pos, ss, bestMove, bestValue, beta, prevSq, - quietsSearched, quietCount, capturesSearched, captureCount, depth); + update_all_stats(pos, ss, bestMove, bestValue, beta, prevSq, quietsSearched, quietCount, + capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low - else if (!priorCapture && prevSq != SQ_NONE) - { - int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 653) + ((ss-1)->moveCount > 11); - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); - thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << stat_bonus(depth) * bonus / 2; + else if (!priorCapture && prevSq != SQ_NONE) { + int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 653) + + ((ss - 1)->moveCount > 11); + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, + stat_bonus(depth) * bonus); + thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] + << stat_bonus(depth) * bonus / 2; } if (PvNode) @@ -1378,26 +1255,27 @@ namespace { // If no good move is found and the previous position was ttPv, then the previous // opponent move is probably good and the new position is added to the search tree. (~7 Elo) if (bestValue <= alpha) - ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + ss->ttPv = ss->ttPv || ((ss - 1)->ttPv && depth > 3); // Write gathered information in transposition table if (!excludedMove && !(rootNode && thisThread->pvIdx)) tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, - bestValue >= beta ? BOUND_LOWER : - PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, + bestValue >= beta ? BOUND_LOWER + : PvNode && bestMove ? BOUND_EXACT + : BOUND_UPPER, depth, bestMove, ss->staticEval); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); return bestValue; - } +} - // qsearch() is the quiescence search function, which is called by the main search - // function with zero depth, or recursively with further decreasing depth per call. - // (~155 Elo) - template - Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { +// qsearch() is the quiescence search function, which is called by the main search +// function with zero depth, or recursively with further decreasing depth per call. +// (~155 Elo) +template +Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { static_assert(nodeType != Root); constexpr bool PvNode = nodeType == PV; @@ -1408,42 +1286,38 @@ namespace { // Check if we have an upcoming move that draws by repetition, or // if the opponent had an alternative move earlier to this position. - if ( alpha < VALUE_DRAW - && pos.has_game_cycle(ss->ply)) - { + if (alpha < VALUE_DRAW && pos.has_game_cycle(ss->ply)) { alpha = value_draw(pos.this_thread()); if (alpha >= beta) return alpha; } - Move pv[MAX_PLY+1]; + Move pv[MAX_PLY + 1]; StateInfo st; ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize); TTEntry* tte; - Key posKey; - Move ttMove, move, bestMove; - Depth ttDepth; - Value bestValue, value, ttValue, futilityValue, futilityBase; - bool pvHit, givesCheck, capture; - int moveCount; - Color us = pos.side_to_move(); + Key posKey; + Move ttMove, move, bestMove; + Depth ttDepth; + Value bestValue, value, ttValue, futilityValue, futilityBase; + bool pvHit, givesCheck, capture; + int moveCount; + Color us = pos.side_to_move(); // Step 1. Initialize node - if (PvNode) - { - (ss+1)->pv = pv; - ss->pv[0] = MOVE_NONE; + if (PvNode) { + (ss + 1)->pv = pv; + ss->pv[0] = MOVE_NONE; } Thread* thisThread = pos.this_thread(); - bestMove = MOVE_NONE; - ss->inCheck = pos.checkers(); - moveCount = 0; + bestMove = MOVE_NONE; + ss->inCheck = pos.checkers(); + moveCount = 0; // Step 2. Check for an immediate draw or maximum ply reached - if ( pos.is_draw(ss->ply) - || ss->ply >= MAX_PLY) + if (pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; assert(0 <= ss->ply && ss->ply < MAX_PLY); @@ -1451,50 +1325,44 @@ namespace { // Decide whether or not to include checks: this fixes also the type of // TT entry depth that we are going to use. Note that in qsearch we use // only two types of depth in TT: DEPTH_QS_CHECKS or DEPTH_QS_NO_CHECKS. - ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS - : DEPTH_QS_NO_CHECKS; + ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS : DEPTH_QS_NO_CHECKS; // Step 3. Transposition table lookup - posKey = pos.key(); - tte = TT.probe(posKey, ss->ttHit); + posKey = pos.key(); + tte = TT.probe(posKey, ss->ttHit); ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ss->ttHit ? tte->move() : MOVE_NONE; - pvHit = ss->ttHit && tte->is_pv(); + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); // At non-PV nodes we check for an early TT cutoff - if ( !PvNode - && tte->depth() >= ttDepth - && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit + if (!PvNode && tte->depth() >= ttDepth + && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) return ttValue; // Step 4. Static evaluation of the position if (ss->inCheck) bestValue = futilityBase = -VALUE_INFINITE; - else - { - if (ss->ttHit) - { + else { + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) ss->staticEval = bestValue = evaluate(pos); // ttValue can be used as a better position evaluation (~13 Elo) - if ( ttValue != VALUE_NONE + if (ttValue != VALUE_NONE && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; - } - else + } else // In case of null move search use previous static eval with a different sign - ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval; + ss->staticEval = bestValue = + (ss - 1)->currentMove != MOVE_NULL ? evaluate(pos) : -(ss - 1)->staticEval; // Stand pat. Return immediately if static value is at least beta - if (bestValue >= beta) - { + if (bestValue >= beta) { if (!ss->ttHit) - tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, - DEPTH_NONE, MOVE_NONE, ss->staticEval); + tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, + MOVE_NONE, ss->staticEval); return bestValue; } @@ -1505,24 +1373,22 @@ namespace { futilityBase = std::min(ss->staticEval, bestValue) + 200; } - const PieceToHistory* contHist[] = {(ss-1)->continuationHistory, (ss-2)->continuationHistory}; + const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, + (ss - 2)->continuationHistory}; // Initialize a MovePicker object for the current position, and prepare // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. - Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; - MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, - &thisThread->captureHistory, - contHist, - prevSq); + Square prevSq = is_ok((ss - 1)->currentMove) ? to_sq((ss - 1)->currentMove) : SQ_NONE; + MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, + contHist, prevSq); int quietCheckEvasions = 0; // Step 5. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. - while ((move = mp.next_move()) != MOVE_NONE) - { + while ((move = mp.next_move()) != MOVE_NONE) { assert(is_ok(move)); // Check for legality @@ -1530,20 +1396,15 @@ namespace { continue; givesCheck = pos.gives_check(move); - capture = pos.capture_stage(move); + capture = pos.capture_stage(move); moveCount++; // Step 6. Pruning - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && pos.non_pawn_material(us)) - { + if (bestValue > VALUE_TB_LOSS_IN_MAX_PLY && pos.non_pawn_material(us)) { // Futility pruning and moveCount pruning (~10 Elo) - if ( !givesCheck - && to_sq(move) != prevSq - && futilityBase > VALUE_TB_LOSS_IN_MAX_PLY - && type_of(move) != PROMOTION) - { + if (!givesCheck && to_sq(move) != prevSq && futilityBase > VALUE_TB_LOSS_IN_MAX_PLY + && type_of(move) != PROMOTION) { if (moveCount > 2) continue; @@ -1551,25 +1412,21 @@ namespace { // If static eval + value of piece we are going to capture is much lower // than alpha we can prune this move. - if (futilityValue <= alpha) - { + if (futilityValue <= alpha) { bestValue = std::max(bestValue, futilityValue); continue; } // If static eval is much lower than alpha and move is not winning material // we can prune this move. - if ( futilityBase <= alpha - && !pos.see_ge(move, VALUE_ZERO + 1)) - { + if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) { bestValue = std::max(bestValue, futilityBase); continue; } // If static exchange evaluation is much worse than what is needed to not // fall below alpha we can prune this move. - if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) - { + if (futilityBase > alpha && !pos.see_ge(move, (alpha - futilityBase) * 4)) { bestValue = alpha; continue; } @@ -1582,8 +1439,7 @@ namespace { break; // Continuation history based pruning (~3 Elo) - if ( !capture - && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 + if (!capture && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) continue; @@ -1597,36 +1453,33 @@ namespace { // Update the current move ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [capture] - [pos.moved_piece(move)] - [to_sq(move)]; + ss->continuationHistory = + &thisThread + ->continuationHistory[ss->inCheck][capture][pos.moved_piece(move)][to_sq(move)]; quietCheckEvasions += !capture && ss->inCheck; // Step 7. Make and search the move pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); + value = -qsearch(pos, ss + 1, -beta, -alpha, depth - 1); pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); // Step 8. Check for a new best move - if (value > bestValue) - { + if (value > bestValue) { bestValue = value; - if (value > alpha) - { + if (value > alpha) { bestMove = move; - if (PvNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); + if (PvNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss + 1)->pv); - if (value < beta) // Update alpha here! + if (value < beta) // Update alpha here! alpha = value; else - break; // Fail high + break; // Fail high } } } @@ -1634,44 +1487,41 @@ namespace { // Step 9. Check for mate // All legal moves have been searched. A special case: if we're in check // and no legal moves were found, it is checkmate. - if (ss->inCheck && bestValue == -VALUE_INFINITE) - { + if (ss->inCheck && bestValue == -VALUE_INFINITE) { assert(!MoveList(pos).size()); - return mated_in(ss->ply); // Plies to mate from the root + return mated_in(ss->ply); // Plies to mate from the root } // Save gathered info in transposition table tte->save(posKey, value_to_tt(bestValue, ss->ply), pvHit, - bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, - ttDepth, bestMove, ss->staticEval); + bestValue >= beta ? BOUND_LOWER : BOUND_UPPER, ttDepth, bestMove, ss->staticEval); assert(bestValue > -VALUE_INFINITE && bestValue < VALUE_INFINITE); return bestValue; - } +} - // value_to_tt() adjusts a mate or TB score from "plies to mate from the root" - // to "plies to mate from the current position". Standard scores are unchanged. - // The function is called before storing a value in the transposition table. +// value_to_tt() adjusts a mate or TB score from "plies to mate from the root" +// to "plies to mate from the current position". Standard scores are unchanged. +// The function is called before storing a value in the transposition table. - Value value_to_tt(Value v, int ply) { +Value value_to_tt(Value v, int ply) { assert(v != VALUE_NONE); - return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply - : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; - } + return v >= VALUE_TB_WIN_IN_MAX_PLY ? v + ply : v <= VALUE_TB_LOSS_IN_MAX_PLY ? v - ply : v; +} - // value_from_tt() is the inverse of value_to_tt(): it adjusts a mate or TB score - // from the transposition table (which refers to the plies to mate/be mated from - // current position) to "plies to mate/be mated (TB win/loss) from the root". - // However, to avoid potentially false mate scores related to the 50 moves rule - // and the graph history interaction problem, we return an optimal TB score instead. +// value_from_tt() is the inverse of value_to_tt(): it adjusts a mate or TB score +// from the transposition table (which refers to the plies to mate/be mated from +// current position) to "plies to mate/be mated (TB win/loss) from the root". +// However, to avoid potentially false mate scores related to the 50 moves rule +// and the graph history interaction problem, we return an optimal TB score instead. - Value value_from_tt(Value v, int ply, int r50c) { +Value value_from_tt(Value v, int ply, int r50c) { if (v == VALUE_NONE) return VALUE_NONE; @@ -1679,63 +1529,69 @@ namespace { if (v >= VALUE_TB_WIN_IN_MAX_PLY) // TB win or better { if (v >= VALUE_MATE_IN_MAX_PLY && VALUE_MATE - v > 99 - r50c) - return VALUE_MATE_IN_MAX_PLY - 1; // do not return a potentially false mate score + return VALUE_MATE_IN_MAX_PLY - 1; // do not return a potentially false mate score return v - ply; } - if (v <= VALUE_TB_LOSS_IN_MAX_PLY) // TB loss or worse + if (v <= VALUE_TB_LOSS_IN_MAX_PLY) // TB loss or worse { if (v <= VALUE_MATED_IN_MAX_PLY && VALUE_MATE + v > 99 - r50c) - return VALUE_MATED_IN_MAX_PLY + 1; // do not return a potentially false mate score + return VALUE_MATED_IN_MAX_PLY + 1; // do not return a potentially false mate score return v + ply; } return v; - } +} - // update_pv() adds current move and appends child pv[] +// update_pv() adds current move and appends child pv[] - void update_pv(Move* pv, Move move, const Move* childPv) { +void update_pv(Move* pv, Move move, const Move* childPv) { - for (*pv++ = move; childPv && *childPv != MOVE_NONE; ) + for (*pv++ = move; childPv && *childPv != MOVE_NONE;) *pv++ = *childPv++; *pv = MOVE_NONE; - } +} - // update_all_stats() updates stats at the end of search() when a bestMove is found +// update_all_stats() updates stats at the end of search() when a bestMove is found - void update_all_stats(const Position& pos, Stack* ss, Move bestMove, Value bestValue, Value beta, Square prevSq, - Move* quietsSearched, int quietCount, Move* capturesSearched, int captureCount, Depth depth) { +void update_all_stats(const Position& pos, + Stack* ss, + Move bestMove, + Value bestValue, + Value beta, + Square prevSq, + Move* quietsSearched, + int quietCount, + Move* capturesSearched, + int captureCount, + Depth depth) { - Color us = pos.side_to_move(); - Thread* thisThread = pos.this_thread(); + Color us = pos.side_to_move(); + Thread* thisThread = pos.this_thread(); CapturePieceToHistory& captureHistory = thisThread->captureHistory; - Piece moved_piece = pos.moved_piece(bestMove); - PieceType captured; + Piece moved_piece = pos.moved_piece(bestMove); + PieceType captured; int quietMoveBonus = stat_bonus(depth + 1); - if (!pos.capture_stage(bestMove)) - { - int bestMoveBonus = bestValue > beta + 168 ? quietMoveBonus // larger bonus - : stat_bonus(depth); // smaller bonus + if (!pos.capture_stage(bestMove)) { + int bestMoveBonus = bestValue > beta + 168 ? quietMoveBonus // larger bonus + : stat_bonus(depth); // smaller bonus // Increase stats for the best move in case it was a quiet move update_quiet_stats(pos, ss, bestMove, bestMoveBonus); // Decrease stats for all non-best quiet moves - for (int i = 0; i < quietCount; ++i) - { + for (int i = 0; i < quietCount; ++i) { thisThread->mainHistory[us][from_to(quietsSearched[i])] << -bestMoveBonus; - update_continuation_histories(ss, pos.moved_piece(quietsSearched[i]), to_sq(quietsSearched[i]), -bestMoveBonus); + update_continuation_histories(ss, pos.moved_piece(quietsSearched[i]), + to_sq(quietsSearched[i]), -bestMoveBonus); } - } - else - { + } else { // Increase stats for the best move in case it was a capture move captured = type_of(pos.piece_on(to_sq(bestMove))); captureHistory[moved_piece][to_sq(bestMove)][captured] << quietMoveBonus; @@ -1743,95 +1599,91 @@ namespace { // Extra penalty for a quiet early move that was not a TT move or // main killer move in previous ply when it gets refuted. - if ( prevSq != SQ_NONE - && ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) + if (prevSq != SQ_NONE + && ((ss - 1)->moveCount == 1 + (ss - 1)->ttHit + || ((ss - 1)->currentMove == (ss - 1)->killers[0])) && !pos.captured_piece()) - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -quietMoveBonus); + update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, -quietMoveBonus); // Decrease stats for all non-best capture moves - for (int i = 0; i < captureCount; ++i) - { + for (int i = 0; i < captureCount; ++i) { moved_piece = pos.moved_piece(capturesSearched[i]); - captured = type_of(pos.piece_on(to_sq(capturesSearched[i]))); + captured = type_of(pos.piece_on(to_sq(capturesSearched[i]))); captureHistory[moved_piece][to_sq(capturesSearched[i])][captured] << -quietMoveBonus; } - } +} - // update_continuation_histories() updates histories of the move pairs formed - // by moves at ply -1, -2, -4, and -6 with current move. +// update_continuation_histories() updates histories of the move pairs formed +// by moves at ply -1, -2, -4, and -6 with current move. - void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { +void update_continuation_histories(Stack* ss, Piece pc, Square to, int bonus) { - for (int i : {1, 2, 3, 4, 6}) - { + for (int i : {1, 2, 3, 4, 6}) { // Only update the first 2 continuation histories if we are in check if (ss->inCheck && i > 2) break; - if (is_ok((ss-i)->currentMove)) - (*(ss-i)->continuationHistory)[pc][to] << bonus / (1 + 3 * (i == 3)); + if (is_ok((ss - i)->currentMove)) + (*(ss - i)->continuationHistory)[pc][to] << bonus / (1 + 3 * (i == 3)); } - } +} - // update_quiet_stats() updates move sorting heuristics +// update_quiet_stats() updates move sorting heuristics - void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) { +void update_quiet_stats(const Position& pos, Stack* ss, Move move, int bonus) { // Update killers - if (ss->killers[0] != move) - { + if (ss->killers[0] != move) { ss->killers[1] = ss->killers[0]; ss->killers[0] = move; } - Color us = pos.side_to_move(); + Color us = pos.side_to_move(); Thread* thisThread = pos.this_thread(); thisThread->mainHistory[us][from_to(move)] << bonus; update_continuation_histories(ss, pos.moved_piece(move), to_sq(move), bonus); // Update countermove history - if (is_ok((ss-1)->currentMove)) - { - Square prevSq = to_sq((ss-1)->currentMove); + if (is_ok((ss - 1)->currentMove)) { + Square prevSq = to_sq((ss - 1)->currentMove); thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] = move; } - } +} - // When playing with strength handicap, choose the best move among a set of RootMoves - // using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. +// When playing with strength handicap, choose the best move among a set of RootMoves +// using a statistical rule dependent on 'level'. Idea by Heinz van Saanen. - Move Skill::pick_best(size_t multiPV) { +Move Skill::pick_best(size_t multiPV) { const RootMoves& rootMoves = Threads.main()->rootMoves; - static PRNG rng(now()); // PRNG sequence should be non-deterministic + static PRNG rng(now()); // PRNG sequence should be non-deterministic // RootMoves are already sorted by score in descending order - Value topScore = rootMoves[0].score; - int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValue); - int maxScore = -VALUE_INFINITE; + Value topScore = rootMoves[0].score; + int delta = std::min(topScore - rootMoves[multiPV - 1].score, PawnValue); + int maxScore = -VALUE_INFINITE; double weakness = 120 - 2 * level; // Choose best move. For each move score we add two terms, both dependent on // weakness. One is deterministic and bigger for weaker levels, and one is // random. Then we choose the move with the resulting highest score. - for (size_t i = 0; i < multiPV; ++i) - { + for (size_t i = 0; i < multiPV; ++i) { // This is our magic formula - int push = int(( weakness * int(topScore - rootMoves[i].score) - + delta * (rng.rand() % int(weakness))) / 128); + int push = int((weakness * int(topScore - rootMoves[i].score) + + delta * (rng.rand() % int(weakness))) + / 128); - if (rootMoves[i].score + push >= maxScore) - { + if (rootMoves[i].score + push >= maxScore) { maxScore = rootMoves[i].score + push; - best = rootMoves[i].pv[0]; + best = rootMoves[i].pv[0]; } } return best; - } +} -} // namespace +} // namespace // MainThread::check_time() is used to print debug info and, more importantly, @@ -1839,31 +1691,30 @@ namespace { void MainThread::check_time() { - if (--callsCnt > 0) - return; + if (--callsCnt > 0) + return; - // When using nodes, ensure checking rate is not lower than 0.1% of nodes - callsCnt = Limits.nodes ? std::min(512, int(Limits.nodes / 1024)) : 512; + // When using nodes, ensure checking rate is not lower than 0.1% of nodes + callsCnt = Limits.nodes ? std::min(512, int(Limits.nodes / 1024)) : 512; - static TimePoint lastInfoTime = now(); + static TimePoint lastInfoTime = now(); - TimePoint elapsed = Time.elapsed(); - TimePoint tick = Limits.startTime + elapsed; + TimePoint elapsed = Time.elapsed(); + TimePoint tick = Limits.startTime + elapsed; - if (tick - lastInfoTime >= 1000) - { - lastInfoTime = tick; - dbg_print(); - } + if (tick - lastInfoTime >= 1000) { + lastInfoTime = tick; + dbg_print(); + } - // We should not stop pondering until told so by the GUI - if (ponder) - return; + // We should not stop pondering until told so by the GUI + if (ponder) + return; - if ( (Limits.use_time_management() && (elapsed > Time.maximum() || stopOnPonderhit)) - || (Limits.movetime && elapsed >= Limits.movetime) - || (Limits.nodes && Threads.nodes_searched() >= uint64_t(Limits.nodes))) - Threads.stop = true; + if ((Limits.use_time_management() && (elapsed > Time.maximum() || stopOnPonderhit)) + || (Limits.movetime && elapsed >= Limits.movetime) + || (Limits.nodes && Threads.nodes_searched() >= uint64_t(Limits.nodes))) + Threads.stop = true; } @@ -1872,57 +1723,52 @@ void MainThread::check_time() { string UCI::pv(const Position& pos, Depth depth) { - std::stringstream ss; - TimePoint elapsed = Time.elapsed() + 1; - const RootMoves& rootMoves = pos.this_thread()->rootMoves; - size_t pvIdx = pos.this_thread()->pvIdx; - size_t multiPV = std::min(size_t(Options["MultiPV"]), rootMoves.size()); - uint64_t nodesSearched = Threads.nodes_searched(); - uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); + std::stringstream ss; + TimePoint elapsed = Time.elapsed() + 1; + const RootMoves& rootMoves = pos.this_thread()->rootMoves; + size_t pvIdx = pos.this_thread()->pvIdx; + size_t multiPV = std::min(size_t(Options["MultiPV"]), rootMoves.size()); + uint64_t nodesSearched = Threads.nodes_searched(); + uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); - for (size_t i = 0; i < multiPV; ++i) - { - bool updated = rootMoves[i].score != -VALUE_INFINITE; + for (size_t i = 0; i < multiPV; ++i) { + bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated && i > 0) - continue; + if (depth == 1 && !updated && i > 0) + continue; - Depth d = updated ? depth : std::max(1, depth - 1); - Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; + Depth d = updated ? depth : std::max(1, depth - 1); + Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; - if (v == -VALUE_INFINITE) - v = VALUE_ZERO; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; - bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; - v = tb ? rootMoves[i].tbScore : v; + bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; + v = tb ? rootMoves[i].tbScore : v; - if (ss.rdbuf()->in_avail()) // Not at first line - ss << "\n"; + if (ss.rdbuf()->in_avail()) // Not at first line + ss << "\n"; - ss << "info" - << " depth " << d - << " seldepth " << rootMoves[i].selDepth - << " multipv " << i + 1 - << " score " << UCI::value(v); + ss << "info" + << " depth " << d << " seldepth " << rootMoves[i].selDepth << " multipv " << i + 1 + << " score " << UCI::value(v); - if (Options["UCI_ShowWDL"]) - ss << UCI::wdl(v, pos.game_ply()); + if (Options["UCI_ShowWDL"]) + ss << UCI::wdl(v, pos.game_ply()); - if (i == pvIdx && !tb && updated) // tablebase- and previous-scores are exact - ss << (rootMoves[i].scoreLowerbound ? " lowerbound" : (rootMoves[i].scoreUpperbound ? " upperbound" : "")); + if (i == pvIdx && !tb && updated) // tablebase- and previous-scores are exact + ss << (rootMoves[i].scoreLowerbound + ? " lowerbound" + : (rootMoves[i].scoreUpperbound ? " upperbound" : "")); - ss << " nodes " << nodesSearched - << " nps " << nodesSearched * 1000 / elapsed - << " hashfull " << TT.hashfull() - << " tbhits " << tbHits - << " time " << elapsed - << " pv"; + ss << " nodes " << nodesSearched << " nps " << nodesSearched * 1000 / elapsed + << " hashfull " << TT.hashfull() << " tbhits " << tbHits << " time " << elapsed << " pv"; - for (Move m : rootMoves[i].pv) - ss << " " << UCI::move(m, pos.is_chess960()); - } + for (Move m : rootMoves[i].pv) + ss << " " << UCI::move(m, pos.is_chess960()); + } - return ss.str(); + return ss.str(); } @@ -1946,9 +1792,8 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { pos.do_move(pv[0], st); TTEntry* tte = TT.probe(pos.key(), ttHit); - if (ttHit) - { - Move m = tte->move(); // Local copy to be SMP safe + if (ttHit) { + Move m = tte->move(); // Local copy to be SMP safe if (MoveList(pos).contains(m)) pv.push_back(m); } @@ -1959,49 +1804,43 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { - RootInTB = false; - UseRule50 = bool(Options["Syzygy50MoveRule"]); - ProbeDepth = int(Options["SyzygyProbeDepth"]); - Cardinality = int(Options["SyzygyProbeLimit"]); + RootInTB = false; + UseRule50 = bool(Options["Syzygy50MoveRule"]); + ProbeDepth = int(Options["SyzygyProbeDepth"]); + Cardinality = int(Options["SyzygyProbeLimit"]); bool dtz_available = true; // Tables with fewer pieces than SyzygyProbeLimit are searched with // ProbeDepth == DEPTH_ZERO - if (Cardinality > MaxCardinality) - { + if (Cardinality > MaxCardinality) { Cardinality = MaxCardinality; - ProbeDepth = 0; + ProbeDepth = 0; } - if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) - { + if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { // Rank moves using DTZ tables RootInTB = root_probe(pos, rootMoves); - if (!RootInTB) - { + if (!RootInTB) { // DTZ tables are missing; try to rank moves using WDL tables dtz_available = false; - RootInTB = root_probe_wdl(pos, rootMoves); + RootInTB = root_probe_wdl(pos, rootMoves); } } - if (RootInTB) - { + if (RootInTB) { // Sort moves according to TB rank std::stable_sort(rootMoves.begin(), rootMoves.end(), - [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); + [](const RootMove& a, const RootMove& b) { return a.tbRank > b.tbRank; }); // Probe during search only if DTZ is not available and we are winning if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW) Cardinality = 0; - } - else - { + } else { // Clean up if root_probe() and root_probe_wdl() have failed for (auto& m : rootMoves) m.tbRank = 0; } } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/search.h b/src/search.h index c434ba752d6..37cd5e5a686 100644 --- a/src/search.h +++ b/src/search.h @@ -38,20 +38,20 @@ namespace Search { // its own array of Stack objects, indexed by the current ply. struct Stack { - Move* pv; - PieceToHistory* continuationHistory; - int ply; - Move currentMove; - Move excludedMove; - Move killers[2]; - Value staticEval; - int statScore; - int moveCount; - bool inCheck; - bool ttPv; - bool ttHit; - int doubleExtensions; - int cutoffCnt; + Move* pv; + PieceToHistory* continuationHistory; + int ply; + Move currentMove; + Move excludedMove; + Move killers[2]; + Value staticEval; + int statScore; + int moveCount; + bool inCheck; + bool ttPv; + bool ttHit; + int doubleExtensions; + int cutoffCnt; }; @@ -61,24 +61,24 @@ struct Stack { struct RootMove { - explicit RootMove(Move m) : pv(1, m) {} - bool extract_ponder_from_tt(Position& pos); - bool operator==(const Move& m) const { return pv[0] == m; } - bool operator<(const RootMove& m) const { // Sort in descending order - return m.score != score ? m.score < score - : m.previousScore < previousScore; - } - - Value score = -VALUE_INFINITE; - Value previousScore = -VALUE_INFINITE; - Value averageScore = -VALUE_INFINITE; - Value uciScore = -VALUE_INFINITE; - bool scoreLowerbound = false; - bool scoreUpperbound = false; - int selDepth = 0; - int tbRank = 0; - Value tbScore; - std::vector pv; + explicit RootMove(Move m) : + pv(1, m) {} + bool extract_ponder_from_tt(Position& pos); + bool operator==(const Move& m) const { return pv[0] == m; } + bool operator<(const RootMove& m) const { // Sort in descending order + return m.score != score ? m.score < score : m.previousScore < previousScore; + } + + Value score = -VALUE_INFINITE; + Value previousScore = -VALUE_INFINITE; + Value averageScore = -VALUE_INFINITE; + Value uciScore = -VALUE_INFINITE; + bool scoreLowerbound = false; + bool scoreUpperbound = false; + int selDepth = 0; + int tbRank = 0; + Value tbScore; + std::vector pv; }; using RootMoves = std::vector; @@ -89,20 +89,18 @@ using RootMoves = std::vector; struct LimitsType { - LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC - time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); - movestogo = depth = mate = perft = infinite = 0; - nodes = 0; - } + LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC + time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0); + movestogo = depth = mate = perft = infinite = 0; + nodes = 0; + } - bool use_time_management() const { - return time[WHITE] || time[BLACK]; - } + bool use_time_management() const { return time[WHITE] || time[BLACK]; } - std::vector searchmoves; - TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; - int movestogo, depth, mate, perft, infinite; - int64_t nodes; + std::vector searchmoves; + TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime; + int movestogo, depth, mate, perft, infinite; + int64_t nodes; }; extern LimitsType Limits; @@ -110,8 +108,8 @@ extern LimitsType Limits; void init(); void clear(); -} // namespace Search +} // namespace Search -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef SEARCH_H_INCLUDED +#endif // #ifndef SEARCH_H_INCLUDED diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 4114db605d2..2b7a6cf31bd 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -45,15 +45,15 @@ #include "../uci.h" #ifndef _WIN32 -#include -#include -#include + #include + #include + #include #else -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -# define NOMINMAX // Disable macros min() and max() -#endif -#include + #define WIN32_LEAN_AND_MEAN + #ifndef NOMINMAX + #define NOMINMAX // Disable macros min() and max() + #endif + #include #endif using namespace Stockfish::Tablebases; @@ -64,60 +64,69 @@ namespace Stockfish { namespace { -constexpr int TBPIECES = 7; // Max number of supported pieces -constexpr int MAX_DTZ = 1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. +constexpr int TBPIECES = 7; // Max number of supported pieces +constexpr int MAX_DTZ = + 1 << 18; // Max DTZ supported, large enough to deal with the syzygy TB limit. -enum { BigEndian, LittleEndian }; -enum TBType { WDL, DTZ }; // Used as template parameter +enum { + BigEndian, + LittleEndian +}; +enum TBType { + WDL, + DTZ +}; // Used as template parameter // Each table has a set of flags: all of them refer to DTZ tables, the last one to WDL tables -enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, SingleValue = 128 }; +enum TBFlag { + STM = 1, + Mapped = 2, + WinPlies = 4, + LossPlies = 8, + Wide = 16, + SingleValue = 128 +}; inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } -inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } +inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; int MapPawns[SQUARE_NB]; int MapB1H1H7[SQUARE_NB]; int MapA1D1D4[SQUARE_NB]; -int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] +int MapKK[10][SQUARE_NB]; // [MapA1D1D4][SQUARE_NB] -int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements -int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] -int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] +int Binomial[6][SQUARE_NB]; // [k][n] k elements from a set of n elements +int LeadPawnIdx[6][SQUARE_NB]; // [leadPawnsCnt][SQUARE_NB] +int LeadPawnsSize[6][4]; // [leadPawnsCnt][FILE_A..FILE_D] // Comparison function to sort leading pawns in ascending MapPawns[] order bool pawns_comp(Square i, Square j) { return MapPawns[i] < MapPawns[j]; } -int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } - -constexpr Value WDL_to_value[] = { - -VALUE_MATE + MAX_PLY + 1, - VALUE_DRAW - 2, - VALUE_DRAW, - VALUE_DRAW + 2, - VALUE_MATE - MAX_PLY - 1 -}; +int off_A1H8(Square sq) { return int(rank_of(sq)) - file_of(sq); } + +constexpr Value WDL_to_value[] = {-VALUE_MATE + MAX_PLY + 1, VALUE_DRAW - 2, VALUE_DRAW, + VALUE_DRAW + 2, VALUE_MATE - MAX_PLY - 1}; template -inline void swap_endian(T& x) -{ +inline void swap_endian(T& x) { static_assert(std::is_unsigned_v, "Argument of swap_endian not unsigned"); - uint8_t tmp, *c = (uint8_t*)&x; + uint8_t tmp, *c = (uint8_t*) &x; for (int i = 0; i < Half; ++i) tmp = c[i], c[i] = c[End - i], c[End - i] = tmp; } -template<> inline void swap_endian(uint8_t&) {} +template<> +inline void swap_endian(uint8_t&) {} -template T number(void* addr) -{ +template +T number(void* addr) { T v; - if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare) + if (uintptr_t(addr) & (alignof(T) - 1)) // Unaligned pointer (very rare) std::memcpy(&v, addr, sizeof(T)); else - v = *((T*)addr); + v = *((T*) addr); if (LE != IsLittleEndian) swap_endian(v); @@ -128,14 +137,16 @@ template T number(void* addr) // like captures and pawn moves but we can easily recover the correct dtz of the // previous move if we know the position's WDL score. int dtz_before_zeroing(WDLScore wdl) { - return wdl == WDLWin ? 1 : - wdl == WDLCursedWin ? 101 : - wdl == WDLBlessedLoss ? -101 : - wdl == WDLLoss ? -1 : 0; + return wdl == WDLWin ? 1 + : wdl == WDLCursedWin ? 101 + : wdl == WDLBlessedLoss ? -101 + : wdl == WDLLoss ? -1 + : 0; } // Return the sign of a number (-1, 0, 1) -template int sign_of(T val) { +template +int sign_of(T val) { return (T(0) < val) - (val < T(0)); } @@ -147,18 +158,22 @@ struct SparseEntry { static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); -using Sym = uint16_t; // Huffman symbol +using Sym = uint16_t; // Huffman symbol struct LR { - enum Side { Left, Right }; + enum Side { + Left, + Right + }; - uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 - // bits is the right-hand symbol. If the symbol has length 1, - // then the left-hand symbol is the stored value. + uint8_t lr[3]; // The first 12 bits is the left-hand symbol, the second 12 + // bits is the right-hand symbol. If the symbol has length 1, + // then the left-hand symbol is the stored value. template Sym get() { - return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] : - S == Right ? (lr[2] << 4) | (lr[1] >> 4) : (assert(false), Sym(-1)); + return S == Left ? ((lr[1] & 0xF) << 8) | lr[0] + : S == Right ? (lr[2] << 4) | (lr[1] >> 4) + : (assert(false), Sym(-1)); } }; @@ -173,11 +188,11 @@ static_assert(sizeof(LR) == 3, "LR tree entry must be 3 bytes"); // class TBFile memory maps/unmaps the single .rtbw and .rtbz files. Files are // memory mapped for best performance. Files are mapped at first access: at init // time only existence of the file is checked. -class TBFile : public std::ifstream { +class TBFile: public std::ifstream { std::string fname; -public: + public: // Look for and open the file among the Paths directories where the .rtbw // and .rtbz files can be found. Multiple directories are separated by ";" // on Windows and by ":" on Unix-based operating systems. @@ -194,10 +209,9 @@ class TBFile : public std::ifstream { constexpr char SepChar = ';'; #endif std::stringstream ss(Paths); - std::string path; + std::string path; - while (std::getline(ss, path, SepChar)) - { + while (std::getline(ss, path, SepChar)) { fname = path + "/" + f; std::ifstream::open(fname); if (is_open()) @@ -208,39 +222,37 @@ class TBFile : public std::ifstream { // Memory map the file and check it. uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { if (is_open()) - close(); // Need to re-open to get native file descriptor + close(); // Need to re-open to get native file descriptor #ifndef _WIN32 struct stat statbuf; - int fd = ::open(fname.c_str(), O_RDONLY); + int fd = ::open(fname.c_str(), O_RDONLY); if (fd == -1) return *baseAddress = nullptr, nullptr; fstat(fd, &statbuf); - if (statbuf.st_size % 64 != 16) - { + if (statbuf.st_size % 64 != 16) { std::cerr << "Corrupt tablebase file " << fname << std::endl; exit(EXIT_FAILURE); } - *mapping = statbuf.st_size; + *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); -#if defined(MADV_RANDOM) + #if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); -#endif + #endif ::close(fd); - if (*baseAddress == MAP_FAILED) - { + if (*baseAddress == MAP_FAILED) { std::cerr << "Could not mmap() " << fname << std::endl; exit(EXIT_FAILURE); } #else // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, - OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); + OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); if (fd == INVALID_HANDLE_VALUE) return *baseAddress = nullptr, nullptr; @@ -248,8 +260,7 @@ class TBFile : public std::ifstream { DWORD size_high; DWORD size_low = GetFileSize(fd, &size_high); - if (size_low % 64 != 16) - { + if (size_low % 64 != 16) { std::cerr << "Corrupt tablebase file " << fname << std::endl; exit(EXIT_FAILURE); } @@ -257,35 +268,31 @@ class TBFile : public std::ifstream { HANDLE mmap = CreateFileMapping(fd, nullptr, PAGE_READONLY, size_high, size_low, nullptr); CloseHandle(fd); - if (!mmap) - { + if (!mmap) { std::cerr << "CreateFileMapping() failed" << std::endl; exit(EXIT_FAILURE); } - *mapping = uint64_t(mmap); + *mapping = uint64_t(mmap); *baseAddress = MapViewOfFile(mmap, FILE_MAP_READ, 0, 0, 0); - if (!*baseAddress) - { + if (!*baseAddress) { std::cerr << "MapViewOfFile() failed, name = " << fname << ", error = " << GetLastError() << std::endl; exit(EXIT_FAILURE); } #endif - uint8_t* data = (uint8_t*)*baseAddress; + uint8_t* data = (uint8_t*) *baseAddress; - constexpr uint8_t Magics[][4] = { { 0xD7, 0x66, 0x0C, 0xA5 }, - { 0x71, 0xE8, 0x23, 0x5D } }; + constexpr uint8_t Magics[][4] = {{0xD7, 0x66, 0x0C, 0xA5}, {0x71, 0xE8, 0x23, 0x5D}}; - if (memcmp(data, Magics[type == WDL], 4)) - { + if (memcmp(data, Magics[type == WDL], 4)) { std::cerr << "Corrupted table in file " << fname << std::endl; unmap(*baseAddress, *mapping); return *baseAddress = nullptr, nullptr; } - return data + 4; // Skip Magics's header + return data + 4; // Skip Magics's header } static void unmap(void* baseAddress, uint64_t mapping) { @@ -294,7 +301,7 @@ class TBFile : public std::ifstream { munmap(baseAddress, mapping); #else UnmapViewOfFile(baseAddress); - CloseHandle((HANDLE)mapping); + CloseHandle((HANDLE) mapping); #endif } }; @@ -305,25 +312,27 @@ std::string TBFile::Paths; // There are 8, 4, or 2 PairsData records for each TBTable, according to the type // of table and if positions have pawns or not. It is populated at first access. struct PairsData { - uint8_t flags; // Table flags, see enum TBFlag - uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols - uint8_t minSymLen; // Minimum length in bits of the Huffman symbols - uint32_t blocksNum; // Number of blocks in the TB file - size_t sizeofBlock; // Block size in bytes - size_t span; // About every span values there is a SparseIndex[] entry - Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value - LR* btree; // btree[sym] stores the left and right symbols that expand sym - uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 - uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum - SparseEntry* sparseIndex; // Partial indices into blockLength[] - size_t sparseIndexSize; // Size of SparseIndex[] table - uint8_t* data; // Start of Huffman compressed data - std::vector base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l - std::vector symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 - Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups - uint64_t groupIdx[TBPIECES+1]; // Start index used for the encoding of the group's pieces - int groupLen[TBPIECES+1]; // Number of pieces in a given group: KRKN -> (3, 1) - uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) + uint8_t flags; // Table flags, see enum TBFlag + uint8_t maxSymLen; // Maximum length in bits of the Huffman symbols + uint8_t minSymLen; // Minimum length in bits of the Huffman symbols + uint32_t blocksNum; // Number of blocks in the TB file + size_t sizeofBlock; // Block size in bytes + size_t span; // About every span values there is a SparseIndex[] entry + Sym* lowestSym; // lowestSym[l] is the symbol of length l with the lowest value + LR* btree; // btree[sym] stores the left and right symbols that expand sym + uint16_t* blockLength; // Number of stored positions (minus one) for each block: 1..65536 + uint32_t blockLengthSize; // Size of blockLength[] table: padded so it's bigger than blocksNum + SparseEntry* sparseIndex; // Partial indices into blockLength[] + size_t sparseIndexSize; // Size of SparseIndex[] table + uint8_t* data; // Start of Huffman compressed data + std::vector + base64; // base64[l - min_sym_len] is the 64bit-padded lowest symbol of length l + std::vector + symlen; // Number of values (-1) represented by a given Huffman symbol: 1..256 + Piece pieces[TBPIECES]; // Position pieces: the order of pieces defines the groups + uint64_t groupIdx[TBPIECES + 1]; // Start index used for the encoding of the group's pieces + int groupLen[TBPIECES + 1]; // Number of pieces in a given group: KRKN -> (3, 1) + uint16_t map_idx[4]; // WDLWin, WDLLoss, WDLCursedWin, WDLBlessedLoss (used in DTZ) }; // struct TBTable contains indexing information to access the corresponding TBFile. @@ -337,22 +346,22 @@ struct TBTable { static constexpr int Sides = Type == WDL ? 2 : 1; std::atomic_bool ready; - void* baseAddress; - uint8_t* map; - uint64_t mapping; - Key key; - Key key2; - int pieceCount; - bool hasPawns; - bool hasUniquePieces; - uint8_t pawnCount[2]; // [Lead color / other color] - PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] - - PairsData* get(int stm, int f) { - return &items[stm % Sides][hasPawns ? f : 0]; - } - - TBTable() : ready(false), baseAddress(nullptr) {} + void* baseAddress; + uint8_t* map; + uint64_t mapping; + Key key; + Key key2; + int pieceCount; + bool hasPawns; + bool hasUniquePieces; + uint8_t pawnCount[2]; // [Lead color / other color] + PairsData items[Sides][4]; // [wtm / btm][FILE_A..FILE_D or 0] + + PairsData* get(int stm, int f) { return &items[stm % Sides][hasPawns ? f : 0]; } + + TBTable() : + ready(false), + baseAddress(nullptr) {} explicit TBTable(const std::string& code); explicit TBTable(const TBTable& wdl); @@ -363,26 +372,26 @@ struct TBTable { }; template<> -TBTable::TBTable(const std::string& code) : TBTable() { +TBTable::TBTable(const std::string& code) : + TBTable() { StateInfo st; - Position pos; + Position pos; - key = pos.set(code, WHITE, &st).material_key(); + key = pos.set(code, WHITE, &st).material_key(); pieceCount = pos.count(); - hasPawns = pos.pieces(PAWN); + hasPawns = pos.pieces(PAWN); hasUniquePieces = false; - for (Color c : { WHITE, BLACK }) + for (Color c : {WHITE, BLACK}) for (PieceType pt = PAWN; pt < KING; ++pt) if (popcount(pos.pieces(c, pt)) == 1) hasUniquePieces = true; // Set the leading color. In case both sides have pawns the leading color // is the side with fewer pawns because this leads to better compression. - bool c = !pos.count(BLACK) - || ( pos.count(WHITE) - && pos.count(BLACK) >= pos.count(WHITE)); + bool c = !pos.count(BLACK) + || (pos.count(WHITE) && pos.count(BLACK) >= pos.count(WHITE)); pawnCount[0] = pos.count(c ? WHITE : BLACK); pawnCount[1] = pos.count(c ? BLACK : WHITE); @@ -391,16 +400,17 @@ TBTable::TBTable(const std::string& code) : TBTable() { } template<> -TBTable::TBTable(const TBTable& wdl) : TBTable() { +TBTable::TBTable(const TBTable& wdl) : + TBTable() { // Use the corresponding WDL table to avoid recalculating all from scratch - key = wdl.key; - key2 = wdl.key2; - pieceCount = wdl.pieceCount; - hasPawns = wdl.hasPawns; + key = wdl.key; + key2 = wdl.key2; + pieceCount = wdl.pieceCount; + hasPawns = wdl.hasPawns; hasUniquePieces = wdl.hasUniquePieces; - pawnCount[0] = wdl.pawnCount[0]; - pawnCount[1] = wdl.pawnCount[1]; + pawnCount[0] = wdl.pawnCount[0]; + pawnCount[1] = wdl.pawnCount[1]; } // class TBTables creates and keeps ownership of the TBTable objects, one for @@ -408,19 +418,18 @@ TBTable::TBTable(const TBTable& wdl) : TBTable() { // at init time, accessed at probe time. class TBTables { - struct Entry - { - Key key; + struct Entry { + Key key; TBTable* wdl; TBTable* dtz; - template + template TBTable* get() const { - return (TBTable*)(Type == WDL ? (void*)wdl : (void*)dtz); + return (TBTable*) (Type == WDL ? (void*) wdl : (void*) dtz); } }; - static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb + static constexpr int Size = 1 << 12; // 4K table, indexed by key's 12 lsb static constexpr int Overflow = 1; // Number of elements allowed to map to the last bucket Entry hashTable[Size + Overflow]; @@ -430,7 +439,7 @@ class TBTables { void insert(Key key, TBTable* wdl, TBTable* dtz) { uint32_t homeBucket = uint32_t(key) & (Size - 1); - Entry entry{ key, wdl, dtz }; + Entry entry{key, wdl, dtz}; // Ensure last element is empty to avoid overflow when looking up for (uint32_t bucket = homeBucket; bucket < Size + Overflow - 1; ++bucket) { @@ -445,7 +454,7 @@ class TBTables { uint32_t otherHomeBucket = uint32_t(otherKey) & (Size - 1); if (otherHomeBucket > homeBucket) { std::swap(entry, hashTable[bucket]); - key = otherKey; + key = otherKey; homeBucket = otherHomeBucket; } } @@ -453,10 +462,10 @@ class TBTables { exit(EXIT_FAILURE); } -public: + public: template TBTable* get(Key key) { - for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)]; ; ++entry) { + for (const Entry* entry = &hashTable[uint32_t(key) & (Size - 1)];; ++entry) { if (entry->key == key || !entry->get()) return entry->get(); } @@ -468,7 +477,7 @@ class TBTables { dtzTable.clear(); } size_t size() const { return wdlTable.size(); } - void add(const std::vector& pieces); + void add(const std::vector& pieces); }; TBTables TBTables; @@ -482,9 +491,9 @@ void TBTables::add(const std::vector& pieces) { for (PieceType pt : pieces) code += PieceToChar[pt]; - TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK + TBFile file(code.insert(code.find('K', 1), "v") + ".rtbw"); // KRK -> KRvK - if (!file.is_open()) // Only WDL file is checked + if (!file.is_open()) // Only WDL file is checked return; file.close(); @@ -495,7 +504,7 @@ void TBTables::add(const std::vector& pieces) { dtzTable.emplace_back(wdlTable.back()); // Insert into the hash keys for both colors: KRvK with KR white and black - insert(wdlTable.back().key , &wdlTable.back(), &dtzTable.back()); + insert(wdlTable.back().key, &wdlTable.back(), &dtzTable.back()); insert(wdlTable.back().key2, &wdlTable.back(), &dtzTable.back()); } @@ -538,8 +547,8 @@ int decompress_pairs(PairsData* d, uint64_t idx) { uint32_t k = uint32_t(idx / d->span); // Then we read the corresponding SparseIndex[] entry - uint32_t block = number(&d->sparseIndex[k].block); - int offset = number(&d->sparseIndex[k].offset); + uint32_t block = number(&d->sparseIndex[k].block); + int offset = number(&d->sparseIndex[k].offset); // Now compute the difference idx - I(k). From the definition of k, we know that // @@ -560,18 +569,18 @@ int decompress_pairs(PairsData* d, uint64_t idx) { offset -= d->blockLength[block++] + 1; // Finally, we find the start address of our block of canonical Huffman symbols - uint32_t* ptr = (uint32_t*)(d->data + (uint64_t(block) * d->sizeofBlock)); + uint32_t* ptr = (uint32_t*) (d->data + (uint64_t(block) * d->sizeofBlock)); // Read the first 64 bits in our block, this is a (truncated) sequence of // unknown number of symbols of unknown length but we know the first one // is at the beginning of this 64-bit sequence. - uint64_t buf64 = number(ptr); ptr += 2; + uint64_t buf64 = number(ptr); + ptr += 2; int buf64Size = 64; Sym sym; - while (true) - { - int len = 0; // This is the symbol length - d->min_sym_len + while (true) { + int len = 0; // This is the symbol length - d->min_sym_len // Now get the symbol length. For any symbol s64 of length l right-padded // to 64 bits we know that d->base64[l-1] >= s64 >= d->base64[l] so we @@ -594,11 +603,11 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // ...otherwise update the offset and continue to iterate offset -= d->symlen[sym] + 1; - len += d->minSymLen; // Get the real length - buf64 <<= len; // Consume the just processed symbol + len += d->minSymLen; // Get the real length + buf64 <<= len; // Consume the just processed symbol buf64Size -= len; - if (buf64Size <= 32) { // Refill the buffer + if (buf64Size <= 32) { // Refill the buffer buf64Size += 32; buf64 |= uint64_t(number(ptr++)) << (64 - buf64Size); } @@ -608,8 +617,7 @@ int decompress_pairs(PairsData* d, uint64_t idx) { // We binary-search for our value recursively expanding into the left and // right child symbols until we reach a leaf node where symlen[sym] + 1 == 1 // that will store the value we need. - while (d->symlen[sym]) - { + while (d->symlen[sym]) { Sym left = d->btree[sym].get(); // If a symbol contains 36 sub-symbols (d->symlen[sym] + 1 = 36) and @@ -632,8 +640,7 @@ bool check_dtz_stm(TBTable*, int, File) { return true; } bool check_dtz_stm(TBTable* entry, int stm, File f) { auto flags = entry->get(stm, f)->flags; - return (flags & TBFlag::STM) == stm - || ((entry->key == entry->key2) && !entry->hasPawns); + return (flags & TBFlag::STM) == stm || ((entry->key == entry->key2) && !entry->hasPawns); } // DTZ scores are sorted by frequency of occurrence and then assigned the @@ -644,25 +651,24 @@ WDLScore map_score(TBTable*, File, int value, WDLScore) { return WDLScore(v int map_score(TBTable* entry, File f, int value, WDLScore wdl) { - constexpr int WDLMap[] = { 1, 3, 0, 2, 0 }; + constexpr int WDLMap[] = {1, 3, 0, 2, 0}; auto flags = entry->get(0, f)->flags; - uint8_t* map = entry->map; + uint8_t* map = entry->map; uint16_t* idx = entry->get(0, f)->map_idx; if (flags & TBFlag::Mapped) { if (flags & TBFlag::Wide) - value = ((uint16_t *)map)[idx[WDLMap[wdl + 2]] + value]; + value = ((uint16_t*) map)[idx[WDLMap[wdl + 2]] + value]; else value = map[idx[WDLMap[wdl + 2]] + value]; } // DTZ tables store distance to zero in number of moves or plies. We // want to return plies, so we have to convert to plies when needed. - if ( (wdl == WDLWin && !(flags & TBFlag::WinPlies)) - || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) - || wdl == WDLCursedWin - || wdl == WDLBlessedLoss) + if ((wdl == WDLWin && !(flags & TBFlag::WinPlies)) + || (wdl == WDLLoss && !(flags & TBFlag::LossPlies)) || wdl == WDLCursedWin + || wdl == WDLBlessedLoss) value *= 2; return value + 1; @@ -677,13 +683,13 @@ int map_score(TBTable* entry, File f, int value, WDLScore wdl) { template Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* result) { - Square squares[TBPIECES]; - Piece pieces[TBPIECES]; - uint64_t idx; - int next = 0, size = 0, leadPawnsCnt = 0; + Square squares[TBPIECES]; + Piece pieces[TBPIECES]; + uint64_t idx; + int next = 0, size = 0, leadPawnsCnt = 0; PairsData* d; - Bitboard b, leadPawns = 0; - File tbFile = FILE_A; + Bitboard b, leadPawns = 0; + File tbFile = FILE_A; // A given TB entry like KRK has associated two material keys: KRvk and Kvkr. // If both sides have the same pieces keys are equal. In this case TB tables @@ -734,8 +740,8 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // directly map them to the correct color and square. b = pos.pieces() ^ leadPawns; do { - Square s = pop_lsb(b); - squares[size] = s ^ flipSquares; + Square s = pop_lsb(b); + squares[size] = s ^ flipSquares; pieces[size++] = Piece(pos.piece_on(s) ^ flipColor); } while (b); @@ -747,8 +753,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // in pieces[i]: the sequence that ensures the best compression. for (int i = leadPawnsCnt; i < size - 1; ++i) for (int j = i + 1; j < size; ++j) - if (d->pieces[i] == pieces[j]) - { + if (d->pieces[i] == pieces[j]) { std::swap(pieces[i], pieces[j]); std::swap(squares[i], squares[j]); break; @@ -770,7 +775,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; - goto encode_remaining; // With pawns we have finished special treatments + goto encode_remaining; // With pawns we have finished special treatments } // In positions without pawns, we further flip the squares to ensure leading @@ -785,7 +790,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (!off_A1H8(squares[i])) continue; - if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 + if (off_A1H8(squares[i]) > 0) // A1-H8 diagonal flip: SQ_A3 -> SQ_C1 for (int j = i; j < size; ++j) squares[j] = Square(((squares[j] >> 3) | (squares[j] << 3)) & 63); break; @@ -820,38 +825,31 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // together. if (entry->hasUniquePieces) { - int adjust1 = squares[1] > squares[0]; + int adjust1 = squares[1] > squares[0]; int adjust2 = (squares[2] > squares[0]) + (squares[2] > squares[1]); // First piece is below a1-h8 diagonal. MapA1D1D4[] maps the b1-d1-d3 // triangle to 0...5. There are 63 squares for second piece and and 62 // (mapped to 0...61) for the third. if (off_A1H8(squares[0])) - idx = ( MapA1D1D4[squares[0]] * 63 - + (squares[1] - adjust1)) * 62 - + squares[2] - adjust2; + idx = (MapA1D1D4[squares[0]] * 63 + (squares[1] - adjust1)) * 62 + squares[2] - adjust2; // First piece is on a1-h8 diagonal, second below: map this occurrence to // 6 to differentiate from the above case, rank_of() maps a1-d4 diagonal // to 0...3 and finally MapB1H1H7[] maps the b1-h1-h7 triangle to 0..27. else if (off_A1H8(squares[1])) - idx = ( 6 * 63 + rank_of(squares[0]) * 28 - + MapB1H1H7[squares[1]]) * 62 - + squares[2] - adjust2; + idx = (6 * 63 + rank_of(squares[0]) * 28 + MapB1H1H7[squares[1]]) * 62 + squares[2] + - adjust2; // First two pieces are on a1-h8 diagonal, third below else if (off_A1H8(squares[2])) - idx = 6 * 63 * 62 + 4 * 28 * 62 - + rank_of(squares[0]) * 7 * 28 - + (rank_of(squares[1]) - adjust1) * 28 - + MapB1H1H7[squares[2]]; + idx = 6 * 63 * 62 + 4 * 28 * 62 + rank_of(squares[0]) * 7 * 28 + + (rank_of(squares[1]) - adjust1) * 28 + MapB1H1H7[squares[2]]; // All 3 pieces on the diagonal a1-h8 else - idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 - + rank_of(squares[0]) * 7 * 6 - + (rank_of(squares[1]) - adjust1) * 6 - + (rank_of(squares[2]) - adjust2); + idx = 6 * 63 * 62 + 4 * 28 * 62 + 4 * 7 * 28 + rank_of(squares[0]) * 7 * 6 + + (rank_of(squares[1]) - adjust1) * 6 + (rank_of(squares[2]) - adjust2); } else // We don't have at least 3 unique pieces, like in KRRvKBB, just map // the kings. @@ -864,16 +862,14 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu // Encode remaining pawns and then pieces according to square, in ascending order bool remainingPawns = entry->hasPawns && entry->pawnCount[1]; - while (d->groupLen[++next]) - { + while (d->groupLen[++next]) { std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous // groups (similar to what was done earlier for leading group pieces). - for (int i = 0; i < d->groupLen[next]; ++i) - { - auto f = [&](Square s) { return groupSq[i] > s; }; + for (int i = 0; i < d->groupLen[next]; ++i) { + auto f = [&](Square s) { return groupSq[i] > s; }; auto adjust = std::count_if(squares, groupSq, f); n += Binomial[i + 1][groupSq[i] - adjust - 8 * remainingPawns]; } @@ -911,7 +907,7 @@ void set_groups(T& e, PairsData* d, int order[], File f) { else d->groupLen[++n] = 1; - d->groupLen[++n] = 0; // Zero-terminated + d->groupLen[++n] = 0; // Zero-terminated // The sequence in pieces[] defines the groups, but not the order in which // they are encoded. If the pieces in a group g can be combined on the board @@ -924,24 +920,21 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // pawns/pieces -> remaining pawns -> remaining pieces. In particular the // first group is at order[0] position and the remaining pawns, when present, // are at order[1] position. - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides - int next = pp ? 2 : 1; - int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); - uint64_t idx = 1; + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + int next = pp ? 2 : 1; + int freeSquares = 64 - d->groupLen[0] - (pp ? d->groupLen[1] : 0); + uint64_t idx = 1; for (int k = 0; next < n || k == order[0] || k == order[1]; ++k) - if (k == order[0]) // Leading pawns or pieces + if (k == order[0]) // Leading pawns or pieces { d->groupIdx[0] = idx; - idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] - : e.hasUniquePieces ? 31332 : 462; - } - else if (k == order[1]) // Remaining pawns + idx *= e.hasPawns ? LeadPawnsSize[d->groupLen[0]][f] : e.hasUniquePieces ? 31332 : 462; + } else if (k == order[1]) // Remaining pawns { d->groupIdx[1] = idx; idx *= Binomial[d->groupLen[1]][48 - d->groupLen[0]]; - } - else // Remaining pieces + } else // Remaining pieces { d->groupIdx[next] = idx; idx *= Binomial[d->groupLen[next]][freeSquares]; @@ -956,8 +949,8 @@ void set_groups(T& e, PairsData* d, int order[], File f) { // symbol until reaching the leaves that represent the symbol value. uint8_t set_symlen(PairsData* d, Sym s, std::vector& visited) { - visited[s] = true; // We can set it now because tree is acyclic - Sym sr = d->btree[s].get(); + visited[s] = true; // We can set it now because tree is acyclic + Sym sr = d->btree[s].get(); if (sr == 0xFFF) return 0; @@ -979,8 +972,8 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { if (d->flags & TBFlag::SingleValue) { d->blocksNum = d->blockLengthSize = 0; - d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init - d->minSymLen = *data++; // Here we store the single value + d->span = d->sparseIndexSize = 0; // Broken MSVC zero-init + d->minSymLen = *data++; // Here we store the single value return data; } @@ -988,16 +981,17 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { // element stores the biggest index that is the tb size. uint64_t tbSize = d->groupIdx[std::find(d->groupLen, d->groupLen + 7, 0) - d->groupLen]; - d->sizeofBlock = 1ULL << *data++; - d->span = 1ULL << *data++; - d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up - auto padding = number(data++); - d->blocksNum = number(data); data += sizeof(uint32_t); - d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] - // does not point out of range. + d->sizeofBlock = 1ULL << *data++; + d->span = 1ULL << *data++; + d->sparseIndexSize = size_t((tbSize + d->span - 1) / d->span); // Round up + auto padding = number(data++); + d->blocksNum = number(data); + data += sizeof(uint32_t); + d->blockLengthSize = d->blocksNum + padding; // Padded to ensure SparseIndex[] + // does not point out of range. d->maxSymLen = *data++; d->minSymLen = *data++; - d->lowestSym = (Sym*)data; + d->lowestSym = (Sym*) data; d->base64.resize(d->maxSymLen - d->minSymLen + 1); // See https://en.wikipedia.org/wiki/Huffman_coding @@ -1014,9 +1008,10 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { int base64_size = static_cast(d->base64.size()); for (int i = base64_size - 2; i >= 0; --i) { d->base64[i] = (d->base64[i + 1] + number(&d->lowestSym[i]) - - number(&d->lowestSym[i + 1])) / 2; + - number(&d->lowestSym[i + 1])) + / 2; - assert(d->base64[i] * 2 >= d->base64[i+1]); + assert(d->base64[i] * 2 >= d->base64[i + 1]); } // Now left-shift by an amount so that d->base64[i] gets shifted 1 bit more @@ -1024,11 +1019,12 @@ uint8_t* set_sizes(PairsData* d, uint8_t* data) { // d->base64[i] >= d->base64[i+1]. Moreover for any symbol s64 of length i // and right-padded to 64 bits holds d->base64[i-1] >= s64 >= d->base64[i]. for (int i = 0; i < base64_size; ++i) - d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits + d->base64[i] <<= 64 - i - d->minSymLen; // Right-padding to 64 bits data += base64_size * sizeof(Sym); - d->symlen.resize(number(data)); data += sizeof(uint16_t); - d->btree = (LR*)data; + d->symlen.resize(number(data)); + data += sizeof(uint16_t); + d->btree = (LR*) data; // The compression scheme used is "Recursive Pairing", that replaces the most // frequent adjacent pair of symbols in the source message by a new symbol, @@ -1054,13 +1050,12 @@ uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { auto flags = e.get(0, f)->flags; if (flags & TBFlag::Mapped) { if (flags & TBFlag::Wide) { - data += uintptr_t(data) & 1; // Word alignment, we may have a mixed table - for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x - e.get(0, f)->map_idx[i] = uint16_t((uint16_t*)data - (uint16_t*)e.map + 1); + data += uintptr_t(data) & 1; // Word alignment, we may have a mixed table + for (int i = 0; i < 4; ++i) { // Sequence like 3,x,x,x,1,x,0,2,x,x + e.get(0, f)->map_idx[i] = uint16_t((uint16_t*) data - (uint16_t*) e.map + 1); data += 2 * number(data) + 2; } - } - else { + } else { for (int i = 0; i < 4; ++i) { e.get(0, f)->map_idx[i] = uint16_t(data - e.map + 1); data += *data + 1; @@ -1069,7 +1064,7 @@ uint8_t* set_dtz_map(TBTable& e, uint8_t* data, File maxFile) { } } - return data += uintptr_t(data) & 1; // Word alignment + return data += uintptr_t(data) & 1; // Word alignment } // Populate entry's PairsData records with data from the just memory-mapped file. @@ -1079,17 +1074,20 @@ void set(T& e, uint8_t* data) { PairsData* d; - enum { Split = 1, HasPawns = 2 }; + enum { + Split = 1, + HasPawns = 2 + }; - assert(e.hasPawns == bool(*data & HasPawns)); + assert(e.hasPawns == bool(*data & HasPawns)); assert((e.key != e.key2) == bool(*data & Split)); - data++; // First byte stores flags + data++; // First byte stores flags - const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; + const int sides = T::Sides == 2 && (e.key != e.key2) ? 2 : 1; const File maxFile = e.hasPawns ? FILE_D : FILE_A; - bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides + bool pp = e.hasPawns && e.pawnCount[1]; // Pawns on both sides assert(!pp || e.pawnCount[0]); @@ -1098,19 +1096,19 @@ void set(T& e, uint8_t* data) { for (int i = 0; i < sides; i++) *e.get(i, f) = PairsData(); - int order[][2] = { { *data & 0xF, pp ? *(data + 1) & 0xF : 0xF }, - { *data >> 4, pp ? *(data + 1) >> 4 : 0xF } }; + int order[][2] = {{*data & 0xF, pp ? *(data + 1) & 0xF : 0xF}, + {*data >> 4, pp ? *(data + 1) >> 4 : 0xF}}; data += 1 + pp; for (int k = 0; k < e.pieceCount; ++k, ++data) for (int i = 0; i < sides; i++) - e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); + e.get(i, f)->pieces[k] = Piece(i ? *data >> 4 : *data & 0xF); for (int i = 0; i < sides; ++i) set_groups(e, e.get(i, f), order[i], f); } - data += uintptr_t(data) & 1; // Word alignment + data += uintptr_t(data) & 1; // Word alignment for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) @@ -1120,19 +1118,19 @@ void set(T& e, uint8_t* data) { for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { - (d = e.get(i, f))->sparseIndex = (SparseEntry*)data; + (d = e.get(i, f))->sparseIndex = (SparseEntry*) data; data += d->sparseIndexSize * sizeof(SparseEntry); } for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { - (d = e.get(i, f))->blockLength = (uint16_t*)data; + (d = e.get(i, f))->blockLength = (uint16_t*) data; data += d->blockLengthSize * sizeof(uint16_t); } for (File f = FILE_A; f <= maxFile; ++f) for (int i = 0; i < sides; i++) { - data = (uint8_t*)((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment + data = (uint8_t*) ((uintptr_t(data) + 0x3F) & ~0x3F); // 64 byte alignment (d = e.get(i, f))->data = data; data += d->blocksNum * d->sizeofBlock; } @@ -1150,11 +1148,11 @@ void* mapped(TBTable& e, const Position& pos) { // Use 'acquire' to avoid a thread reading 'ready' == true while // another is still working. (compiler reordering may cause this). if (e.ready.load(std::memory_order_acquire)) - return e.baseAddress; // Could be nullptr if file does not exist + return e.baseAddress; // Could be nullptr if file does not exist std::scoped_lock lk(mutex); - if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock + if (e.ready.load(std::memory_order_relaxed)) // Recheck under lock return e.baseAddress; // Pieces strings in decreasing order for each color, like ("KPP","KR") @@ -1164,8 +1162,8 @@ void* mapped(TBTable& e, const Position& pos) { b += std::string(popcount(pos.pieces(BLACK, pt)), PieceToChar[pt]); } - fname = (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) - + (Type == WDL ? ".rtbw" : ".rtbz"); + fname = + (e.key == pos.material_key() ? w + 'v' + b : b + 'v' + w) + (Type == WDL ? ".rtbw" : ".rtbz"); uint8_t* data = TBFile(fname).map(&e.baseAddress, &e.mapping, Type); @@ -1179,7 +1177,7 @@ void* mapped(TBTable& e, const Position& pos) { template::Ret> Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) { - if (pos.count() == 2) // KvK + if (pos.count() == 2) // KvK return Ret(WDLDraw); TBTable* entry = TBTables.get(pos.material_key()); @@ -1206,16 +1204,14 @@ Ret probe_table(const Position& pos, ProbeState* result, WDLScore wdl = WDLDraw) template WDLScore search(Position& pos, ProbeState* result) { - WDLScore value, bestValue = WDLLoss; + WDLScore value, bestValue = WDLLoss; StateInfo st; - auto moveList = MoveList(pos); + auto moveList = MoveList(pos); size_t totalCount = moveList.size(), moveCount = 0; - for (const Move move : moveList) - { - if ( !pos.capture(move) - && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) + for (const Move move : moveList) { + if (!pos.capture(move) && (!CheckZeroingMoves || type_of(pos.moved_piece(move)) != PAWN)) continue; moveCount++; @@ -1227,13 +1223,11 @@ WDLScore search(Position& pos, ProbeState* result) { if (*result == FAIL) return WDLDraw; - if (value > bestValue) - { + if (value > bestValue) { bestValue = value; - if (value >= WDLWin) - { - *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move + if (value >= WDLWin) { + *result = ZEROING_BEST_MOVE; // Winning DTZ-zeroing move return value; } } @@ -1249,8 +1243,7 @@ WDLScore search(Position& pos, ProbeState* result) { if (noMoreMoves) value = bestValue; - else - { + else { value = probe_table(pos, result); if (*result == FAIL) @@ -1259,13 +1252,12 @@ WDLScore search(Position& pos, ProbeState* result) { // DTZ stores a "don't care" value if bestValue is a win if (bestValue >= value) - return *result = ( bestValue > WDLDraw - || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; + return *result = (bestValue > WDLDraw || noMoreMoves ? ZEROING_BEST_MOVE : OK), bestValue; return *result = OK, value; } -} // namespace +} // namespace // Tablebases::init() is called at startup and after every change to @@ -1275,7 +1267,7 @@ void Tablebases::init(const std::string& paths) { TBTables.clear(); MaxCardinality = 0; - TBFile::Paths = paths; + TBFile::Paths = paths; if (paths.empty() || paths == "") return; @@ -1307,14 +1299,14 @@ void Tablebases::init(const std::string& paths) { code = 0; for (int idx = 0; idx < 10; idx++) for (Square s1 = SQ_A1; s1 <= SQ_D4; ++s1) - if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 + if (MapA1D1D4[s1] == idx && (idx || s1 == SQ_B1)) // SQ_B1 is mapped to 0 { for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) if ((PseudoAttacks[KING][s1] | s1) & s2) - continue; // Illegal position + continue; // Illegal position else if (!off_A1H8(s1) && off_A1H8(s2) > 0) - continue; // First on diagonal, second above + continue; // First on diagonal, second above else if (!off_A1H8(s1) && !off_A1H8(s2)) bothOnDiagonal.emplace_back(idx, s2); @@ -1331,30 +1323,28 @@ void Tablebases::init(const std::string& paths) { // are Binomial[k][n] ways to choose k elements from a set of n elements. Binomial[0][0] = 1; - for (int n = 1; n < 64; n++) // Squares - for (int k = 0; k < 6 && k <= n; ++k) // Pieces - Binomial[k][n] = (k > 0 ? Binomial[k - 1][n - 1] : 0) - + (k < n ? Binomial[k ][n - 1] : 0); + for (int n = 1; n < 64; n++) // Squares + for (int k = 0; k < 6 && k <= n; ++k) // Pieces + Binomial[k][n] = + (k > 0 ? Binomial[k - 1][n - 1] : 0) + (k < n ? Binomial[k][n - 1] : 0); // MapPawns[s] encodes squares a2-h7 to 0..47. This is the number of possible // available squares when the leading one is in 's'. Moreover the pawn with // highest MapPawns[] is the leading pawn, the one nearest the edge, and // among pawns with the same file, the one with the lowest rank. - int availableSquares = 47; // Available squares when lead pawn is in a2 + int availableSquares = 47; // Available squares when lead pawn is in a2 // Init the tables for the encoding of leading pawns group: with 7-men TB we // can have up to 5 leading pawns (KPPPPPK). for (int leadPawnsCnt = 1; leadPawnsCnt <= 5; ++leadPawnsCnt) - for (File f = FILE_A; f <= FILE_D; ++f) - { + for (File f = FILE_A; f <= FILE_D; ++f) { // Restart the index at every file because TB table is split // by file, so we can reuse the same index for different files. int idx = 0; // Sum all possible combinations for a given file, starting with // the leading pawn on rank 2 and increasing the rank. - for (Rank r = RANK_2; r <= RANK_7; ++r) - { + for (Rank r = RANK_2; r <= RANK_7; ++r) { Square sq = make_square(f, r); // Compute MapPawns[] at first pass. @@ -1362,9 +1352,8 @@ void Tablebases::init(const std::string& paths) { // below or more toward the edge of sq. There are 47 available // squares when sq = a2 and reduced by 2 for any rank increase // due to mirroring: sq == a3 -> no a2, h2, so MapPawns[a3] = 45 - if (leadPawnsCnt == 1) - { - MapPawns[sq] = availableSquares--; + if (leadPawnsCnt == 1) { + MapPawns[sq] = availableSquares--; MapPawns[flip_file(sq)] = availableSquares--; } LeadPawnIdx[leadPawnsCnt][sq] = idx; @@ -1457,10 +1446,10 @@ WDLScore Tablebases::probe_wdl(Position& pos, ProbeState* result) { // then do not accept moves leading to dtz + 50-move-counter == 100. int Tablebases::probe_dtz(Position& pos, ProbeState* result) { - *result = OK; + *result = OK; WDLScore wdl = search(pos, result); - if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws + if (*result == FAIL || wdl == WDLDraw) // DTZ tables don't store draws return 0; // DTZ stores a 'don't care value in this case, or even a plain wrong @@ -1479,10 +1468,9 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // DTZ stores results for the other side, so we need to do a 1-ply search and // find the winning move that minimizes DTZ. StateInfo st; - int minDTZ = 0xFFFF; + int minDTZ = 0xFFFF; - for (const Move move : MoveList(pos)) - { + for (const Move move : MoveList(pos)) { bool zeroing = pos.capture(move) || type_of(pos.moved_piece(move)) == PAWN; pos.do_move(move, st); @@ -1491,8 +1479,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // otherwise we will get the dtz of the next move sequence. Search the // position after the move to get the score sign (because even in a // winning position we could make a losing capture or go for a draw). - dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) - : -probe_dtz(pos, result); + dtz = zeroing ? -dtz_before_zeroing(search(pos, result)) : -probe_dtz(pos, result); // If the move mates, force minDTZ to 1 if (dtz == 1 && pos.checkers() && MoveList(pos).size() == 0) @@ -1524,7 +1511,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { ProbeState result = OK; - StateInfo st; + StateInfo st; // Obtain 50-move counter for the root position int cnt50 = pos.rule50_count(); @@ -1535,36 +1522,27 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { int dtz, bound = Options["Syzygy50MoveRule"] ? (MAX_DTZ - 100) : 1; // Probe and rank each move - for (auto& m : rootMoves) - { + for (auto& m : rootMoves) { pos.do_move(m.pv[0], st); // Calculate dtz for the current move counting from the root position - if (pos.rule50_count() == 0) - { + if (pos.rule50_count() == 0) { // In case of a zeroing move, dtz is one of -101/-1/0/1/101 WDLScore wdl = -probe_wdl(pos, &result); - dtz = dtz_before_zeroing(wdl); - } - else if (pos.is_draw(1)) - { + dtz = dtz_before_zeroing(wdl); + } else if (pos.is_draw(1)) { // In case a root move leads to a draw by repetition or 50-move rule, // we set dtz to zero. Note: since we are only 1 ply from the root, // this must be a true 3-fold repetition inside the game history. dtz = 0; - } - else - { + } else { // Otherwise, take dtz for the new position and correct by 1 ply dtz = -probe_dtz(pos, &result); - dtz = dtz > 0 ? dtz + 1 - : dtz < 0 ? dtz - 1 : dtz; + dtz = dtz > 0 ? dtz + 1 : dtz < 0 ? dtz - 1 : dtz; } // Make sure that a mating move is assigned a dtz value of 1 - if ( pos.checkers() - && dtz == 2 - && MoveList(pos).size() == 0) + if (pos.checkers() && dtz == 2 && MoveList(pos).size() == 0) dtz = 1; pos.undo_move(m.pv[0]); @@ -1574,19 +1552,19 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { // Better moves are ranked higher. Certain wins are ranked equally. // Losing moves are ranked equally unless a 50-move draw is in sight. - int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) - : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) - : 0; + int r = dtz > 0 ? (dtz + cnt50 <= 99 && !rep ? MAX_DTZ : MAX_DTZ - (dtz + cnt50)) + : dtz < 0 ? (-dtz * 2 + cnt50 < 100 ? -MAX_DTZ : -MAX_DTZ + (-dtz + cnt50)) + : 0; m.tbRank = r; // Determine the score to be displayed for this move. Assign at least // 1 cp to cursed wins and let it grow to 49 cp as the positions gets // closer to a real win. - m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 - : r > 0 ? Value((std::max( 3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200) - : r == 0 ? VALUE_DRAW - : r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200) - : -VALUE_MATE + MAX_PLY + 1; + m.tbScore = r >= bound ? VALUE_MATE - MAX_PLY - 1 + : r > 0 ? Value((std::max(3, r - (MAX_DTZ - 200)) * int(PawnValue)) / 200) + : r == 0 ? VALUE_DRAW + : r > -bound ? Value((std::min(-3, r + (MAX_DTZ - 200)) * int(PawnValue)) / 200) + : -VALUE_MATE + MAX_PLY + 1; } return true; @@ -1599,17 +1577,16 @@ bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { // A return value false indicates that not all probes were successful. bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { - static const int WDL_to_rank[] = { -MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ }; + static const int WDL_to_rank[] = {-MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ}; ProbeState result = OK; - StateInfo st; - WDLScore wdl; + StateInfo st; + WDLScore wdl; bool rule50 = Options["Syzygy50MoveRule"]; // Probe and rank each move - for (auto& m : rootMoves) - { + for (auto& m : rootMoves) { pos.do_move(m.pv[0], st); if (pos.is_draw(1)) @@ -1625,12 +1602,11 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { m.tbRank = WDL_to_rank[wdl + 2]; if (!rule50) - wdl = wdl > WDLDraw ? WDLWin - : wdl < WDLDraw ? WDLLoss : WDLDraw; + wdl = wdl > WDLDraw ? WDLWin : wdl < WDLDraw ? WDLLoss : WDLDraw; m.tbScore = WDL_to_value[wdl + 2]; } return true; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index b2ba35ff4b0..3b7c8aa70fd 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -30,30 +30,30 @@ class Position; namespace Stockfish::Tablebases { enum WDLScore { - WDLLoss = -2, // Loss - WDLBlessedLoss = -1, // Loss, but draw under 50-move rule - WDLDraw = 0, // Draw - WDLCursedWin = 1, // Win, but draw under 50-move rule - WDLWin = 2, // Win + WDLLoss = -2, // Loss + WDLBlessedLoss = -1, // Loss, but draw under 50-move rule + WDLDraw = 0, // Draw + WDLCursedWin = 1, // Win, but draw under 50-move rule + WDLWin = 2, // Win }; // Possible states after a probing operation enum ProbeState { - FAIL = 0, // Probe failed (missing file table) - OK = 1, // Probe successful - CHANGE_STM = -1, // DTZ should check the other side - ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) + FAIL = 0, // Probe failed (missing file table) + OK = 1, // Probe successful + CHANGE_STM = -1, // DTZ should check the other side + ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; extern int MaxCardinality; -void init(const std::string& paths); +void init(const std::string& paths); WDLScore probe_wdl(Position& pos, ProbeState* result); -int probe_dtz(Position& pos, ProbeState* result); -bool root_probe(Position& pos, Search::RootMoves& rootMoves); -bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves); -void rank_root_moves(Position& pos, Search::RootMoves& rootMoves); +int probe_dtz(Position& pos, ProbeState* result); +bool root_probe(Position& pos, Search::RootMoves& rootMoves); +bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves); +void rank_root_moves(Position& pos, Search::RootMoves& rootMoves); -} // namespace Stockfish::Tablebases +} // namespace Stockfish::Tablebases #endif diff --git a/src/thread.cpp b/src/thread.cpp index c752e7326cd..747b5770d49 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -37,15 +37,17 @@ namespace Stockfish { -ThreadPool Threads; // Global object +ThreadPool Threads; // Global object // Thread constructor launches the thread and waits until it goes to sleep // in idle_loop(). Note that 'searching' and 'exit' should be already set. -Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) { +Thread::Thread(size_t n) : + idx(n), + stdThread(&Thread::idle_loop, this) { - wait_for_search_finished(); + wait_for_search_finished(); } @@ -54,11 +56,11 @@ Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) { Thread::~Thread() { - assert(!searching); + assert(!searching); - exit = true; - start_searching(); - stdThread.join(); + exit = true; + start_searching(); + stdThread.join(); } @@ -66,25 +68,25 @@ Thread::~Thread() { void Thread::clear() { - counterMoves.fill(MOVE_NONE); - mainHistory.fill(0); - captureHistory.fill(0); + counterMoves.fill(MOVE_NONE); + mainHistory.fill(0); + captureHistory.fill(0); - for (bool inCheck : { false, true }) - for (StatsType c : { NoCaptures, Captures }) - for (auto& to : continuationHistory[inCheck][c]) - for (auto& h : to) - h->fill(-71); + for (bool inCheck : {false, true}) + for (StatsType c : {NoCaptures, Captures}) + for (auto& to : continuationHistory[inCheck][c]) + for (auto& h : to) + h->fill(-71); } // Thread::start_searching() wakes up the thread that will start the search void Thread::start_searching() { - mutex.lock(); - searching = true; - mutex.unlock(); // Unlock before notifying saves a few CPU-cycles - cv.notify_one(); // Wake up the thread in idle_loop() + mutex.lock(); + searching = true; + mutex.unlock(); // Unlock before notifying saves a few CPU-cycles + cv.notify_one(); // Wake up the thread in idle_loop() } @@ -93,8 +95,8 @@ void Thread::start_searching() { void Thread::wait_for_search_finished() { - std::unique_lock lk(mutex); - cv.wait(lk, [&]{ return !searching; }); + std::unique_lock lk(mutex); + cv.wait(lk, [&] { return !searching; }); } @@ -103,28 +105,27 @@ void Thread::wait_for_search_finished() { void Thread::idle_loop() { - // If OS already scheduled us on a different group than 0 then don't overwrite - // the choice, eventually we are one of many one-threaded processes running on - // some Windows NUMA hardware, for instance in fishtest. To make it simple, - // just check if running threads are below a threshold, in this case, all this - // NUMA machinery is not needed. - if (Options["Threads"] > 8) - WinProcGroup::bindThisThread(idx); + // If OS already scheduled us on a different group than 0 then don't overwrite + // the choice, eventually we are one of many one-threaded processes running on + // some Windows NUMA hardware, for instance in fishtest. To make it simple, + // just check if running threads are below a threshold, in this case, all this + // NUMA machinery is not needed. + if (Options["Threads"] > 8) + WinProcGroup::bindThisThread(idx); - while (true) - { - std::unique_lock lk(mutex); - searching = false; - cv.notify_one(); // Wake up anyone waiting for search finished - cv.wait(lk, [&]{ return searching; }); + while (true) { + std::unique_lock lk(mutex); + searching = false; + cv.notify_one(); // Wake up anyone waiting for search finished + cv.wait(lk, [&] { return searching; }); - if (exit) - return; + if (exit) + return; - lk.unlock(); + lk.unlock(); - search(); - } + search(); + } } // ThreadPool::set() creates/destroys threads to match the requested number. @@ -133,28 +134,28 @@ void Thread::idle_loop() { void ThreadPool::set(size_t requested) { - if (threads.size() > 0) // destroy any existing thread(s) - { - main()->wait_for_search_finished(); + if (threads.size() > 0) // destroy any existing thread(s) + { + main()->wait_for_search_finished(); - while (threads.size() > 0) - delete threads.back(), threads.pop_back(); - } + while (threads.size() > 0) + delete threads.back(), threads.pop_back(); + } - if (requested > 0) // create new thread(s) - { - threads.push_back(new MainThread(0)); + if (requested > 0) // create new thread(s) + { + threads.push_back(new MainThread(0)); - while (threads.size() < requested) - threads.push_back(new Thread(threads.size())); - clear(); + while (threads.size() < requested) + threads.push_back(new Thread(threads.size())); + clear(); - // Reallocate the hash with the new threadpool size - TT.resize(size_t(Options["Hash"])); + // Reallocate the hash with the new threadpool size + TT.resize(size_t(Options["Hash"])); - // Init thread number dependent search params. - Search::init(); - } + // Init thread number dependent search params. + Search::init(); + } } @@ -162,94 +163,95 @@ void ThreadPool::set(size_t requested) { void ThreadPool::clear() { - for (Thread* th : threads) - th->clear(); + for (Thread* th : threads) + th->clear(); - main()->callsCnt = 0; - main()->bestPreviousScore = VALUE_INFINITE; - main()->bestPreviousAverageScore = VALUE_INFINITE; - main()->previousTimeReduction = 1.0; + main()->callsCnt = 0; + main()->bestPreviousScore = VALUE_INFINITE; + main()->bestPreviousAverageScore = VALUE_INFINITE; + main()->previousTimeReduction = 1.0; } // ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and // returns immediately. Main thread will wake up other threads and start the search. -void ThreadPool::start_thinking(Position& pos, StateListPtr& states, - const Search::LimitsType& limits, bool ponderMode) { - - main()->wait_for_search_finished(); - - main()->stopOnPonderhit = stop = false; - increaseDepth = true; - main()->ponder = ponderMode; - Search::Limits = limits; - Search::RootMoves rootMoves; - - for (const auto& m : MoveList(pos)) - if ( limits.searchmoves.empty() - || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) - rootMoves.emplace_back(m); - - if (!rootMoves.empty()) - Tablebases::rank_root_moves(pos, rootMoves); - - // After ownership transfer 'states' becomes empty, so if we stop the search - // and call 'go' again without setting a new position states.get() == nullptr. - assert(states.get() || setupStates.get()); - - if (states.get()) - setupStates = std::move(states); // Ownership transfer, states is now empty - - // We use Position::set() to set root position across threads. But there are - // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot - // be deduced from a fen string, so set() clears them and they are set from - // setupStates->back() later. The rootState is per thread, earlier states are shared - // since they are read-only. - for (Thread* th : threads) - { - th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; - th->rootDepth = th->completedDepth = 0; - th->rootMoves = rootMoves; - th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); - th->rootState = setupStates->back(); - th->rootSimpleEval = Eval::simple_eval(pos, pos.side_to_move()); - } - - main()->start_searching(); +void ThreadPool::start_thinking(Position& pos, + StateListPtr& states, + const Search::LimitsType& limits, + bool ponderMode) { + + main()->wait_for_search_finished(); + + main()->stopOnPonderhit = stop = false; + increaseDepth = true; + main()->ponder = ponderMode; + Search::Limits = limits; + Search::RootMoves rootMoves; + + for (const auto& m : MoveList(pos)) + if (limits.searchmoves.empty() + || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) + rootMoves.emplace_back(m); + + if (!rootMoves.empty()) + Tablebases::rank_root_moves(pos, rootMoves); + + // After ownership transfer 'states' becomes empty, so if we stop the search + // and call 'go' again without setting a new position states.get() == nullptr. + assert(states.get() || setupStates.get()); + + if (states.get()) + setupStates = std::move(states); // Ownership transfer, states is now empty + + // We use Position::set() to set root position across threads. But there are + // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot + // be deduced from a fen string, so set() clears them and they are set from + // setupStates->back() later. The rootState is per thread, earlier states are shared + // since they are read-only. + for (Thread* th : threads) { + th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; + th->rootDepth = th->completedDepth = 0; + th->rootMoves = rootMoves; + th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); + th->rootState = setupStates->back(); + th->rootSimpleEval = Eval::simple_eval(pos, pos.side_to_move()); + } + + main()->start_searching(); } Thread* ThreadPool::get_best_thread() const { - Thread* bestThread = threads.front(); + Thread* bestThread = threads.front(); std::map votes; - Value minScore = VALUE_NONE; + Value minScore = VALUE_NONE; // Find the minimum score of all threads - for (Thread* th: threads) + for (Thread* th : threads) minScore = std::min(minScore, th->rootMoves[0].score); // Vote according to score and depth, and select the best thread auto thread_value = [minScore](Thread* th) { - return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); - }; + return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); + }; for (Thread* th : threads) votes[th->rootMoves[0].pv[0]] += thread_value(th); for (Thread* th : threads) - if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) - { + if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) { // Make sure we pick the shortest mate / TB conversion or stave off mate the longest if (th->rootMoves[0].score > bestThread->rootMoves[0].score) bestThread = th; - } - else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY - || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY - && ( votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]] - || ( votes[th->rootMoves[0].pv[0]] == votes[bestThread->rootMoves[0].pv[0]] - && thread_value(th) * int(th->rootMoves[0].pv.size() > 2) - > thread_value(bestThread) * int(bestThread->rootMoves[0].pv.size() > 2))))) + } else if (th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY + || (th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY + && (votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]] + || (votes[th->rootMoves[0].pv[0]] + == votes[bestThread->rootMoves[0].pv[0]] + && thread_value(th) * int(th->rootMoves[0].pv.size() > 2) + > thread_value(bestThread) + * int(bestThread->rootMoves[0].pv.size() > 2))))) bestThread = th; return bestThread; @@ -275,4 +277,4 @@ void ThreadPool::wait_for_search_finished() const { th->wait_for_search_finished(); } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/thread.h b/src/thread.h index 44cc56729e3..4a077962661 100644 --- a/src/thread.h +++ b/src/thread.h @@ -41,56 +41,56 @@ namespace Stockfish { class Thread { - std::mutex mutex; - std::condition_variable cv; - size_t idx; - bool exit = false, searching = true; // Set before starting std::thread - NativeThread stdThread; - -public: - explicit Thread(size_t); - virtual ~Thread(); - virtual void search(); - void clear(); - void idle_loop(); - void start_searching(); - void wait_for_search_finished(); - size_t id() const { return idx; } - - size_t pvIdx, pvLast; - std::atomic nodes, tbHits, bestMoveChanges; - int selDepth, nmpMinPly; - Value bestValue, optimism[COLOR_NB]; - - Position rootPos; - StateInfo rootState; - Search::RootMoves rootMoves; - Depth rootDepth, completedDepth; - Value rootDelta; - Value rootSimpleEval; - CounterMoveHistory counterMoves; - ButterflyHistory mainHistory; - CapturePieceToHistory captureHistory; - ContinuationHistory continuationHistory[2][2]; + std::mutex mutex; + std::condition_variable cv; + size_t idx; + bool exit = false, searching = true; // Set before starting std::thread + NativeThread stdThread; + + public: + explicit Thread(size_t); + virtual ~Thread(); + virtual void search(); + void clear(); + void idle_loop(); + void start_searching(); + void wait_for_search_finished(); + size_t id() const { return idx; } + + size_t pvIdx, pvLast; + std::atomic nodes, tbHits, bestMoveChanges; + int selDepth, nmpMinPly; + Value bestValue, optimism[COLOR_NB]; + + Position rootPos; + StateInfo rootState; + Search::RootMoves rootMoves; + Depth rootDepth, completedDepth; + Value rootDelta; + Value rootSimpleEval; + CounterMoveHistory counterMoves; + ButterflyHistory mainHistory; + CapturePieceToHistory captureHistory; + ContinuationHistory continuationHistory[2][2]; }; // MainThread is a derived class specific for main thread -struct MainThread : public Thread { +struct MainThread: public Thread { - using Thread::Thread; + using Thread::Thread; - void search() override; - void check_time(); + void search() override; + void check_time(); - double previousTimeReduction; - Value bestPreviousScore; - Value bestPreviousAverageScore; - Value iterValue[4]; - int callsCnt; - bool stopOnPonderhit; - std::atomic_bool ponder; + double previousTimeReduction; + Value bestPreviousScore; + Value bestPreviousAverageScore; + Value iterValue[4]; + int callsCnt; + bool stopOnPonderhit; + std::atomic_bool ponder; }; @@ -100,41 +100,41 @@ struct MainThread : public Thread { struct ThreadPool { - void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); - void clear(); - void set(size_t); - - MainThread* main() const { return static_cast(threads.front()); } - uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } - uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } - Thread* get_best_thread() const; - void start_searching(); - void wait_for_search_finished() const; - - std::atomic_bool stop, increaseDepth; - - auto cbegin() const noexcept { return threads.cbegin(); } - auto begin() noexcept { return threads.begin(); } - auto end() noexcept { return threads.end(); } - auto cend() const noexcept { return threads.cend(); } - auto size() const noexcept { return threads.size(); } - auto empty() const noexcept { return threads.empty(); } - -private: - StateListPtr setupStates; - std::vector threads; - - uint64_t accumulate(std::atomic Thread::* member) const { - - uint64_t sum = 0; - for (Thread* th : threads) - sum += (th->*member).load(std::memory_order_relaxed); - return sum; - } + void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); + void clear(); + void set(size_t); + + MainThread* main() const { return static_cast(threads.front()); } + uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } + uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } + Thread* get_best_thread() const; + void start_searching(); + void wait_for_search_finished() const; + + std::atomic_bool stop, increaseDepth; + + auto cbegin() const noexcept { return threads.cbegin(); } + auto begin() noexcept { return threads.begin(); } + auto end() noexcept { return threads.end(); } + auto cend() const noexcept { return threads.cend(); } + auto size() const noexcept { return threads.size(); } + auto empty() const noexcept { return threads.empty(); } + + private: + StateListPtr setupStates; + std::vector threads; + + uint64_t accumulate(std::atomic Thread::*member) const { + + uint64_t sum = 0; + for (Thread* th : threads) + sum += (th->*member).load(std::memory_order_relaxed); + return sum; + } }; extern ThreadPool Threads; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef THREAD_H_INCLUDED +#endif // #ifndef THREAD_H_INCLUDED diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index 77352aa087a..248e4a67450 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -29,46 +29,45 @@ #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) -#include + #include namespace Stockfish { static const size_t TH_STACK_SIZE = 8 * 1024 * 1024; -template > -void* start_routine(void* ptr) -{ - P* p = reinterpret_cast(ptr); - (p->first->*(p->second))(); // Call member function pointer - delete p; - return nullptr; +template> +void* start_routine(void* ptr) { + P* p = reinterpret_cast(ptr); + (p->first->*(p->second))(); // Call member function pointer + delete p; + return nullptr; } class NativeThread { - pthread_t thread; - -public: - template> - explicit NativeThread(void(T::*fun)(), T* obj) { - pthread_attr_t attr_storage, *attr = &attr_storage; - pthread_attr_init(attr); - pthread_attr_setstacksize(attr, TH_STACK_SIZE); - pthread_create(&thread, attr, start_routine, new P(obj, fun)); - } - void join() { pthread_join(thread, nullptr); } + pthread_t thread; + + public: + template> + explicit NativeThread(void (T::*fun)(), T* obj) { + pthread_attr_t attr_storage, *attr = &attr_storage; + pthread_attr_init(attr); + pthread_attr_setstacksize(attr, TH_STACK_SIZE); + pthread_create(&thread, attr, start_routine, new P(obj, fun)); + } + void join() { pthread_join(thread, nullptr); } }; -} // namespace Stockfish +} // namespace Stockfish -#else // Default case: use STL classes +#else // Default case: use STL classes namespace Stockfish { using NativeThread = std::thread; -} // namespace Stockfish +} // namespace Stockfish #endif -#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED +#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED diff --git a/src/timeman.cpp b/src/timeman.cpp index 74f59d90574..ee8b4b2b8ac 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -26,7 +26,7 @@ namespace Stockfish { -TimeManagement Time; // Our global time management object +TimeManagement Time; // Our global time management object // TimeManagement::init() is called at the beginning of the search and calculates @@ -36,74 +36,71 @@ TimeManagement Time; // Our global time management object void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { - // If we have no time, no need to initialize TM, except for the start time, - // which is used by movetime. - startTime = limits.startTime; - if (limits.time[us] == 0) - return; - - TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); - TimePoint slowMover = TimePoint(Options["Slow Mover"]); - TimePoint npmsec = TimePoint(Options["nodestime"]); - - // optScale is a percentage of available time to use for the current move. - // maxScale is a multiplier applied to optimumTime. - double optScale, maxScale; - - // If we have to play in 'nodes as time' mode, then convert from time - // to nodes, and use resulting values in time management formulas. - // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) - // must be much lower than the real engine speed. - if (npmsec) - { - if (!availableNodes) // Only once at game start - availableNodes = npmsec * limits.time[us]; // Time is in msec - - // Convert from milliseconds to nodes - limits.time[us] = TimePoint(availableNodes); - limits.inc[us] *= npmsec; - limits.npmsec = npmsec; - } - - // Maximum move horizon of 50 moves - int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; - - // Make sure timeLeft is > 0 since we may use it as a divisor - TimePoint timeLeft = std::max(TimePoint(1), - limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg)); - - // Use extra time with larger increments - double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); - - // A user may scale time usage by setting UCI option "Slow Mover" - // Default is 100 and changing this value will probably lose elo. - timeLeft = slowMover * timeLeft / 100; - - // x basetime (+ z increment) - // If there is a healthy increment, timeLeft can exceed actual available - // game time for the current move, so also cap to 20% of available game time. - if (limits.movestogo == 0) - { - optScale = std::min(0.0120 + std::pow(ply + 3.0, 0.45) * 0.0039, - 0.2 * limits.time[us] / double(timeLeft)) + // If we have no time, no need to initialize TM, except for the start time, + // which is used by movetime. + startTime = limits.startTime; + if (limits.time[us] == 0) + return; + + TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); + TimePoint slowMover = TimePoint(Options["Slow Mover"]); + TimePoint npmsec = TimePoint(Options["nodestime"]); + + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; + + // If we have to play in 'nodes as time' mode, then convert from time + // to nodes, and use resulting values in time management formulas. + // WARNING: to avoid time losses, the given npmsec (nodes per millisecond) + // must be much lower than the real engine speed. + if (npmsec) { + if (!availableNodes) // Only once at game start + availableNodes = npmsec * limits.time[us]; // Time is in msec + + // Convert from milliseconds to nodes + limits.time[us] = TimePoint(availableNodes); + limits.inc[us] *= npmsec; + limits.npmsec = npmsec; + } + + // Maximum move horizon of 50 moves + int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; + + // Make sure timeLeft is > 0 since we may use it as a divisor + TimePoint timeLeft = std::max(TimePoint(1), limits.time[us] + limits.inc[us] * (mtg - 1) + - moveOverhead * (2 + mtg)); + + // Use extra time with larger increments + double optExtra = std::clamp(1.0 + 12.0 * limits.inc[us] / limits.time[us], 1.0, 1.12); + + // A user may scale time usage by setting UCI option "Slow Mover" + // Default is 100 and changing this value will probably lose elo. + timeLeft = slowMover * timeLeft / 100; + + // x basetime (+ z increment) + // If there is a healthy increment, timeLeft can exceed actual available + // game time for the current move, so also cap to 20% of available game time. + if (limits.movestogo == 0) { + optScale = std::min(0.0120 + std::pow(ply + 3.0, 0.45) * 0.0039, + 0.2 * limits.time[us] / double(timeLeft)) * optExtra; - maxScale = std::min(7.0, 4.0 + ply / 12.0); - } - - // x moves in y seconds (+ z increment) - else - { - optScale = std::min((0.88 + ply / 116.4) / mtg, - 0.88 * limits.time[us] / double(timeLeft)); - maxScale = std::min(6.3, 1.5 + 0.11 * mtg); - } - - // Never use more than 80% of the available time for this move - optimumTime = TimePoint(optScale * timeLeft); - maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10; - - if (Options["Ponder"]) - optimumTime += optimumTime / 4; + maxScale = std::min(7.0, 4.0 + ply / 12.0); + } + + // x moves in y seconds (+ z increment) + else { + optScale = std::min((0.88 + ply / 116.4) / mtg, 0.88 * limits.time[us] / double(timeLeft)); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); + } + + // Never use more than 80% of the available time for this move + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = + TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10; + + if (Options["Ponder"]) + optimumTime += optimumTime / 4; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/timeman.h b/src/timeman.h index 6acdf0ac6db..4b9b62bd2a9 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -32,23 +32,24 @@ namespace Stockfish { // the maximum available time, the game move number, and other parameters. class TimeManagement { -public: - void init(Search::LimitsType& limits, Color us, int ply); - TimePoint optimum() const { return optimumTime; } - TimePoint maximum() const { return maximumTime; } - TimePoint elapsed() const { return Search::Limits.npmsec ? - TimePoint(Threads.nodes_searched()) : now() - startTime; } - - int64_t availableNodes; // When in 'nodes as time' mode - -private: - TimePoint startTime; - TimePoint optimumTime; - TimePoint maximumTime; + public: + void init(Search::LimitsType& limits, Color us, int ply); + TimePoint optimum() const { return optimumTime; } + TimePoint maximum() const { return maximumTime; } + TimePoint elapsed() const { + return Search::Limits.npmsec ? TimePoint(Threads.nodes_searched()) : now() - startTime; + } + + int64_t availableNodes; // When in 'nodes as time' mode + + private: + TimePoint startTime; + TimePoint optimumTime; + TimePoint maximumTime; }; extern TimeManagement Time; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef TIMEMAN_H_INCLUDED +#endif // #ifndef TIMEMAN_H_INCLUDED diff --git a/src/tt.cpp b/src/tt.cpp index c3aec8d3e7c..0c8666508e2 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -31,31 +31,28 @@ namespace Stockfish { -TranspositionTable TT; // Our global transposition table +TranspositionTable TT; // Our global transposition table // TTEntry::save() populates the TTEntry with a new node's data, possibly // overwriting an old position. The update is not atomic and can be racy. void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) { - // Preserve any existing move for the same position - if (m || uint16_t(k) != key16) - move16 = uint16_t(m); - - // Overwrite less valuable entries (cheapest checks first) - if ( b == BOUND_EXACT - || uint16_t(k) != key16 - || d - DEPTH_OFFSET + 2 * pv > depth8 - 4) - { - assert(d > DEPTH_OFFSET); - assert(d < 256 + DEPTH_OFFSET); - - key16 = uint16_t(k); - depth8 = uint8_t(d - DEPTH_OFFSET); - genBound8 = uint8_t(TT.generation8 | uint8_t(pv) << 2 | b); - value16 = int16_t(v); - eval16 = int16_t(ev); - } + // Preserve any existing move for the same position + if (m || uint16_t(k) != key16) + move16 = uint16_t(m); + + // Overwrite less valuable entries (cheapest checks first) + if (b == BOUND_EXACT || uint16_t(k) != key16 || d - DEPTH_OFFSET + 2 * pv > depth8 - 4) { + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); + + key16 = uint16_t(k); + depth8 = uint8_t(d - DEPTH_OFFSET); + genBound8 = uint8_t(TT.generation8 | uint8_t(pv) << 2 | b); + value16 = int16_t(v); + eval16 = int16_t(ev); + } } @@ -65,21 +62,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) void TranspositionTable::resize(size_t mbSize) { - Threads.main()->wait_for_search_finished(); + Threads.main()->wait_for_search_finished(); - aligned_large_pages_free(table); + aligned_large_pages_free(table); - clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); + clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); - if (!table) - { - std::cerr << "Failed to allocate " << mbSize - << "MB for transposition table." << std::endl; - exit(EXIT_FAILURE); - } + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) { + std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl; + exit(EXIT_FAILURE); + } - clear(); + clear(); } @@ -88,28 +83,26 @@ void TranspositionTable::resize(size_t mbSize) { void TranspositionTable::clear() { - std::vector threads; + std::vector threads; - for (size_t idx = 0; idx < size_t(Options["Threads"]); ++idx) - { - threads.emplace_back([this, idx]() { + for (size_t idx = 0; idx < size_t(Options["Threads"]); ++idx) { + threads.emplace_back([this, idx]() { + // Thread binding gives faster search on systems with a first-touch policy + if (Options["Threads"] > 8) + WinProcGroup::bindThisThread(idx); - // Thread binding gives faster search on systems with a first-touch policy - if (Options["Threads"] > 8) - WinProcGroup::bindThisThread(idx); + // Each thread will zero its part of the hash table + const size_t stride = size_t(clusterCount / Options["Threads"]), + start = size_t(stride * idx), + len = + idx != size_t(Options["Threads"]) - 1 ? stride : clusterCount - start; - // Each thread will zero its part of the hash table - const size_t stride = size_t(clusterCount / Options["Threads"]), - start = size_t(stride * idx), - len = idx != size_t(Options["Threads"]) - 1 ? - stride : clusterCount - start; + std::memset(&table[start], 0, len * sizeof(Cluster)); + }); + } - std::memset(&table[start], 0, len * sizeof(Cluster)); - }); - } - - for (std::thread& th : threads) - th.join(); + for (std::thread& th : threads) + th.join(); } @@ -122,30 +115,32 @@ void TranspositionTable::clear() { TTEntry* TranspositionTable::probe(const Key key, bool& found) const { - TTEntry* const tte = first_entry(key); - const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster - - for (int i = 0; i < ClusterSize; ++i) - if (tte[i].key16 == key16 || !tte[i].depth8) - { - tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & (GENERATION_DELTA - 1))); // Refresh - - return found = bool(tte[i].depth8), &tte[i]; - } - - // Find an entry to be replaced according to the replacement strategy - TTEntry* replace = tte; - for (int i = 1; i < ClusterSize; ++i) - // Due to our packed storage format for generation and its cyclic - // nature we add GENERATION_CYCLE (256 is the modulus, plus what - // is needed to keep the unrelated lowest n bits from affecting - // the result) to calculate the entry age correctly even after - // generation8 overflows into the next cycle. - if ( replace->depth8 - ((GENERATION_CYCLE + generation8 - replace->genBound8) & GENERATION_MASK) - > tte[i].depth8 - ((GENERATION_CYCLE + generation8 - tte[i].genBound8) & GENERATION_MASK)) - replace = &tte[i]; - - return found = false, replace; + TTEntry* const tte = first_entry(key); + const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster + + for (int i = 0; i < ClusterSize; ++i) + if (tte[i].key16 == key16 || !tte[i].depth8) { + tte[i].genBound8 = + uint8_t(generation8 | (tte[i].genBound8 & (GENERATION_DELTA - 1))); // Refresh + + return found = bool(tte[i].depth8), &tte[i]; + } + + // Find an entry to be replaced according to the replacement strategy + TTEntry* replace = tte; + for (int i = 1; i < ClusterSize; ++i) + // Due to our packed storage format for generation and its cyclic + // nature we add GENERATION_CYCLE (256 is the modulus, plus what + // is needed to keep the unrelated lowest n bits from affecting + // the result) to calculate the entry age correctly even after + // generation8 overflows into the next cycle. + if (replace->depth8 + - ((GENERATION_CYCLE + generation8 - replace->genBound8) & GENERATION_MASK) + > tte[i].depth8 + - ((GENERATION_CYCLE + generation8 - tte[i].genBound8) & GENERATION_MASK)) + replace = &tte[i]; + + return found = false, replace; } @@ -154,12 +149,13 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { int TranspositionTable::hashfull() const { - int cnt = 0; - for (int i = 0; i < 1000; ++i) - for (int j = 0; j < ClusterSize; ++j) - cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; + int cnt = 0; + for (int i = 0; i < 1000; ++i) + for (int j = 0; j < ClusterSize; ++j) + cnt += table[i].entry[j].depth8 + && (table[i].entry[j].genBound8 & GENERATION_MASK) == generation8; - return cnt / ClusterSize; + return cnt / ClusterSize; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/tt.h b/src/tt.h index fdea4933507..05eb2b0fac8 100644 --- a/src/tt.h +++ b/src/tt.h @@ -40,23 +40,23 @@ namespace Stockfish { struct TTEntry { - Move move() const { return Move (move16); } - Value value() const { return Value(value16); } - Value eval() const { return Value(eval16); } - Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); } - bool is_pv() const { return bool (genBound8 & 0x4); } - Bound bound() const { return Bound(genBound8 & 0x3); } - void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev); - -private: - friend class TranspositionTable; - - uint16_t key16; - uint8_t depth8; - uint8_t genBound8; - uint16_t move16; - int16_t value16; - int16_t eval16; + Move move() const { return Move(move16); } + Value value() const { return Value(value16); } + Value eval() const { return Value(eval16); } + Depth depth() const { return Depth(depth8 + DEPTH_OFFSET); } + bool is_pv() const { return bool(genBound8 & 0x4); } + Bound bound() const { return Bound(genBound8 & 0x3); } + void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev); + + private: + friend class TranspositionTable; + + uint16_t key16; + uint8_t depth8; + uint8_t genBound8; + uint16_t move16; + int16_t value16; + int16_t eval16; }; @@ -68,43 +68,45 @@ struct TTEntry { class TranspositionTable { - static constexpr int ClusterSize = 3; - - struct Cluster { - TTEntry entry[ClusterSize]; - char padding[2]; // Pad to 32 bytes - }; - - static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); - - // Constants used to refresh the hash table periodically - static constexpr unsigned GENERATION_BITS = 3; // nb of bits reserved for other things - static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS); // increment for generation field - static constexpr int GENERATION_CYCLE = 255 + (1 << GENERATION_BITS); // cycle length - static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF; // mask to pull out generation number - -public: - ~TranspositionTable() { aligned_large_pages_free(table); } - void new_search() { generation8 += GENERATION_DELTA; } // Lower bits are used for other things - TTEntry* probe(const Key key, bool& found) const; - int hashfull() const; - void resize(size_t mbSize); - void clear(); - - TTEntry* first_entry(const Key key) const { - return &table[mul_hi64(key, clusterCount)].entry[0]; - } - -private: - friend struct TTEntry; - - size_t clusterCount; - Cluster* table; - uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 + static constexpr int ClusterSize = 3; + + struct Cluster { + TTEntry entry[ClusterSize]; + char padding[2]; // Pad to 32 bytes + }; + + static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); + + // Constants used to refresh the hash table periodically + static constexpr unsigned GENERATION_BITS = 3; // nb of bits reserved for other things + static constexpr int GENERATION_DELTA = + (1 << GENERATION_BITS); // increment for generation field + static constexpr int GENERATION_CYCLE = 255 + (1 << GENERATION_BITS); // cycle length + static constexpr int GENERATION_MASK = + (0xFF << GENERATION_BITS) & 0xFF; // mask to pull out generation number + + public: + ~TranspositionTable() { aligned_large_pages_free(table); } + void new_search() { generation8 += GENERATION_DELTA; } // Lower bits are used for other things + TTEntry* probe(const Key key, bool& found) const; + int hashfull() const; + void resize(size_t mbSize); + void clear(); + + TTEntry* first_entry(const Key key) const { + return &table[mul_hi64(key, clusterCount)].entry[0]; + } + + private: + friend struct TTEntry; + + size_t clusterCount; + Cluster* table; + uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 }; extern TranspositionTable TT; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef TT_H_INCLUDED +#endif // #ifndef TT_H_INCLUDED diff --git a/src/tune.cpp b/src/tune.cpp index 97baeb784e9..d7c8f832c62 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -34,75 +34,83 @@ using std::string; namespace Stockfish { -bool Tune::update_on_last; -const UCI::Option* LastOption = nullptr; +bool Tune::update_on_last; +const UCI::Option* LastOption = nullptr; static std::map TuneResults; string Tune::next(string& names, bool pop) { - string name; + string name; - do { - string token = names.substr(0, names.find(',')); + do { + string token = names.substr(0, names.find(',')); - if (pop) - names.erase(0, token.size() + 1); + if (pop) + names.erase(0, token.size() + 1); - std::stringstream ws(token); - name += (ws >> token, token); // Remove trailing whitespace + std::stringstream ws(token); + name += (ws >> token, token); // Remove trailing whitespace - } while ( std::count(name.begin(), name.end(), '(') - - std::count(name.begin(), name.end(), ')')); + } while (std::count(name.begin(), name.end(), '(') - std::count(name.begin(), name.end(), ')')); - return name; + return name; } static void on_tune(const UCI::Option& o) { - if (!Tune::update_on_last || LastOption == &o) - Tune::read_options(); + if (!Tune::update_on_last || LastOption == &o) + Tune::read_options(); } static void make_option(const string& n, int v, const SetRange& r) { - // Do not generate option when there is nothing to tune (ie. min = max) - if (r(v).first == r(v).second) - return; + // Do not generate option when there is nothing to tune (ie. min = max) + if (r(v).first == r(v).second) + return; - if (TuneResults.count(n)) - v = TuneResults[n]; + if (TuneResults.count(n)) + v = TuneResults[n]; - Options[n] << UCI::Option(v, r(v).first, r(v).second, on_tune); - LastOption = &Options[n]; + Options[n] << UCI::Option(v, r(v).first, r(v).second, on_tune); + LastOption = &Options[n]; - // Print formatted parameters, ready to be copy-pasted in Fishtest - std::cout << n << "," - << v << "," - << r(v).first << "," << r(v).second << "," - << (r(v).second - r(v).first) / 20.0 << "," - << "0.0020" - << std::endl; + // Print formatted parameters, ready to be copy-pasted in Fishtest + std::cout << n << "," << v << "," << r(v).first << "," << r(v).second << "," + << (r(v).second - r(v).first) / 20.0 << "," + << "0.0020" << std::endl; } -template<> void Tune::Entry::init_option() { make_option(name, value, range); } +template<> +void Tune::Entry::init_option() { + make_option(name, value, range); +} -template<> void Tune::Entry::read_option() { - if (Options.count(name)) - value = int(Options[name]); +template<> +void Tune::Entry::read_option() { + if (Options.count(name)) + value = int(Options[name]); } -template<> void Tune::Entry::init_option() { make_option(name, value, range); } +template<> +void Tune::Entry::init_option() { + make_option(name, value, range); +} -template<> void Tune::Entry::read_option() { - if (Options.count(name)) - value = Value(int(Options[name])); +template<> +void Tune::Entry::read_option() { + if (Options.count(name)) + value = Value(int(Options[name])); } // Instead of a variable here we have a PostUpdate function: just call it -template<> void Tune::Entry::init_option() {} -template<> void Tune::Entry::read_option() { value(); } +template<> +void Tune::Entry::init_option() {} +template<> +void Tune::Entry::read_option() { + value(); +} -} // namespace Stockfish +} // namespace Stockfish // Init options with tuning session results instead of default values. Useful to @@ -117,9 +125,7 @@ template<> void Tune::Entry::read_option() { value(); } namespace Stockfish { -void Tune::read_results() { - - /* ...insert your values here... */ +void Tune::read_results() { /* ...insert your values here... */ } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/tune.h b/src/tune.h index a9a7331e566..480aea165b5 100644 --- a/src/tune.h +++ b/src/tune.h @@ -22,28 +22,29 @@ #include #include #include -#include // IWYU pragma: keep +#include // IWYU pragma: keep #include #include namespace Stockfish { enum Value : int; -using Range = std::pair; // Option's min-max values -using RangeFun = Range (int); +using Range = std::pair; // Option's min-max values +using RangeFun = Range(int); // Default Range function, to calculate Option's min-max values -inline Range default_range(int v) { - return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); -} +inline Range default_range(int v) { return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0); } struct SetRange { - explicit SetRange(RangeFun f) : fun(f) {} - SetRange(int min, int max) : fun(nullptr), range(min, max) {} - Range operator()(int v) const { return fun ? fun(v) : range; } - - RangeFun* fun; - Range range; + explicit SetRange(RangeFun f) : + fun(f) {} + SetRange(int min, int max) : + fun(nullptr), + range(min, max) {} + Range operator()(int v) const { return fun ? fun(v) : range; } + + RangeFun* fun; + Range range; }; #define SetDefaultRange SetRange(default_range) @@ -76,88 +77,102 @@ struct SetRange { class Tune { - using PostUpdate = void (); // Post-update function - - Tune() { read_results(); } - Tune(const Tune&) = delete; - void operator=(const Tune&) = delete; - void read_results(); - - static Tune& instance() { static Tune t; return t; } // Singleton - - // Use polymorphism to accommodate Entry of different types in the same vector - struct EntryBase { - virtual ~EntryBase() = default; - virtual void init_option() = 0; - virtual void read_option() = 0; - }; - - template - struct Entry : public EntryBase { - - static_assert(!std::is_const_v, "Parameter cannot be const!"); - - static_assert( std::is_same_v - || std::is_same_v - || std::is_same_v, "Parameter type not supported!"); - - Entry(const std::string& n, T& v, const SetRange& r) : name(n), value(v), range(r) {} - void operator=(const Entry&) = delete; // Because 'value' is a reference - void init_option() override; - void read_option() override; - - std::string name; - T& value; - SetRange range; - }; - - // Our facility to fill the container, each Entry corresponds to a parameter - // to tune. We use variadic templates to deal with an unspecified number of - // entries, each one of a possible different type. - static std::string next(std::string& names, bool pop = true); - - int add(const SetRange&, std::string&&) { return 0; } - - template - int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { - list.push_back(std::unique_ptr(new Entry(next(names), value, range))); - return add(range, std::move(names), args...); - } - - // Template specialization for arrays: recursively handle multi-dimensional arrays - template - int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { - for (size_t i = 0; i < N; i++) - add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); - return add(range, std::move(names), args...); - } - - // Template specialization for SetRange - template - int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { - return add(value, (next(names), std::move(names)), args...); - } - - std::vector> list; - -public: - template - static int add(const std::string& names, Args&&... args) { - return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), args...); // Remove trailing parenthesis - } - static void init() { for (auto& e : instance().list) e->init_option(); read_options(); } // Deferred, due to UCI::Options access - static void read_options() { for (auto& e : instance().list) e->read_option(); } - static bool update_on_last; + using PostUpdate = void(); // Post-update function + + Tune() { read_results(); } + Tune(const Tune&) = delete; + void operator=(const Tune&) = delete; + void read_results(); + + static Tune& instance() { + static Tune t; + return t; + } // Singleton + + // Use polymorphism to accommodate Entry of different types in the same vector + struct EntryBase { + virtual ~EntryBase() = default; + virtual void init_option() = 0; + virtual void read_option() = 0; + }; + + template + struct Entry: public EntryBase { + + static_assert(!std::is_const_v, "Parameter cannot be const!"); + + static_assert(std::is_same_v || std::is_same_v + || std::is_same_v, + "Parameter type not supported!"); + + Entry(const std::string& n, T& v, const SetRange& r) : + name(n), + value(v), + range(r) {} + void operator=(const Entry&) = delete; // Because 'value' is a reference + void init_option() override; + void read_option() override; + + std::string name; + T& value; + SetRange range; + }; + + // Our facility to fill the container, each Entry corresponds to a parameter + // to tune. We use variadic templates to deal with an unspecified number of + // entries, each one of a possible different type. + static std::string next(std::string& names, bool pop = true); + + int add(const SetRange&, std::string&&) { return 0; } + + template + int add(const SetRange& range, std::string&& names, T& value, Args&&... args) { + list.push_back(std::unique_ptr(new Entry(next(names), value, range))); + return add(range, std::move(names), args...); + } + + // Template specialization for arrays: recursively handle multi-dimensional arrays + template + int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) { + for (size_t i = 0; i < N; i++) + add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]); + return add(range, std::move(names), args...); + } + + // Template specialization for SetRange + template + int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) { + return add(value, (next(names), std::move(names)), args...); + } + + std::vector> list; + + public: + template + static int add(const std::string& names, Args&&... args) { + return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), + args...); // Remove trailing parenthesis + } + static void init() { + for (auto& e : instance().list) + e->init_option(); + read_options(); + } // Deferred, due to UCI::Options access + static void read_options() { + for (auto& e : instance().list) + e->read_option(); + } + static bool update_on_last; }; // Some macro magic :-) we define a dummy int variable that the compiler initializes calling Tune::add() #define STRINGIFY(x) #x -#define UNIQUE2(x, y) x ## y -#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ +#define UNIQUE2(x, y) x##y +#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__ #define TUNE(...) int UNIQUE(p, __LINE__) = Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__) #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef TUNE_H_INCLUDED +#endif // #ifndef TUNE_H_INCLUDED diff --git a/src/types.h b/src/types.h index 1fc4d33a95f..4d94b0fd72a 100644 --- a/src/types.h +++ b/src/types.h @@ -17,7 +17,7 @@ */ #ifndef TYPES_H_INCLUDED -#define TYPES_H_INCLUDED + #define TYPES_H_INCLUDED // When compiling with provided Makefile (e.g. for Linux and OSX), configuration // is done automatically. To get started type 'make help'. @@ -36,15 +36,15 @@ // -DUSE_PEXT | Add runtime support for use of pext asm-instruction. Works // | only in 64-bit mode and requires hardware with pext support. -#include -#include + #include + #include -#if defined(_MSC_VER) -// Disable some silly and noisy warnings from MSVC compiler -#pragma warning(disable: 4127) // Conditional expression is constant -#pragma warning(disable: 4146) // Unary minus operator applied to unsigned type -#pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' -#endif + #if defined(_MSC_VER) + // Disable some silly and noisy warnings from MSVC compiler + #pragma warning(disable: 4127) // Conditional expression is constant + #pragma warning(disable: 4146) // Unary minus operator applied to unsigned type + #pragma warning(disable: 4800) // Forcing value to bool 'true' or 'false' + #endif // Predefined macros hell: // @@ -55,53 +55,54 @@ // _WIN32 Building on Windows (any) // _WIN64 Building on Windows 64 bit -#if defined(__GNUC__ ) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) && defined(_WIN32) && !defined(__clang__) -#define ALIGNAS_ON_STACK_VARIABLES_BROKEN -#endif + #if defined(__GNUC__) && (__GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ <= 2)) \ + && defined(_WIN32) && !defined(__clang__) + #define ALIGNAS_ON_STACK_VARIABLES_BROKEN + #endif -#define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) + #define ASSERT_ALIGNED(ptr, alignment) assert(reinterpret_cast(ptr) % alignment == 0) -#if defined(_WIN64) && defined(_MSC_VER) // No Makefile used -# include // Microsoft header for _BitScanForward64() -# define IS_64BIT -#endif + #if defined(_WIN64) && defined(_MSC_VER) // No Makefile used + #include // Microsoft header for _BitScanForward64() + #define IS_64BIT + #endif -#if defined(USE_POPCNT) && defined(_MSC_VER) -# include // Microsoft header for _mm_popcnt_u64() -#endif + #if defined(USE_POPCNT) && defined(_MSC_VER) + #include // Microsoft header for _mm_popcnt_u64() + #endif -#if !defined(NO_PREFETCH) && defined(_MSC_VER) -# include // Microsoft header for _mm_prefetch() -#endif + #if !defined(NO_PREFETCH) && defined(_MSC_VER) + #include // Microsoft header for _mm_prefetch() + #endif -#if defined(USE_PEXT) -# include // Header for _pext_u64() intrinsic -# define pext(b, m) _pext_u64(b, m) -#else -# define pext(b, m) 0 -#endif + #if defined(USE_PEXT) + #include // Header for _pext_u64() intrinsic + #define pext(b, m) _pext_u64(b, m) + #else + #define pext(b, m) 0 + #endif namespace Stockfish { -#ifdef USE_POPCNT + #ifdef USE_POPCNT constexpr bool HasPopCnt = true; -#else + #else constexpr bool HasPopCnt = false; -#endif + #endif -#ifdef USE_PEXT + #ifdef USE_PEXT constexpr bool HasPext = true; -#else -constexpr bool HasPext = false; -#endif + #else +constexpr bool HasPext = false; + #endif -#ifdef IS_64BIT + #ifdef IS_64BIT constexpr bool Is64Bit = true; -#else -constexpr bool Is64Bit = false; -#endif + #else +constexpr bool Is64Bit = false; + #endif -using Key = uint64_t; +using Key = uint64_t; using Bitboard = uint64_t; constexpr int MAX_MOVES = 256; @@ -120,164 +121,255 @@ constexpr int MAX_PLY = 246; // while MOVE_NONE and MOVE_NULL have the same origin and destination square. enum Move : int { - MOVE_NONE, - MOVE_NULL = 65 + MOVE_NONE, + MOVE_NULL = 65 }; enum MoveType { - NORMAL, - PROMOTION = 1 << 14, - EN_PASSANT = 2 << 14, - CASTLING = 3 << 14 + NORMAL, + PROMOTION = 1 << 14, + EN_PASSANT = 2 << 14, + CASTLING = 3 << 14 }; enum Color { - WHITE, BLACK, COLOR_NB = 2 + WHITE, + BLACK, + COLOR_NB = 2 }; enum CastlingRights { - NO_CASTLING, - WHITE_OO, - WHITE_OOO = WHITE_OO << 1, - BLACK_OO = WHITE_OO << 2, - BLACK_OOO = WHITE_OO << 3, - - KING_SIDE = WHITE_OO | BLACK_OO, - QUEEN_SIDE = WHITE_OOO | BLACK_OOO, - WHITE_CASTLING = WHITE_OO | WHITE_OOO, - BLACK_CASTLING = BLACK_OO | BLACK_OOO, - ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, - - CASTLING_RIGHT_NB = 16 + NO_CASTLING, + WHITE_OO, + WHITE_OOO = WHITE_OO << 1, + BLACK_OO = WHITE_OO << 2, + BLACK_OOO = WHITE_OO << 3, + + KING_SIDE = WHITE_OO | BLACK_OO, + QUEEN_SIDE = WHITE_OOO | BLACK_OOO, + WHITE_CASTLING = WHITE_OO | WHITE_OOO, + BLACK_CASTLING = BLACK_OO | BLACK_OOO, + ANY_CASTLING = WHITE_CASTLING | BLACK_CASTLING, + + CASTLING_RIGHT_NB = 16 }; enum Bound { - BOUND_NONE, - BOUND_UPPER, - BOUND_LOWER, - BOUND_EXACT = BOUND_UPPER | BOUND_LOWER + BOUND_NONE, + BOUND_UPPER, + BOUND_LOWER, + BOUND_EXACT = BOUND_UPPER | BOUND_LOWER }; enum Value : int { - VALUE_ZERO = 0, - VALUE_DRAW = 0, - VALUE_MATE = 32000, - VALUE_INFINITE = 32001, - VALUE_NONE = 32002, - - VALUE_TB_WIN_IN_MAX_PLY = VALUE_MATE - 2 * MAX_PLY, - VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY, - VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, - VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, - - // In the code, we make the assumption that these values - // are such that non_pawn_material() can be used to uniquely - // identify the material on the board. - PawnValue = 208, - KnightValue = 781, - BishopValue = 825, - RookValue = 1276, - QueenValue = 2538, + VALUE_ZERO = 0, + VALUE_DRAW = 0, + VALUE_MATE = 32000, + VALUE_INFINITE = 32001, + VALUE_NONE = 32002, + + VALUE_TB_WIN_IN_MAX_PLY = VALUE_MATE - 2 * MAX_PLY, + VALUE_TB_LOSS_IN_MAX_PLY = -VALUE_TB_WIN_IN_MAX_PLY, + VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, + VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, + + // In the code, we make the assumption that these values + // are such that non_pawn_material() can be used to uniquely + // identify the material on the board. + PawnValue = 208, + KnightValue = 781, + BishopValue = 825, + RookValue = 1276, + QueenValue = 2538, }; enum PieceType { - NO_PIECE_TYPE, PAWN, KNIGHT, BISHOP, ROOK, QUEEN, KING, - ALL_PIECES = 0, - PIECE_TYPE_NB = 8 + NO_PIECE_TYPE, + PAWN, + KNIGHT, + BISHOP, + ROOK, + QUEEN, + KING, + ALL_PIECES = 0, + PIECE_TYPE_NB = 8 }; enum Piece { - NO_PIECE, - W_PAWN = PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, - B_PAWN = PAWN + 8, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING, - PIECE_NB = 16 + NO_PIECE, + W_PAWN = PAWN, + W_KNIGHT, + W_BISHOP, + W_ROOK, + W_QUEEN, + W_KING, + B_PAWN = PAWN + 8, + B_KNIGHT, + B_BISHOP, + B_ROOK, + B_QUEEN, + B_KING, + PIECE_NB = 16 }; -constexpr Value PieceValue[PIECE_NB] = { VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO, - VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO }; +constexpr Value PieceValue[PIECE_NB] = { + VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO, + VALUE_ZERO, PawnValue, KnightValue, BishopValue, RookValue, QueenValue, VALUE_ZERO, VALUE_ZERO}; using Depth = int; enum : int { - DEPTH_QS_CHECKS = 0, - DEPTH_QS_NO_CHECKS = -1, - DEPTH_QS_RECAPTURES = -5, + DEPTH_QS_CHECKS = 0, + DEPTH_QS_NO_CHECKS = -1, + DEPTH_QS_RECAPTURES = -5, - DEPTH_NONE = -6, + DEPTH_NONE = -6, - DEPTH_OFFSET = -7 // value used only for TT entry occupancy check + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check }; enum Square : int { - SQ_A1, SQ_B1, SQ_C1, SQ_D1, SQ_E1, SQ_F1, SQ_G1, SQ_H1, - SQ_A2, SQ_B2, SQ_C2, SQ_D2, SQ_E2, SQ_F2, SQ_G2, SQ_H2, - SQ_A3, SQ_B3, SQ_C3, SQ_D3, SQ_E3, SQ_F3, SQ_G3, SQ_H3, - SQ_A4, SQ_B4, SQ_C4, SQ_D4, SQ_E4, SQ_F4, SQ_G4, SQ_H4, - SQ_A5, SQ_B5, SQ_C5, SQ_D5, SQ_E5, SQ_F5, SQ_G5, SQ_H5, - SQ_A6, SQ_B6, SQ_C6, SQ_D6, SQ_E6, SQ_F6, SQ_G6, SQ_H6, - SQ_A7, SQ_B7, SQ_C7, SQ_D7, SQ_E7, SQ_F7, SQ_G7, SQ_H7, - SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, - SQ_NONE, - - SQUARE_ZERO = 0, - SQUARE_NB = 64 + SQ_A1, + SQ_B1, + SQ_C1, + SQ_D1, + SQ_E1, + SQ_F1, + SQ_G1, + SQ_H1, + SQ_A2, + SQ_B2, + SQ_C2, + SQ_D2, + SQ_E2, + SQ_F2, + SQ_G2, + SQ_H2, + SQ_A3, + SQ_B3, + SQ_C3, + SQ_D3, + SQ_E3, + SQ_F3, + SQ_G3, + SQ_H3, + SQ_A4, + SQ_B4, + SQ_C4, + SQ_D4, + SQ_E4, + SQ_F4, + SQ_G4, + SQ_H4, + SQ_A5, + SQ_B5, + SQ_C5, + SQ_D5, + SQ_E5, + SQ_F5, + SQ_G5, + SQ_H5, + SQ_A6, + SQ_B6, + SQ_C6, + SQ_D6, + SQ_E6, + SQ_F6, + SQ_G6, + SQ_H6, + SQ_A7, + SQ_B7, + SQ_C7, + SQ_D7, + SQ_E7, + SQ_F7, + SQ_G7, + SQ_H7, + SQ_A8, + SQ_B8, + SQ_C8, + SQ_D8, + SQ_E8, + SQ_F8, + SQ_G8, + SQ_H8, + SQ_NONE, + + SQUARE_ZERO = 0, + SQUARE_NB = 64 }; enum Direction : int { - NORTH = 8, - EAST = 1, - SOUTH = -NORTH, - WEST = -EAST, - - NORTH_EAST = NORTH + EAST, - SOUTH_EAST = SOUTH + EAST, - SOUTH_WEST = SOUTH + WEST, - NORTH_WEST = NORTH + WEST + NORTH = 8, + EAST = 1, + SOUTH = -NORTH, + WEST = -EAST, + + NORTH_EAST = NORTH + EAST, + SOUTH_EAST = SOUTH + EAST, + SOUTH_WEST = SOUTH + WEST, + NORTH_WEST = NORTH + WEST }; enum File : int { - FILE_A, FILE_B, FILE_C, FILE_D, FILE_E, FILE_F, FILE_G, FILE_H, FILE_NB + FILE_A, + FILE_B, + FILE_C, + FILE_D, + FILE_E, + FILE_F, + FILE_G, + FILE_H, + FILE_NB }; enum Rank : int { - RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB + RANK_1, + RANK_2, + RANK_3, + RANK_4, + RANK_5, + RANK_6, + RANK_7, + RANK_8, + RANK_NB }; // Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { - // Number of changed pieces - int dirty_num; + // Number of changed pieces + int dirty_num; - // Max 3 pieces can change in one move. A promotion with capture moves - // both the pawn and the captured piece to SQ_NONE and the piece promoted - // to from SQ_NONE to the capture square. - Piece piece[3]; + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. + Piece piece[3]; - // From and to squares, which may be SQ_NONE - Square from[3]; - Square to[3]; + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; }; -#define ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ -constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ -constexpr T operator-(T d) { return T(-int(d)); } \ -inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ -inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } - -#define ENABLE_INCR_OPERATORS_ON(T) \ -inline T& operator++(T& d) { return d = T(int(d) + 1); } \ -inline T& operator--(T& d) { return d = T(int(d) - 1); } - -#define ENABLE_FULL_OPERATORS_ON(T) \ -ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator*(int i, T d) { return T(i * int(d)); } \ -constexpr T operator*(T d, int i) { return T(int(d) * i); } \ -constexpr T operator/(T d, int i) { return T(int(d) / i); } \ -constexpr int operator/(T d1, T d2) { return int(d1) / int(d2); } \ -inline T& operator*=(T& d, int i) { return d = T(int(d) * i); } \ -inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } + #define ENABLE_BASE_OPERATORS_ON(T) \ + constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ + constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ + constexpr T operator-(T d) { return T(-int(d)); } \ + inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ + inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } + + #define ENABLE_INCR_OPERATORS_ON(T) \ + inline T& operator++(T& d) { return d = T(int(d) + 1); } \ + inline T& operator--(T& d) { return d = T(int(d) - 1); } + + #define ENABLE_FULL_OPERATORS_ON(T) \ + ENABLE_BASE_OPERATORS_ON(T) \ + constexpr T operator*(int i, T d) { return T(i * int(d)); } \ + constexpr T operator*(T d, int i) { return T(int(d) * i); } \ + constexpr T operator/(T d, int i) { return T(int(d) / i); } \ + constexpr int operator/(T d1, T d2) { return int(d1) / int(d2); } \ + inline T& operator*=(T& d, int i) { return d = T(int(d) * i); } \ + inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) @@ -287,131 +379,97 @@ ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(Rank) -#undef ENABLE_FULL_OPERATORS_ON -#undef ENABLE_INCR_OPERATORS_ON -#undef ENABLE_BASE_OPERATORS_ON + #undef ENABLE_FULL_OPERATORS_ON + #undef ENABLE_INCR_OPERATORS_ON + #undef ENABLE_BASE_OPERATORS_ON // Additional operators to add a Direction to a Square constexpr Square operator+(Square s, Direction d) { return Square(int(s) + int(d)); } constexpr Square operator-(Square s, Direction d) { return Square(int(s) - int(d)); } -inline Square& operator+=(Square& s, Direction d) { return s = s + d; } -inline Square& operator-=(Square& s, Direction d) { return s = s - d; } +inline Square& operator+=(Square& s, Direction d) { return s = s + d; } +inline Square& operator-=(Square& s, Direction d) { return s = s - d; } constexpr Color operator~(Color c) { - return Color(c ^ BLACK); // Toggle color + return Color(c ^ BLACK); // Toggle color } -constexpr Square flip_rank(Square s) { // Swap A1 <-> A8 - return Square(s ^ SQ_A8); +constexpr Square flip_rank(Square s) { // Swap A1 <-> A8 + return Square(s ^ SQ_A8); } -constexpr Square flip_file(Square s) { // Swap A1 <-> H1 - return Square(s ^ SQ_H1); +constexpr Square flip_file(Square s) { // Swap A1 <-> H1 + return Square(s ^ SQ_H1); } constexpr Piece operator~(Piece pc) { - return Piece(pc ^ 8); // Swap color of piece B_KNIGHT <-> W_KNIGHT + return Piece(pc ^ 8); // Swap color of piece B_KNIGHT <-> W_KNIGHT } constexpr CastlingRights operator&(Color c, CastlingRights cr) { - return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); + return CastlingRights((c == WHITE ? WHITE_CASTLING : BLACK_CASTLING) & cr); } -constexpr Value mate_in(int ply) { - return VALUE_MATE - ply; -} +constexpr Value mate_in(int ply) { return VALUE_MATE - ply; } -constexpr Value mated_in(int ply) { - return -VALUE_MATE + ply; -} +constexpr Value mated_in(int ply) { return -VALUE_MATE + ply; } -constexpr Square make_square(File f, Rank r) { - return Square((r << 3) + f); -} +constexpr Square make_square(File f, Rank r) { return Square((r << 3) + f); } -constexpr Piece make_piece(Color c, PieceType pt) { - return Piece((c << 3) + pt); -} +constexpr Piece make_piece(Color c, PieceType pt) { return Piece((c << 3) + pt); } -constexpr PieceType type_of(Piece pc) { - return PieceType(pc & 7); -} +constexpr PieceType type_of(Piece pc) { return PieceType(pc & 7); } inline Color color_of(Piece pc) { - assert(pc != NO_PIECE); - return Color(pc >> 3); + assert(pc != NO_PIECE); + return Color(pc >> 3); } -constexpr bool is_ok(Move m) { - return m != MOVE_NONE && m != MOVE_NULL; -} +constexpr bool is_ok(Move m) { return m != MOVE_NONE && m != MOVE_NULL; } -constexpr bool is_ok(Square s) { - return s >= SQ_A1 && s <= SQ_H8; -} +constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } -constexpr File file_of(Square s) { - return File(s & 7); -} +constexpr File file_of(Square s) { return File(s & 7); } -constexpr Rank rank_of(Square s) { - return Rank(s >> 3); -} +constexpr Rank rank_of(Square s) { return Rank(s >> 3); } -constexpr Square relative_square(Color c, Square s) { - return Square(s ^ (c * 56)); -} +constexpr Square relative_square(Color c, Square s) { return Square(s ^ (c * 56)); } -constexpr Rank relative_rank(Color c, Rank r) { - return Rank(r ^ (c * 7)); -} +constexpr Rank relative_rank(Color c, Rank r) { return Rank(r ^ (c * 7)); } -constexpr Rank relative_rank(Color c, Square s) { - return relative_rank(c, rank_of(s)); -} +constexpr Rank relative_rank(Color c, Square s) { return relative_rank(c, rank_of(s)); } -constexpr Direction pawn_push(Color c) { - return c == WHITE ? NORTH : SOUTH; -} +constexpr Direction pawn_push(Color c) { return c == WHITE ? NORTH : SOUTH; } constexpr Square from_sq(Move m) { - assert(is_ok(m)); - return Square((m >> 6) & 0x3F); + assert(is_ok(m)); + return Square((m >> 6) & 0x3F); } constexpr Square to_sq(Move m) { - assert(is_ok(m)); - return Square(m & 0x3F); + assert(is_ok(m)); + return Square(m & 0x3F); } -constexpr int from_to(Move m) { - return m & 0xFFF; -} +constexpr int from_to(Move m) { return m & 0xFFF; } -constexpr MoveType type_of(Move m) { - return MoveType(m & (3 << 14)); -} +constexpr MoveType type_of(Move m) { return MoveType(m & (3 << 14)); } -constexpr PieceType promotion_type(Move m) { - return PieceType(((m >> 12) & 3) + KNIGHT); -} +constexpr PieceType promotion_type(Move m) { return PieceType(((m >> 12) & 3) + KNIGHT); } -constexpr Move make_move(Square from, Square to) { - return Move((from << 6) + to); -} +constexpr Move make_move(Square from, Square to) { return Move((from << 6) + to); } template constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { - return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); + return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); } // Based on a congruential pseudo-random number generator constexpr Key make_key(uint64_t seed) { - return seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed * 6364136223846793005ULL + 1442695040888963407ULL; } -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef TYPES_H_INCLUDED +#endif // #ifndef TYPES_H_INCLUDED -#include "tune.h" // Global visibility to tuning setup +#include "tune.h" // Global visibility to tuning setup diff --git a/src/uci.cpp b/src/uci.cpp index 81bf7aff768..b0d59180d37 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -45,69 +45,66 @@ namespace Stockfish { namespace { - // FEN string for the initial position in standard chess - const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; +// FEN string for the initial position in standard chess +const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; - // position() is called when the engine receives the "position" UCI command. - // It sets up the position that is described in the given FEN string ("fen") or - // the initial position ("startpos") and then makes the moves given in the following - // move list ("moves"). +// position() is called when the engine receives the "position" UCI command. +// It sets up the position that is described in the given FEN string ("fen") or +// the initial position ("startpos") and then makes the moves given in the following +// move list ("moves"). - void position(Position& pos, std::istringstream& is, StateListPtr& states) { +void position(Position& pos, std::istringstream& is, StateListPtr& states) { - Move m; + Move m; std::string token, fen; is >> token; - if (token == "startpos") - { + if (token == "startpos") { fen = StartFEN; - is >> token; // Consume the "moves" token, if any - } - else if (token == "fen") + is >> token; // Consume the "moves" token, if any + } else if (token == "fen") while (is >> token && token != "moves") fen += token + " "; else return; - states = StateListPtr(new std::deque(1)); // Drop the old state and create a new one + states = StateListPtr(new std::deque(1)); // Drop the old state and create a new one pos.set(fen, Options["UCI_Chess960"], &states->back(), Threads.main()); // Parse the move list, if any - while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE) - { + while (is >> token && (m = UCI::to_move(pos, token)) != MOVE_NONE) { states->emplace_back(); pos.do_move(m, states->back()); } - } +} - // trace_eval() prints the evaluation of the current position, consistent with - // the UCI options set so far. +// trace_eval() prints the evaluation of the current position, consistent with +// the UCI options set so far. - void trace_eval(Position& pos) { +void trace_eval(Position& pos) { StateListPtr states(new std::deque(1)); - Position p; + Position p; p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); Eval::NNUE::verify(); sync_cout << "\n" << Eval::trace(p) << sync_endl; - } +} - // setoption() is called when the engine receives the "setoption" UCI command. - // The function updates the UCI option ("name") to the given value ("value"). +// setoption() is called when the engine receives the "setoption" UCI command. +// The function updates the UCI option ("name") to the given value ("value"). - void setoption(std::istringstream& is) { +void setoption(std::istringstream& is) { Threads.main()->wait_for_search_finished(); std::string token, name, value; - is >> token; // Consume the "name" token + is >> token; // Consume the "name" token // Read the option name (can contain spaces) while (is >> token && token != "value") @@ -121,116 +118,129 @@ namespace { Options[name] = value; else sync_cout << "No such option: " << name << sync_endl; - } +} - // go() is called when the engine receives the "go" UCI command. The function - // sets the thinking time and other parameters from the input string, then starts - // with a search. +// go() is called when the engine receives the "go" UCI command. The function +// sets the thinking time and other parameters from the input string, then starts +// with a search. - void go(Position& pos, std::istringstream& is, StateListPtr& states) { +void go(Position& pos, std::istringstream& is, StateListPtr& states) { Search::LimitsType limits; - std::string token; - bool ponderMode = false; + std::string token; + bool ponderMode = false; - limits.startTime = now(); // The search starts as early as possible + limits.startTime = now(); // The search starts as early as possible while (is >> token) - if (token == "searchmoves") // Needs to be the last command on the line + if (token == "searchmoves") // Needs to be the last command on the line while (is >> token) limits.searchmoves.push_back(UCI::to_move(pos, token)); - else if (token == "wtime") is >> limits.time[WHITE]; - else if (token == "btime") is >> limits.time[BLACK]; - else if (token == "winc") is >> limits.inc[WHITE]; - else if (token == "binc") is >> limits.inc[BLACK]; - else if (token == "movestogo") is >> limits.movestogo; - else if (token == "depth") is >> limits.depth; - else if (token == "nodes") is >> limits.nodes; - else if (token == "movetime") is >> limits.movetime; - else if (token == "mate") is >> limits.mate; - else if (token == "perft") is >> limits.perft; - else if (token == "infinite") limits.infinite = 1; - else if (token == "ponder") ponderMode = true; + else if (token == "wtime") + is >> limits.time[WHITE]; + else if (token == "btime") + is >> limits.time[BLACK]; + else if (token == "winc") + is >> limits.inc[WHITE]; + else if (token == "binc") + is >> limits.inc[BLACK]; + else if (token == "movestogo") + is >> limits.movestogo; + else if (token == "depth") + is >> limits.depth; + else if (token == "nodes") + is >> limits.nodes; + else if (token == "movetime") + is >> limits.movetime; + else if (token == "mate") + is >> limits.mate; + else if (token == "perft") + is >> limits.perft; + else if (token == "infinite") + limits.infinite = 1; + else if (token == "ponder") + ponderMode = true; Threads.start_thinking(pos, states, limits, ponderMode); - } +} - // bench() is called when the engine receives the "bench" command. - // First, a list of UCI commands is set up according to the bench - // parameters, then it is run one by one, printing a summary at the end. +// bench() is called when the engine receives the "bench" command. +// First, a list of UCI commands is set up according to the bench +// parameters, then it is run one by one, printing a summary at the end. - void bench(Position& pos, std::istream& args, StateListPtr& states) { +void bench(Position& pos, std::istream& args, StateListPtr& states) { std::string token; - uint64_t num, nodes = 0, cnt = 1; + uint64_t num, nodes = 0, cnt = 1; std::vector list = setup_bench(pos, args); - num = count_if(list.begin(), list.end(), [](const std::string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); + num = count_if(list.begin(), list.end(), + [](const std::string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); TimePoint elapsed = now(); - for (const auto& cmd : list) - { + for (const auto& cmd : list) { std::istringstream is(cmd); is >> std::skipws >> token; - if (token == "go" || token == "eval") - { - std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << std::endl; - if (token == "go") - { - go(pos, is, states); - Threads.main()->wait_for_search_finished(); - nodes += Threads.nodes_searched(); - } - else - trace_eval(pos); - } - else if (token == "setoption") setoption(is); - else if (token == "position") position(pos, is, states); - else if (token == "ucinewgame") { Search::clear(); elapsed = now(); } // Search::clear() may take a while + if (token == "go" || token == "eval") { + std::cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" + << std::endl; + if (token == "go") { + go(pos, is, states); + Threads.main()->wait_for_search_finished(); + nodes += Threads.nodes_searched(); + } else + trace_eval(pos); + } else if (token == "setoption") + setoption(is); + else if (token == "position") + position(pos, is, states); + else if (token == "ucinewgame") { + Search::clear(); + elapsed = now(); + } // Search::clear() may take a while } - elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' + elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero' dbg_print(); std::cerr << "\n===========================" - << "\nTotal time (ms) : " << elapsed - << "\nNodes searched : " << nodes + << "\nTotal time (ms) : " << elapsed << "\nNodes searched : " << nodes << "\nNodes/second : " << 1000 * nodes / elapsed << std::endl; - } +} - // The win rate model returns the probability of winning (in per mille units) given an - // eval and a game ply. It fits the LTC fishtest statistics rather accurately. - int win_rate_model(Value v, int ply) { +// The win rate model returns the probability of winning (in per mille units) given an +// eval and a game ply. It fits the LTC fishtest statistics rather accurately. +int win_rate_model(Value v, int ply) { - // The model only captures up to 240 plies, so limit the input and then rescale - double m = std::min(240, ply) / 64.0; + // The model only captures up to 240 plies, so limit the input and then rescale + double m = std::min(240, ply) / 64.0; - // The coefficients of a third-order polynomial fit is based on the fishtest data - // for two parameters that need to transform eval to the argument of a logistic - // function. - constexpr double as[] = { 0.38036525, -2.82015070, 23.17882135, 307.36768407}; - constexpr double bs[] = { -2.29434733, 13.27689788, -14.26828904, 63.45318330 }; + // The coefficients of a third-order polynomial fit is based on the fishtest data + // for two parameters that need to transform eval to the argument of a logistic + // function. + constexpr double as[] = {0.38036525, -2.82015070, 23.17882135, 307.36768407}; + constexpr double bs[] = {-2.29434733, 13.27689788, -14.26828904, 63.45318330}; - // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64 - static_assert(UCI::NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3])); + // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64 + static_assert(UCI::NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3])); - double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; - double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; + double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; + double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; - // Transform the eval to centipawns with limited range - double x = std::clamp(double(v), -4000.0, 4000.0); + // Transform the eval to centipawns with limited range + double x = std::clamp(double(v), -4000.0, 4000.0); - // Return the win rate in per mille units, rounded to the nearest integer - return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); - } + // Return the win rate in per mille units, rounded to the nearest integer + return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); +} -} // namespace +} // namespace // UCI::loop() waits for a command from the stdin, parses it, and then calls the appropriate @@ -241,81 +251,89 @@ namespace { void UCI::loop(int argc, char* argv[]) { - Position pos; - std::string token, cmd; - StateListPtr states(new std::deque(1)); - - pos.set(StartFEN, false, &states->back(), Threads.main()); - - for (int i = 1; i < argc; ++i) - cmd += std::string(argv[i]) + " "; - - do { - if (argc == 1 && !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication - cmd = "quit"; - - std::istringstream is(cmd); - - token.clear(); // Avoid a stale if getline() returns nothing or a blank line - is >> std::skipws >> token; - - if ( token == "quit" - || token == "stop") - Threads.stop = true; - - // The GUI sends 'ponderhit' to tell that the user has played the expected move. - // So, 'ponderhit' is sent if pondering was done on the same move that the user - // has played. The search should continue, but should also switch from pondering - // to the normal search. - else if (token == "ponderhit") - Threads.main()->ponder = false; // Switch to the normal search - - else if (token == "uci") - sync_cout << "id name " << engine_info(true) - << "\n" << Options - << "\nuciok" << sync_endl; - - else if (token == "setoption") setoption(is); - else if (token == "go") go(pos, is, states); - else if (token == "position") position(pos, is, states); - else if (token == "ucinewgame") Search::clear(); - else if (token == "isready") sync_cout << "readyok" << sync_endl; - - // Add custom non-UCI commands, mainly for debugging purposes. - // These commands must not be used during a search! - else if (token == "flip") pos.flip(); - else if (token == "bench") bench(pos, is, states); - else if (token == "d") sync_cout << pos << sync_endl; - else if (token == "eval") trace_eval(pos); - else if (token == "compiler") sync_cout << compiler_info() << sync_endl; - else if (token == "export_net") - { - std::optional filename; - std::string f; - if (is >> std::skipws >> f) - filename = f; - Eval::NNUE::save_eval(filename); - } - else if (token == "--help" || token == "help" || token == "--license" || token == "license") - sync_cout << "\nStockfish is a powerful chess engine for playing and analyzing." - "\nIt is released as free software licensed under the GNU GPLv3 License." - "\nStockfish is normally used with a graphical user interface (GUI) and implements" - "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." - "\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" - "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" << sync_endl; - else if (!token.empty() && token[0] != '#') - sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." << sync_endl; - - } while (token != "quit" && argc == 1); // The command-line arguments are one-shot + Position pos; + std::string token, cmd; + StateListPtr states(new std::deque(1)); + + pos.set(StartFEN, false, &states->back(), Threads.main()); + + for (int i = 1; i < argc; ++i) + cmd += std::string(argv[i]) + " "; + + do { + if (argc == 1 + && !getline(std::cin, cmd)) // Wait for an input or an end-of-file (EOF) indication + cmd = "quit"; + + std::istringstream is(cmd); + + token.clear(); // Avoid a stale if getline() returns nothing or a blank line + is >> std::skipws >> token; + + if (token == "quit" || token == "stop") + Threads.stop = true; + + // The GUI sends 'ponderhit' to tell that the user has played the expected move. + // So, 'ponderhit' is sent if pondering was done on the same move that the user + // has played. The search should continue, but should also switch from pondering + // to the normal search. + else if (token == "ponderhit") + Threads.main()->ponder = false; // Switch to the normal search + + else if (token == "uci") + sync_cout << "id name " << engine_info(true) << "\n" + << Options << "\nuciok" << sync_endl; + + else if (token == "setoption") + setoption(is); + else if (token == "go") + go(pos, is, states); + else if (token == "position") + position(pos, is, states); + else if (token == "ucinewgame") + Search::clear(); + else if (token == "isready") + sync_cout << "readyok" << sync_endl; + + // Add custom non-UCI commands, mainly for debugging purposes. + // These commands must not be used during a search! + else if (token == "flip") + pos.flip(); + else if (token == "bench") + bench(pos, is, states); + else if (token == "d") + sync_cout << pos << sync_endl; + else if (token == "eval") + trace_eval(pos); + else if (token == "compiler") + sync_cout << compiler_info() << sync_endl; + else if (token == "export_net") { + std::optional filename; + std::string f; + if (is >> std::skipws >> f) + filename = f; + Eval::NNUE::save_eval(filename); + } else if (token == "--help" || token == "help" || token == "--license" + || token == "license") + sync_cout + << "\nStockfish is a powerful chess engine for playing and analyzing." + "\nIt is released as free software licensed under the GNU GPLv3 License." + "\nStockfish is normally used with a graphical user interface (GUI) and implements" + "\nthe Universal Chess Interface (UCI) protocol to communicate with a GUI, an API, etc." + "\nFor any further information, visit https://github.com/official-stockfish/Stockfish#readme" + "\nor read the corresponding README.md and Copying.txt files distributed along with this program.\n" + << sync_endl; + else if (!token.empty() && token[0] != '#') + sync_cout << "Unknown command: '" << cmd << "'. Type help for more information." + << sync_endl; + + } while (token != "quit" && argc == 1); // The command-line arguments are one-shot } // Turns a Value to an integer centipawn number, // without treatment of mate and similar special scores. -int UCI::to_cp(Value v) { - - return 100 * v / UCI::NormalizeToPawnValue; -} +int UCI::to_cp(Value v) { return 100 * v / UCI::NormalizeToPawnValue; } // UCI::value() converts a Value to a string by adhering to the UCI protocol specification: // @@ -325,21 +343,19 @@ int UCI::to_cp(Value v) { std::string UCI::value(Value v) { - assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); + assert(-VALUE_INFINITE < v && v < VALUE_INFINITE); - std::stringstream ss; + std::stringstream ss; - if (abs(v) < VALUE_TB_WIN_IN_MAX_PLY) - ss << "cp " << UCI::to_cp(v); - else if (abs(v) < VALUE_MATE_IN_MAX_PLY) - { - const int ply = VALUE_MATE_IN_MAX_PLY - 1 - std::abs(v); // recompute ss->ply - ss << "cp " << (v > 0 ? 20000 - ply : -20000 + ply); - } - else - ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; + if (abs(v) < VALUE_TB_WIN_IN_MAX_PLY) + ss << "cp " << UCI::to_cp(v); + else if (abs(v) < VALUE_MATE_IN_MAX_PLY) { + const int ply = VALUE_MATE_IN_MAX_PLY - 1 - std::abs(v); // recompute ss->ply + ss << "cp " << (v > 0 ? 20000 - ply : -20000 + ply); + } else + ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; - return ss.str(); + return ss.str(); } @@ -348,21 +364,21 @@ std::string UCI::value(Value v) { std::string UCI::wdl(Value v, int ply) { - std::stringstream ss; + std::stringstream ss; - int wdl_w = win_rate_model( v, ply); - int wdl_l = win_rate_model(-v, ply); - int wdl_d = 1000 - wdl_w - wdl_l; - ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; + int wdl_w = win_rate_model(v, ply); + int wdl_l = win_rate_model(-v, ply); + int wdl_d = 1000 - wdl_w - wdl_l; + ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; - return ss.str(); + return ss.str(); } // UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) std::string UCI::square(Square s) { - return std::string{ char('a' + file_of(s)), char('1' + rank_of(s)) }; + return std::string{char('a' + file_of(s)), char('1' + rank_of(s))}; } @@ -373,24 +389,24 @@ std::string UCI::square(Square s) { std::string UCI::move(Move m, bool chess960) { - if (m == MOVE_NONE) - return "(none)"; + if (m == MOVE_NONE) + return "(none)"; - if (m == MOVE_NULL) - return "0000"; + if (m == MOVE_NULL) + return "0000"; - Square from = from_sq(m); - Square to = to_sq(m); + Square from = from_sq(m); + Square to = to_sq(m); - if (type_of(m) == CASTLING && !chess960) - to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); + if (type_of(m) == CASTLING && !chess960) + to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); - std::string move = UCI::square(from) + UCI::square(to); + std::string move = UCI::square(from) + UCI::square(to); - if (type_of(m) == PROMOTION) - move += " pnbrqk"[promotion_type(m)]; + if (type_of(m) == PROMOTION) + move += " pnbrqk"[promotion_type(m)]; - return move; + return move; } @@ -399,14 +415,14 @@ std::string UCI::move(Move m, bool chess960) { Move UCI::to_move(const Position& pos, std::string& str) { - if (str.length() == 5) - str[4] = char(tolower(str[4])); // The promotion piece character must be lowercased + if (str.length() == 5) + str[4] = char(tolower(str[4])); // The promotion piece character must be lowercased - for (const auto& m : MoveList(pos)) - if (str == UCI::move(m, pos.is_chess960())) - return m; + for (const auto& m : MoveList(pos)) + if (str == UCI::move(m, pos.is_chess960())) + return m; - return MOVE_NONE; + return MOVE_NONE; } -} // namespace Stockfish +} // namespace Stockfish diff --git a/src/uci.h b/src/uci.h index 048f8c1166e..be5c70c54ce 100644 --- a/src/uci.h +++ b/src/uci.h @@ -43,7 +43,7 @@ class Option; // Define a custom comparator, because the UCI options should be case-insensitive struct CaseInsensitiveLess { - bool operator() (const std::string&, const std::string&) const; + bool operator()(const std::string&, const std::string&) const; }; // The options container is defined as a std::map @@ -52,44 +52,44 @@ using OptionsMap = std::map; // The Option class implements each option as specified by the UCI protocol class Option { - using OnChange = void (*)(const Option&); + using OnChange = void (*)(const Option&); -public: - Option(OnChange = nullptr); - Option(bool v, OnChange = nullptr); - Option(const char* v, OnChange = nullptr); - Option(double v, int minv, int maxv, OnChange = nullptr); - Option(const char* v, const char* cur, OnChange = nullptr); + public: + Option(OnChange = nullptr); + Option(bool v, OnChange = nullptr); + Option(const char* v, OnChange = nullptr); + Option(double v, int minv, int maxv, OnChange = nullptr); + Option(const char* v, const char* cur, OnChange = nullptr); - Option& operator=(const std::string&); - void operator<<(const Option&); - operator int() const; - operator std::string() const; - bool operator==(const char*) const; + Option& operator=(const std::string&); + void operator<<(const Option&); + operator int() const; + operator std::string() const; + bool operator==(const char*) const; -private: - friend std::ostream& operator<<(std::ostream&, const OptionsMap&); + private: + friend std::ostream& operator<<(std::ostream&, const OptionsMap&); - std::string defaultValue, currentValue, type; - int min, max; - size_t idx; - OnChange on_change; + std::string defaultValue, currentValue, type; + int min, max; + size_t idx; + OnChange on_change; }; -void init(OptionsMap&); -void loop(int argc, char* argv[]); -int to_cp(Value v); +void init(OptionsMap&); +void loop(int argc, char* argv[]); +int to_cp(Value v); std::string value(Value v); std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth); std::string wdl(Value v, int ply); -Move to_move(const Position& pos, std::string& str); +Move to_move(const Position& pos, std::string& str); -} // namespace UCI +} // namespace UCI extern UCI::OptionsMap Options; -} // namespace Stockfish +} // namespace Stockfish -#endif // #ifndef UCI_H_INCLUDED +#endif // #ifndef UCI_H_INCLUDED diff --git a/src/ucioption.cpp b/src/ucioption.cpp index b822ccf936f..da5bf38175c 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -40,7 +40,7 @@ using std::string; namespace Stockfish { -UCI::OptionsMap Options; // Global object +UCI::OptionsMap Options; // Global object namespace UCI { @@ -53,10 +53,10 @@ static void on_tb_path(const Option& o) { Tablebases::init(o); } static void on_eval_file(const Option&) { Eval::NNUE::init(); } // Our case insensitive less() function as required by UCI protocol -bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { +bool CaseInsensitiveLess::operator()(const string& s1, const string& s2) const { - return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), - [](char c1, char c2) { return tolower(c1) < tolower(c2); }); + return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(), + [](char c1, char c2) { return tolower(c1) < tolower(c2); }); } @@ -64,28 +64,28 @@ bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const void init(OptionsMap& o) { - constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; - - o["Debug Log File"] << Option("", on_logger); - o["Threads"] << Option(1, 1, 1024, on_threads); - o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size); - o["Clear Hash"] << Option(on_clear_hash); - o["Ponder"] << Option(false); - o["MultiPV"] << Option(1, 1, 500); - o["Skill Level"] << Option(20, 0, 20); - o["Move Overhead"] << Option(10, 0, 5000); - o["Slow Mover"] << Option(100, 10, 1000); - o["nodestime"] << Option(0, 0, 10000); - o["UCI_Chess960"] << Option(false); - o["UCI_AnalyseMode"] << Option(false); - o["UCI_LimitStrength"] << Option(false); - o["UCI_Elo"] << Option(1320, 1320, 3190); - o["UCI_ShowWDL"] << Option(false); - o["SyzygyPath"] << Option("", on_tb_path); - o["SyzygyProbeDepth"] << Option(1, 1, 100); - o["Syzygy50MoveRule"] << Option(true); - o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); + constexpr int MaxHashMB = Is64Bit ? 33554432 : 2048; + + o["Debug Log File"] << Option("", on_logger); + o["Threads"] << Option(1, 1, 1024, on_threads); + o["Hash"] << Option(16, 1, MaxHashMB, on_hash_size); + o["Clear Hash"] << Option(on_clear_hash); + o["Ponder"] << Option(false); + o["MultiPV"] << Option(1, 1, 500); + o["Skill Level"] << Option(20, 0, 20); + o["Move Overhead"] << Option(10, 0, 5000); + o["Slow Mover"] << Option(100, 10, 1000); + o["nodestime"] << Option(0, 0, 10000); + o["UCI_Chess960"] << Option(false); + o["UCI_AnalyseMode"] << Option(false); + o["UCI_LimitStrength"] << Option(false); + o["UCI_Elo"] << Option(1320, 1320, 3190); + o["UCI_ShowWDL"] << Option(false); + o["SyzygyPath"] << Option("", on_tb_path); + o["SyzygyProbeDepth"] << Option(1, 1, 100); + o["Syzygy50MoveRule"] << Option(true); + o["SyzygyProbeLimit"] << Option(7, 0, 7); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); } @@ -94,59 +94,80 @@ void init(OptionsMap& o) { std::ostream& operator<<(std::ostream& os, const OptionsMap& om) { - for (size_t idx = 0; idx < om.size(); ++idx) - for (const auto& it : om) - if (it.second.idx == idx) - { - const Option& o = it.second; - os << "\noption name " << it.first << " type " << o.type; + for (size_t idx = 0; idx < om.size(); ++idx) + for (const auto& it : om) + if (it.second.idx == idx) { + const Option& o = it.second; + os << "\noption name " << it.first << " type " << o.type; - if (o.type == "string" || o.type == "check" || o.type == "combo") - os << " default " << o.defaultValue; + if (o.type == "string" || o.type == "check" || o.type == "combo") + os << " default " << o.defaultValue; - if (o.type == "spin") - os << " default " << int(stof(o.defaultValue)) - << " min " << o.min - << " max " << o.max; + if (o.type == "spin") + os << " default " << int(stof(o.defaultValue)) << " min " << o.min << " max " + << o.max; - break; - } + break; + } - return os; + return os; } // Option class constructors and conversion operators -Option::Option(const char* v, OnChange f) : type("string"), min(0), max(0), on_change(f) -{ defaultValue = currentValue = v; } - -Option::Option(bool v, OnChange f) : type("check"), min(0), max(0), on_change(f) -{ defaultValue = currentValue = (v ? "true" : "false"); } +Option::Option(const char* v, OnChange f) : + type("string"), + min(0), + max(0), + on_change(f) { + defaultValue = currentValue = v; +} -Option::Option(OnChange f) : type("button"), min(0), max(0), on_change(f) -{} +Option::Option(bool v, OnChange f) : + type("check"), + min(0), + max(0), + on_change(f) { + defaultValue = currentValue = (v ? "true" : "false"); +} -Option::Option(double v, int minv, int maxv, OnChange f) : type("spin"), min(minv), max(maxv), on_change(f) -{ defaultValue = currentValue = std::to_string(v); } +Option::Option(OnChange f) : + type("button"), + min(0), + max(0), + on_change(f) {} + +Option::Option(double v, int minv, int maxv, OnChange f) : + type("spin"), + min(minv), + max(maxv), + on_change(f) { + defaultValue = currentValue = std::to_string(v); +} -Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(f) -{ defaultValue = v; currentValue = cur; } +Option::Option(const char* v, const char* cur, OnChange f) : + type("combo"), + min(0), + max(0), + on_change(f) { + defaultValue = v; + currentValue = cur; +} Option::operator int() const { - assert(type == "check" || type == "spin"); - return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); + assert(type == "check" || type == "spin"); + return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); } Option::operator std::string() const { - assert(type == "string"); - return currentValue; + assert(type == "string"); + return currentValue; } bool Option::operator==(const char* s) const { - assert(type == "combo"); - return !CaseInsensitiveLess()(currentValue, s) - && !CaseInsensitiveLess()(s, currentValue); + assert(type == "combo"); + return !CaseInsensitiveLess()(currentValue, s) && !CaseInsensitiveLess()(s, currentValue); } @@ -154,10 +175,10 @@ bool Option::operator==(const char* s) const { void Option::operator<<(const Option& o) { - static size_t insert_order = 0; + static size_t insert_order = 0; - *this = o; - idx = insert_order++; + *this = o; + idx = insert_order++; } @@ -167,33 +188,32 @@ void Option::operator<<(const Option& o) { Option& Option::operator=(const string& v) { - assert(!type.empty()); + assert(!type.empty()); - if ( (type != "button" && type != "string" && v.empty()) - || (type == "check" && v != "true" && v != "false") - || (type == "spin" && (stof(v) < min || stof(v) > max))) - return *this; + if ((type != "button" && type != "string" && v.empty()) + || (type == "check" && v != "true" && v != "false") + || (type == "spin" && (stof(v) < min || stof(v) > max))) + return *this; - if (type == "combo") - { - OptionsMap comboMap; // To have case insensitive compare - string token; - std::istringstream ss(defaultValue); - while (ss >> token) - comboMap[token] << Option(); - if (!comboMap.count(v) || v == "var") - return *this; - } + if (type == "combo") { + OptionsMap comboMap; // To have case insensitive compare + string token; + std::istringstream ss(defaultValue); + while (ss >> token) + comboMap[token] << Option(); + if (!comboMap.count(v) || v == "var") + return *this; + } - if (type != "button") - currentValue = v; + if (type != "button") + currentValue = v; - if (on_change) - on_change(*this); + if (on_change) + on_change(*this); - return *this; + return *this; } -} // namespace UCI +} // namespace UCI -} // namespace Stockfish +} // namespace Stockfish