diff --git a/AUTHORS b/AUTHORS index abae401c1ef..36b2b6f7942 100644 --- a/AUTHORS +++ b/AUTHORS @@ -46,6 +46,7 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Ciekce clefrks Clemens L. (rn5f107s2) Cody Ho (aesrentai) diff --git a/src/engine.cpp b/src/engine.cpp index 164f228ad09..c4c0918082a 100644 --- a/src/engine.cpp +++ b/src/engine.cpp @@ -54,6 +54,7 @@ Engine::Engine(std::string path) : NN::NetworkMedium({EvalFileDefaultNameMedium, "None", ""}, NN::EmbeddedNNUEType::MEDIUM), NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) { pos.set(StartFEN, false, &states->back()); + capSq = SQ_NONE; } std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) { @@ -62,9 +63,10 @@ std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960 return Benchmark::perft(fen, depth, isChess960); } -void Engine::go(const Search::LimitsType& limits) { +void Engine::go(Search::LimitsType& limits) { assert(limits.perft == 0); verify_networks(); + limits.capSq = capSq; threads.start_thinking(options, pos, states, limits); } @@ -103,6 +105,7 @@ void Engine::set_position(const std::string& fen, const std::vector states = StateListPtr(new std::deque(1)); pos.set(fen, options["UCI_Chess960"], &states->back()); + capSq = SQ_NONE; for (const auto& move : moves) { auto m = UCIEngine::to_move(pos, move); @@ -112,6 +115,11 @@ void Engine::set_position(const std::string& fen, const std::vector states->emplace_back(); pos.do_move(m, states->back()); + + capSq = SQ_NONE; + DirtyPiece& dp = states->back().dirtyPiece; + if (dp.dirty_num > 1 && dp.to[1] == SQ_NONE) + capSq = m.to_sq(); } } @@ -180,4 +188,4 @@ std::string Engine::visualize() const { return ss.str(); } -} \ No newline at end of file +} diff --git a/src/engine.h b/src/engine.h index 753ec4899f4..fc6bda97a43 100644 --- a/src/engine.h +++ b/src/engine.h @@ -20,24 +20,26 @@ #define ENGINE_H_INCLUDED #include +#include #include #include #include #include #include #include -#include #include "nnue/network.h" #include "position.h" #include "search.h" +#include "syzygy/tbprobe.h" // for Stockfish::Depth #include "thread.h" #include "tt.h" #include "ucioption.h" -#include "syzygy/tbprobe.h" // for Stockfish::Depth namespace Stockfish { +enum Square : int; + class Engine { public: using InfoShort = Search::InfoShort; @@ -50,7 +52,7 @@ class Engine { std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960); // non blocking call to start searching - void go(const Search::LimitsType&); + void go(Search::LimitsType&); // non blocking call to stop searching void stop(); @@ -93,6 +95,7 @@ class Engine { Position pos; StateListPtr states; + Square capSq; OptionsMap options; ThreadPool threads; @@ -105,4 +108,4 @@ class Engine { } // namespace Stockfish -#endif // #ifndef ENGINE_H_INCLUDED \ No newline at end of file +#endif // #ifndef ENGINE_H_INCLUDED diff --git a/src/evaluate.cpp b/src/evaluate.cpp index d2dfeed5830..e4e8d9fcc80 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -67,26 +67,30 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, bool psqtOnly = std::abs(simpleEval) > PsqtOnlyThreshold; int nnueComplexity; int v; - + + //Value nnue = smallNet + // ? networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly) + // : networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false); + Value nnue; if (smallNet) - nnue = networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly); + nnue = networks.small.evaluate(pos, &caches.small, true, &nnueComplexity, psqtOnly); else { if (Eval::mediumNetOn) nnue = networks.medium.evaluate(pos, nullptr, true, &nnueComplexity, false); //funktioniert Cache? Nö &caches.medium else nnue = networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false); - } - const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant, - int pawnCountMul, int npmConstant, int evalDiv, - int shufflingConstant, int shufflingDiv) { + } + const auto adjustEval = [&](int optDiv, int nnueDiv, int pawnCountConstant, int pawnCountMul, + int npmConstant, int evalDiv, int shufflingConstant, + int shufflingDiv) { // Blend optimism and eval with nnue complexity and material imbalance optimism += optimism * (nnueComplexity + std::abs(simpleEval - nnue)) / optDiv; nnue -= nnue * (nnueComplexity * 5 / 3) / nnueDiv; - int npm = pos.non_pawn_material() / npmDiv; + int npm = pos.non_pawn_material() / 64; v = (nnue * (npm + pawnCountConstant + pawnCountMul * pos.count()) + optimism * (npmConstant + npm)) / evalDiv; @@ -97,11 +101,11 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, }; if (!smallNet) - adjustEval(524, 32395, 66, 942, 11, 139, 1058, 178, 204); + adjustEval(524, 32395, 942, 11, 139, 1058, 178, 204); else if (psqtOnly) - adjustEval(517, 32857, 65, 908, 7, 155, 1006, 224, 238); + adjustEval(517, 32857, 908, 7, 155, 1006, 224, 238); else - adjustEval(515, 32793, 63, 944, 9, 140, 1067, 206, 206); + adjustEval(515, 32793, 944, 9, 140, 1067, 206, 206); // SFnps Begin // if((NNUE::RandomEval) || (NNUE::WaitMs)) @@ -133,11 +137,11 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, // Trace scores are from white's point of view std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) { - auto caches = std::make_unique(); - if (pos.checkers()) return "Final evaluation: none (in check)"; + auto caches = std::make_unique(networks); + std::stringstream ss; ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n'; diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp index f7235c5d776..f5ac7b9f427 100644 --- a/src/nnue/network.cpp +++ b/src/nnue/network.cpp @@ -259,15 +259,19 @@ void Network::verify(std::string evalfilePath) const { exit(EXIT_FAILURE); } - sync_cout << "info string NNUE evaluation using " << evalfilePath << sync_endl; + size_t size = sizeof(*featureTransformer) + sizeof(*network) * LayerStacks; + sync_cout << "info string NNUE evaluation using " << evalfilePath << " (" + << size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", " + << network[0]->TransformedFeatureDimensions << ", " << network[0]->FC_0_OUTPUTS + << ", " << network[0]->FC_1_OUTPUTS << ", 1))" << sync_endl; } template void Network::hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache, - bool psqtOnl) const { - featureTransformer->hint_common_access(pos, cache, psqtOnl); + bool psqtOnly) const { + featureTransformer->hint_common_access(pos, cache, psqtOnly); } template diff --git a/src/nnue/network.h b/src/nnue/network.h index a4ccb2a1af5..18c5835e664 100644 --- a/src/nnue/network.h +++ b/src/nnue/network.h @@ -63,7 +63,7 @@ class Network { void hint_common_access(const Position& pos, AccumulatorCaches::Cache* cache, - bool psqtOnl) const; + bool psqtOnly) const; void verify(std::string evalfilePath) const; NnueEvalTrace trace_evaluate(const Position& pos, diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index b8dc96f4fab..abfd2cc8ea5 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -50,33 +50,36 @@ struct alignas(CacheLineSize) Accumulator { // is commonly referred to as "Finny Tables". struct AccumulatorCaches { + template + AccumulatorCaches(const Networks& networks) { + clear(networks); + } + template struct alignas(CacheLineSize) Cache { struct alignas(CacheLineSize) Entry { - BiasType accumulation[COLOR_NB][Size]; - PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets]; - Bitboard byColorBB[COLOR_NB][COLOR_NB]; - Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB]; + BiasType accumulation[Size]; + PSQTWeightType psqtAccumulation[PSQTBuckets]; + Bitboard byColorBB[COLOR_NB]; + Bitboard byTypeBB[PIECE_TYPE_NB]; + bool psqtOnly; // To initialize a refresh entry, we set all its bitboards empty, // so we put the biases in the accumulation, without any weights on top void clear(const BiasType* biases) { - std::memset(byColorBB, 0, sizeof(byColorBB)); - std::memset(byTypeBB, 0, sizeof(byTypeBB)); - - std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType)); - std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType)); - - std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation)); + std::memcpy(accumulation, biases, sizeof(accumulation)); + std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0, + sizeof(Entry) - offsetof(Entry, psqtAccumulation)); } }; template void clear(const Network& network) { - for (auto& entry : entries) - entry.clear(network.featureTransformer->biases); + for (auto& entries1D : entries) + for (auto& entry : entries1D) + entry.clear(network.featureTransformer->biases); } void clear(const BiasType* biases) { @@ -84,20 +87,23 @@ struct AccumulatorCaches { entry.clear(biases); } - Entry& operator[](Square sq) { return entries[sq]; } + std::array& operator[](Square sq) { return entries[sq]; } - std::array entries; + std::array, SQUARE_NB> entries; }; template void clear(const Networks& networks) { big.clear(networks.big); + small.clear(networks.small); } // When adding a new cache for a network, i.e. the smallnet // the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh. //Cache medium; - Cache big; + Cache big; + Cache medium; + Cache small; }; } // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 88f0e4031a4..4647ecd066d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -404,19 +404,25 @@ class FeatureTransformer { return {st, next}; } - // NOTE: The parameter states_to_update is an array of position states, ending with nullptr. + // NOTE: The parameter states_to_update is an array of position states. // All states must be sequential, that is states_to_update[i] must either be reachable - // by repeatedly applying ->previous from states_to_update[i+1] or - // states_to_update[i] == nullptr. + // by repeatedly applying ->previous from states_to_update[i+1]. // computed_st must be reachable by repeatedly applying ->previous on - // states_to_update[0], if not nullptr. + // states_to_update[0]. template void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N], bool psqtOnly) const { static_assert(N > 0); - assert(states_to_update[N - 1] == nullptr); + assert([&]() { + for (size_t i = 0; i < N; ++i) + { + if (states_to_update[i] == nullptr) + return false; + } + return true; + }()); #ifdef VECTOR // Gcc-10.2 unnecessarily spills AVX2 registers if this array @@ -425,11 +431,7 @@ class FeatureTransformer { psqt_vec_t psqt[NumPsqtRegs]; #endif - if (states_to_update[0] == nullptr) - return; - // Update incrementally going back through states_to_update. - // Gather all features to be updated. const Square ksq = pos.square(Perspective); @@ -437,28 +439,18 @@ class FeatureTransformer { // That might depend on the feature set and generally relies on the // feature set's update cost calculation to be correct and never allow // updates with more added/removed features than MaxActiveDimensions. - FeatureSet::IndexList removed[N - 1], added[N - 1]; + FeatureSet::IndexList removed[N], added[N]; + for (int i = N - 1; i >= 0; --i) { - int i = - N - - 2; // Last potential state to update. Skip last element because it must be nullptr. - while (states_to_update[i] == nullptr) - --i; + (states_to_update[i]->*accPtr).computed[Perspective] = !psqtOnly; + (states_to_update[i]->*accPtr).computedPSQT[Perspective] = true; - StateInfo* st2 = states_to_update[i]; + const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; - for (; i >= 0; --i) - { - (states_to_update[i]->*accPtr).computed[Perspective] = !psqtOnly; - (states_to_update[i]->*accPtr).computedPSQT[Perspective] = true; - - const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; - - for (; st2 != end_state; st2 = st2->previous) - FeatureSet::append_changed_indices(ksq, st2->dirtyPiece, - removed[i], added[i]); - } + for (StateInfo* st2 = states_to_update[i]; st2 != end_state; st2 = st2->previous) + FeatureSet::append_changed_indices(ksq, st2->dirtyPiece, removed[i], + added[i]); } StateInfo* st = computed_st; @@ -466,8 +458,7 @@ class FeatureTransformer { // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. #ifdef VECTOR - if (states_to_update[1] == nullptr && (removed[0].size() == 1 || removed[0].size() == 2) - && added[0].size() == 1) + if (N == 1 && (removed[0].size() == 1 || removed[0].size() == 2) && added[0].size() == 1) { assert(states_to_update[0]); @@ -541,7 +532,7 @@ class FeatureTransformer { for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_load(&accTileIn[k]); - for (IndexType i = 0; states_to_update[i]; ++i) + for (IndexType i = 0; i < N; ++i) { // Difference calculation for the deactivated features for (const auto index : removed[i]) @@ -578,7 +569,7 @@ class FeatureTransformer { for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); - for (IndexType i = 0; states_to_update[i]; ++i) + for (IndexType i = 0; i < N; ++i) { // Difference calculation for the deactivated features for (const auto index : removed[i]) @@ -608,7 +599,7 @@ class FeatureTransformer { } } #else - for (IndexType i = 0; states_to_update[i]; ++i) + for (IndexType i = 0; i < N; ++i) { if (!psqtOnly) std::memcpy((states_to_update[i]->*accPtr).accumulation[Perspective], @@ -656,200 +647,85 @@ class FeatureTransformer { template void update_accumulator_refresh_cache(const Position& pos, - AccumulatorCaches::Cache* cache) const { + AccumulatorCaches::Cache* cache, + bool psqtOnly) const { assert(cache != nullptr); - Square ksq = pos.square(Perspective); - - auto& entry = (*cache)[ksq]; - - auto& accumulator = pos.state()->*accPtr; - accumulator.computed[Perspective] = true; - accumulator.computedPSQT[Perspective] = true; - + Square ksq = pos.square(Perspective); + auto& entry = (*cache)[ksq][Perspective]; FeatureSet::IndexList removed, added; - for (Color c : {WHITE, BLACK}) - { - for (PieceType pt = PAWN; pt <= KING; ++pt) - { - const Piece piece = make_piece(c, pt); - const Bitboard oldBB = - entry.byColorBB[Perspective][c] & entry.byTypeBB[Perspective][pt]; - const Bitboard newBB = pos.pieces(c, pt); - Bitboard toRemove = oldBB & ~newBB; - Bitboard toAdd = newBB & ~oldBB; - - while (toRemove) - { - Square sq = pop_lsb(toRemove); - removed.push_back(FeatureSet::make_index(sq, piece, ksq)); - } - while (toAdd) - { - Square sq = pop_lsb(toAdd); - added.push_back(FeatureSet::make_index(sq, piece, ksq)); - } - } - } - -#ifdef VECTOR - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + if (entry.psqtOnly && !psqtOnly) { - auto entryTile = - reinterpret_cast(&entry.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = entryTile[k]; - - for (int i = 0; i < int(added.size()); ++i) - { - IndexType index = added[i]; - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - - for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - for (int i = 0; i < int(removed.size()); ++i) - { - IndexType index = removed[i]; - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - - for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } - - for (IndexType k = 0; k < NumRegs; k++) - vec_store(&entryTile[k], acc[k]); + entry.clear(biases); + FeatureSet::append_active_indices(pos, added); } - - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + else { - auto entryTilePsqt = reinterpret_cast( - &entry.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = entryTilePsqt[k]; - - for (int i = 0; i < int(added.size()); ++i) - { - IndexType index = added[i]; - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - for (int i = 0; i < int(removed.size()); ++i) + for (Color c : {WHITE, BLACK}) { - IndexType index = removed[i]; - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (PieceType pt = PAWN; pt <= KING; ++pt) + { + const Piece piece = make_piece(c, pt); + const Bitboard oldBB = entry.byColorBB[c] & entry.byTypeBB[pt]; + const Bitboard newBB = pos.pieces(c, pt); + Bitboard toRemove = oldBB & ~newBB; + Bitboard toAdd = newBB & ~oldBB; - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + while (toRemove) + { + Square sq = pop_lsb(toRemove); + removed.push_back(FeatureSet::make_index(sq, piece, ksq)); + } + while (toAdd) + { + Square sq = pop_lsb(toAdd); + added.push_back(FeatureSet::make_index(sq, piece, ksq)); + } + } } - - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&entryTilePsqt[k], psqt[k]); - } - -#else - - for (const auto index : added) - { - const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - entry.accumulation[Perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } - for (const auto index : removed) - { - const IndexType offset = HalfDimensions * index; - for (IndexType j = 0; j < HalfDimensions; ++j) - entry.accumulation[Perspective][j] -= weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - entry.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; } -#endif - - // The accumulator of the refresh entry has been updated. - // Now copy its content to the actual accumulator we were refreshing - - std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation[Perspective], - sizeof(int32_t) * PSQTBuckets); - - std::memcpy(accumulator.accumulation[Perspective], entry.accumulation[Perspective], - sizeof(BiasType) * HalfDimensions); - - for (Color c : {WHITE, BLACK}) - entry.byColorBB[Perspective][c] = pos.pieces(c); - - for (PieceType pt = PAWN; pt <= KING; ++pt) - entry.byTypeBB[Perspective][pt] = pos.pieces(pt); - } - - template - void - update_accumulator_refresh(const Position& pos, - [[maybe_unused]] AccumulatorCaches::Cache* cache, - bool psqtOnly) const { - - // When we are refreshing the accumulator of the big net, - // redirect to the version of refresh that uses the refresh table. - // Using the cache for the small net is not beneficial. - if constexpr (HalfDimensions == Eval::NNUE::TransformedFeatureDimensionsBig) - { - update_accumulator_refresh_cache(pos, cache); - return; - } - -#ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; -#endif - - // Refresh the accumulator - // Could be extracted to a separate function because it's done in 2 places, - // but it's unclear if compilers would correctly handle register allocation. auto& accumulator = pos.state()->*accPtr; accumulator.computed[Perspective] = !psqtOnly; accumulator.computedPSQT[Perspective] = true; - FeatureSet::IndexList active; - FeatureSet::append_active_indices(pos, active); #ifdef VECTOR + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + if (!psqtOnly) for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { - auto biasesTile = reinterpret_cast(&biases[j * TileHeight]); + auto entryTile = reinterpret_cast(&entry.accumulation[j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasesTile[k]; + acc[k] = entryTile[k]; - int i = 0; - for (; i < int(active.size()) - 1; i += 2) + int i0 = 0; + for (; i0 < int(std::min(removed.size(), added.size())); ++i0) { - IndexType index0 = active[i]; - IndexType index1 = active[i + 1]; - const IndexType offset0 = HalfDimensions * index0 + j * TileHeight; - const IndexType offset1 = HalfDimensions * index1 + j * TileHeight; - auto column0 = reinterpret_cast(&weights[offset0]); - auto column1 = reinterpret_cast(&weights[offset1]); + IndexType indexR = removed[i0]; + const IndexType offsetR = HalfDimensions * indexR + j * TileHeight; + auto columnR = reinterpret_cast(&weights[offsetR]); + IndexType indexA = added[i0]; + const IndexType offsetA = HalfDimensions * indexA + j * TileHeight; + auto columnA = reinterpret_cast(&weights[offsetA]); for (unsigned k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], vec_add_16(column0[k], column1[k])); + acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]); } - for (; i < int(active.size()); ++i) + for (int i = i0; i < int(removed.size()); ++i) { - IndexType index = active[i]; + IndexType index = removed[i]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + for (int i = i0; i < int(added.size()); ++i) + { + IndexType index = added[i]; const IndexType offset = HalfDimensions * index + j * TileHeight; auto column = reinterpret_cast(&weights[offset]); @@ -857,34 +733,29 @@ class FeatureTransformer { acc[k] = vec_add_16(acc[k], column[k]); } - auto accTile = - reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); - for (unsigned k = 0; k < NumRegs; k++) - vec_store(&accTile[k], acc[k]); + for (IndexType k = 0; k < NumRegs; k++) + vec_store(&entryTile[k], acc[k]); } for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { + auto entryTilePsqt = + reinterpret_cast(&entry.psqtAccumulation[j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); + psqt[k] = entryTilePsqt[k]; - int i = 0; - for (; i < int(active.size()) - 1; i += 2) + for (int i = 0; i < int(removed.size()); ++i) { - IndexType index0 = active[i]; - IndexType index1 = active[i + 1]; - const IndexType offset0 = PSQTBuckets * index0 + j * PsqtTileHeight; - const IndexType offset1 = PSQTBuckets * index1 + j * PsqtTileHeight; - auto columnPsqt0 = reinterpret_cast(&psqtWeights[offset0]); - auto columnPsqt1 = reinterpret_cast(&psqtWeights[offset1]); + IndexType index = removed[i]; + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = - vec_add_psqt_32(psqt[k], vec_add_psqt_32(columnPsqt0[k], columnPsqt1[k])); + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); } - for (; i < int(active.size()); ++i) + for (int i = 0; i < int(added.size()); ++i) { - IndexType index = active[i]; + IndexType index = added[i]; const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); @@ -892,34 +763,56 @@ class FeatureTransformer { psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); + vec_store_psqt(&entryTilePsqt[k], psqt[k]); } #else - if (!psqtOnly) - std::memcpy(accumulator.accumulation[Perspective], biases, - HalfDimensions * sizeof(BiasType)); - for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] = 0; + for (const auto index : removed) + { + if (!psqtOnly) + { + const IndexType offset = HalfDimensions * index; + for (IndexType j = 0; j < HalfDimensions; ++j) + entry.accumulation[j] -= weights[offset + j]; + } - for (const auto index : active) + for (std::size_t k = 0; k < PSQTBuckets; ++k) + entry.psqtAccumulation[k] -= psqtWeights[index * PSQTBuckets + k]; + } + for (const auto index : added) { if (!psqtOnly) { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[Perspective][j] += weights[offset + j]; + entry.accumulation[j] += weights[offset + j]; } for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] += - psqtWeights[index * PSQTBuckets + k]; + entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k]; } + #endif + + // The accumulator of the refresh entry has been updated. + // Now copy its content to the actual accumulator we were refreshing + + if (!psqtOnly) + std::memcpy(accumulator.accumulation[Perspective], entry.accumulation, + sizeof(BiasType) * HalfDimensions); + + std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation, + sizeof(int32_t) * PSQTBuckets); + + for (Color c : {WHITE, BLACK}) + entry.byColorBB[c] = pos.pieces(c); + + for (PieceType pt = PAWN; pt <= KING; ++pt) + entry.byTypeBB[pt] = pos.pieces(pt); + + entry.psqtOnly = psqtOnly; } template @@ -943,12 +836,12 @@ class FeatureTransformer { || (psqtOnly && (oldest_st->*accPtr).computedPSQT[Perspective])) { // Only update current position accumulator to minimize work. - StateInfo* states_to_update[2] = {pos.state(), nullptr}; - update_accumulator_incremental(pos, oldest_st, states_to_update, + StateInfo* states_to_update[1] = {pos.state()}; + update_accumulator_incremental(pos, oldest_st, states_to_update, psqtOnly); } else - update_accumulator_refresh(pos, cache, psqtOnly); + update_accumulator_refresh_cache(pos, cache, psqtOnly); } template @@ -969,14 +862,23 @@ class FeatureTransformer { // 1. for the current position // 2. the next accumulator after the computed one // The heuristic may change in the future. - StateInfo* states_to_update[3] = {next, next == pos.state() ? nullptr : pos.state(), - nullptr}; + if (next == pos.state()) + { + StateInfo* states_to_update[1] = {next}; - update_accumulator_incremental(pos, oldest_st, states_to_update, - psqtOnly); + update_accumulator_incremental(pos, oldest_st, states_to_update, + psqtOnly); + } + else + { + StateInfo* states_to_update[2] = {next, pos.state()}; + + update_accumulator_incremental(pos, oldest_st, states_to_update, + psqtOnly); + } } else - update_accumulator_refresh(pos, cache, psqtOnly); + update_accumulator_refresh_cache(pos, cache, psqtOnly); } template diff --git a/src/nnue/nnue_misc.cpp b/src/nnue/nnue_misc.cpp index 18abc8d6a17..859dab97990 100644 --- a/src/nnue/nnue_misc.cpp +++ b/src/nnue/nnue_misc.cpp @@ -48,7 +48,7 @@ void hint_common_parent_position(const Position& pos, int simpleEvalAbs = std::abs(simple_eval(pos, pos.side_to_move())); if (simpleEvalAbs > Eval::SmallNetThreshold) - networks.small.hint_common_access(pos, nullptr, simpleEvalAbs > Eval::PsqtOnlyThreshold); + networks.small.hint_common_access(pos, &caches.small, simpleEvalAbs > Eval::PsqtOnlyThreshold); else if (Stockfish::Eval::mediumNetOn) networks.medium.hint_common_access(pos, nullptr, false); //funktioniert Cache? Nein &caches.medium, else diff --git a/src/search.cpp b/src/search.cpp index 0413c656e6e..41d72d08ac2 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -54,8 +54,8 @@ using namespace Search; namespace { -static constexpr double EvalLevel[10] = {1.043, 1.017, 0.952, 1.009, 0.971, - 1.002, 0.992, 0.947, 1.046, 1.001}; +static constexpr double EvalLevel[10] = {0.981, 0.956, 0.895, 0.949, 0.913, + 0.942, 0.933, 0.890, 0.984, 0.941}; // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving, bool oppWorsening) { @@ -137,19 +137,16 @@ Search::Worker::Worker(SharedState& sharedState, // Unpack the SharedState struct into member variables thread_idx(thread_id), manager(std::move(sm)), - refreshTable(), options(sharedState.options), threads(sharedState.threads), tt(sharedState.tt), - networks(sharedState.networks) { + networks(sharedState.networks), + refreshTable(networks) { clear(); } void Search::Worker::start_searching() { - // Initialize accumulator refresh entries - refreshTable.clear(networks); - // Non-main threads go directly to iterative_deepening() if (!is_mainthread()) { @@ -459,9 +456,10 @@ void Search::Worker::iterative_deepening() { double reduction = (1.48 + mainThread->previousTimeReduction) / (2.17 * timeReduction); double bestMoveInstability = 1 + 1.88 * totBestMoveChanges / threads.size(); int el = std::clamp((bestValue + 750) / 150, 0, 9); + double recapture = limits.capSq == rootMoves[0].pv[0].to_sq() ? 0.955 : 1.005; double totalTime = mainThread->tm.optimum() * fallingEval * reduction - * bestMoveInstability * EvalLevel[el]; + * bestMoveInstability * EvalLevel[el] * recapture; // Cap used time in case of a single legal move for a better viewer experience if (rootMoves.size() == 1) @@ -518,6 +516,8 @@ void Search::Worker::clear() { for (size_t i = 1; i < reductions.size(); ++i) reductions[i] = int((20.14 + std::log(size_t(options["Threads"])) / 2) * std::log(i)); + + refreshTable.clear(networks); } @@ -979,20 +979,22 @@ Value Search::Worker::search( if (capture || givesCheck) { + Piece capturedPiece = pos.piece_on(move.to_sq()); + int captHist = + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)]; + // Futility pruning for captures (~2 Elo) if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { - Piece capturedPiece = pos.piece_on(move.to_sq()); - Value futilityValue = - ss->staticEval + 285 + 277 * lmrDepth + PieceValue[capturedPiece] - + thisThread->captureHistory[movedPiece][move.to_sq()][type_of(capturedPiece)] - / 7; + Value futilityValue = ss->staticEval + 285 + 277 * lmrDepth + + PieceValue[capturedPiece] + captHist / 7; if (futilityValue <= alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - if (!pos.see_ge(move, -203 * depth)) + int seeHist = std::clamp(captHist / 32, -199 * depth, 199 * depth); + if (!pos.see_ge(move, -203 * depth - seeHist)) continue; } else @@ -1079,7 +1081,12 @@ Value Search::Worker::search( // we assume this expected cut-node is not singular (multiple moves fail high), // and we can prune the whole subtree by returning a softbound. else if (singularBeta >= beta) + { + if (!ttCapture) + update_quiet_stats(pos, ss, *this, ttMove, -stat_malus(depth)); + return singularBeta; + } // Negative extensions // If other moves failed high over (ttValue - margin) without the ttMove on a reduced search, @@ -1130,6 +1137,9 @@ Value Search::Worker::search( if (ss->ttPv) r -= 1 + (ttValue > alpha) + (tte->depth() >= depth); + else if (cutNode && move != ttMove && move != ss->killers[0]) + r++; + // Increase reduction for cut nodes (~4 Elo) if (cutNode) r += 2 - (tte->depth() >= depth && ss->ttPv); diff --git a/src/search.h b/src/search.h index 0fd778b47e6..444e3b8bb1d 100644 --- a/src/search.h +++ b/src/search.h @@ -109,8 +109,7 @@ struct RootMove { using RootMoves = std::vector; -// LimitsType struct stores information sent by GUI about available time to -// search the current move, maximum depth/time, or if we are in analysis mode. +// LimitsType struct stores information sent by the caller about the analysis required. struct LimitsType { // Init explicitly due to broken value-initialization of non POD in MSVC @@ -128,6 +127,7 @@ struct LimitsType { int movestogo, depth, mate, perft, infinite; uint64_t nodes; bool ponderMode; + Square capSq; }; @@ -302,15 +302,14 @@ class Worker { Tablebases::Config tbConfig; - // Used by NNUE - - Eval::NNUE::AccumulatorCaches refreshTable; - const OptionsMap& options; ThreadPool& threads; TranspositionTable& tt; const Eval::NNUE::Networks& networks; + // Used by NNUE + Eval::NNUE::AccumulatorCaches refreshTable; + friend class Stockfish::ThreadPool; friend class SearchManager; };