From bdf6b44a7b82026caca84cfb20ea415e653c8f7b Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 2 Dec 2023 17:50:32 -0800 Subject: [PATCH 1/7] Dual net NNUE bench: 1449578 --- src/Makefile | 3 + src/evaluate.cpp | 167 +++++++++++++++------------- src/evaluate.h | 6 +- src/nnue/evaluate_nnue.cpp | 127 +++++++++++++-------- src/nnue/evaluate_nnue.h | 19 ++-- src/nnue/nnue_accumulator.h | 4 +- src/nnue/nnue_architecture.h | 33 ++++-- src/nnue/nnue_feature_transformer.h | 108 ++++++++++-------- src/position.cpp | 14 ++- src/position.h | 24 +++- src/thread.cpp | 2 +- src/uci.cpp | 2 +- src/ucioption.cpp | 3 +- 13 files changed, 310 insertions(+), 202 deletions(-) diff --git a/src/Makefile b/src/Makefile index 59ea7bfe7b5..68aecc21168 100644 --- a/src/Makefile +++ b/src/Makefile @@ -905,6 +905,7 @@ profileclean: @rm -f stockfish.res @rm -f ./-lstdc++.res +ifneq ("x","x") # set up shell variables for the net stuff netvariables: $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @@ -951,6 +952,8 @@ net: netvariables fi; \ fi; \ +endif + format: $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 9c39d4c07fb..007275ee676 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -43,11 +43,15 @@ // const unsigned int gEmbeddedNNUESize; // the size of the embedded file // Note that this does not work in Microsoft Visual Studio. #if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF) -INCBIN(EmbeddedNNUE, EvalFileDefaultName); +INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig); +INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall); #else -const unsigned char gEmbeddedNNUEData[1] = {0x0}; -const unsigned char* const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1]; -const unsigned int gEmbeddedNNUESize = 1; +const unsigned char gEmbeddedNNUEBigData[1] = {0x0}; +const unsigned char* const gEmbeddedNNUEBigEnd = &gEmbeddedNNUEBigData[1]; +const unsigned int gEmbeddedNNUEBigSize = 1; +const unsigned char gEmbeddedNNUESmallData[1] = {0x0}; +const unsigned char* const gEmbeddedNNUESmallEnd = &gEmbeddedNNUESmallData[1]; +const unsigned int gEmbeddedNNUESmallSize = 1; #endif @@ -55,7 +59,9 @@ namespace Stockfish { namespace Eval { -std::string currentEvalFileName = "None"; +std::string currentEvalFileName[2] = {"None", "None"}; +const std::string EvFiles[2] = {"EvalFileBig", "EvalFileSmall"}; +const std::string EvFileNames[2] = {EvalFileDefaultNameBig, EvalFileDefaultNameSmall}; // Tries to load a NNUE network at startup time, or when the engine // receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue" @@ -66,9 +72,11 @@ std::string currentEvalFileName = "None"; // variable to have the engine search in a special directory in their distro. void NNUE::init() { - std::string eval_file = std::string(Options["EvalFile"]); - if (eval_file.empty()) - eval_file = EvalFileDefaultName; + for (bool small : {false, true}) + { + std::string eval_file = std::string(Options[EvFiles[small]]); + if (eval_file.empty()) + eval_file = EvFileNames[small]; #if defined(DEFAULT_NNUE_DIRECTORY) std::vector dirs = {"", "", CommandLine::binaryDirectory, @@ -77,82 +85,79 @@ void NNUE::init() { std::vector dirs = {"", "", CommandLine::binaryDirectory}; #endif - for (const std::string& directory : dirs) - if (currentEvalFileName != eval_file) + for (const std::string& directory : dirs) { - if (directory != "") + if (currentEvalFileName[small] != eval_file) { - std::ifstream stream(directory + eval_file, std::ios::binary); - if (NNUE::load_eval(eval_file, stream)) - currentEvalFileName = eval_file; - } - - if (directory == "" && eval_file == EvalFileDefaultName) - { - // C++ way to prepare a buffer for a memory stream - class MemoryBuffer: public std::basic_streambuf { - public: - MemoryBuffer(char* p, size_t n) { - setg(p, p, p + n); - setp(p, p + n); - } - }; - - MemoryBuffer buffer( - const_cast(reinterpret_cast(gEmbeddedNNUEData)), - size_t(gEmbeddedNNUESize)); - (void) gEmbeddedNNUEEnd; // Silence warning on unused variable - - std::istream stream(&buffer); - if (NNUE::load_eval(eval_file, stream)) - currentEvalFileName = eval_file; + if (directory != "") + { + std::ifstream stream(directory + eval_file, std::ios::binary); + if (NNUE::load_eval(eval_file, stream, small)) + currentEvalFileName[small] = eval_file; + } + + if (directory == "" && eval_file == EvFileNames[small]) + { + // C++ way to prepare a buffer for a memory stream + class MemoryBuffer: public std::basic_streambuf { + public: + MemoryBuffer(char* p, size_t n) { + setg(p, p, p + n); + setp(p, p + n); + } + }; + + MemoryBuffer buffer( + const_cast(reinterpret_cast( + small ? gEmbeddedNNUESmallData : gEmbeddedNNUEBigData)), + size_t(small ? gEmbeddedNNUESmallSize : gEmbeddedNNUEBigSize)); + (void) gEmbeddedNNUEBigEnd; // Silence warning on unused variable + (void) gEmbeddedNNUESmallEnd; + + std::istream stream(&buffer); + if (NNUE::load_eval(eval_file, stream, small)) + currentEvalFileName[small] = eval_file; + } } } + } } // Verifies that the last net used was loaded successfully void NNUE::verify() { - std::string eval_file = std::string(Options["EvalFile"]); - if (eval_file.empty()) - eval_file = EvalFileDefaultName; - - if (currentEvalFileName != eval_file) + for (bool small : {false, true}) { + std::string eval_file = std::string(Options[EvFiles[small]]); + if (eval_file.empty()) + eval_file = EvFileNames[small]; - std::string msg1 = - "Network evaluation parameters compatible with the engine must be available."; - std::string msg2 = "The network file " + eval_file + " was not loaded successfully."; - std::string msg3 = "The UCI option EvalFile might need to specify the full path, " - "including the directory name, to the network file."; - std::string msg4 = "The default net can be downloaded from: " - "https://tests.stockfishchess.org/api/nn/" - + std::string(EvalFileDefaultName); - std::string msg5 = "The engine will be terminated now."; - - sync_cout << "info string ERROR: " << msg1 << sync_endl; - sync_cout << "info string ERROR: " << msg2 << sync_endl; - sync_cout << "info string ERROR: " << msg3 << sync_endl; - sync_cout << "info string ERROR: " << msg4 << sync_endl; - sync_cout << "info string ERROR: " << msg5 << sync_endl; - - exit(EXIT_FAILURE); - } + if (currentEvalFileName[small] != eval_file) + { + std::string msg1 = + "Network evaluation parameters compatible with the engine must be available."; + std::string msg2 = "The network file " + eval_file + " was not loaded successfully."; + std::string msg3 = "The UCI option EvalFile might need to specify the full path, " + "including the directory name, to the network file."; + std::string msg4 = "The default net can be downloaded from: " + "https://tests.stockfishchess.org/api/nn/" + + std::string(EvFileNames[small]); + std::string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); + } - sync_cout << "info string NNUE evaluation using " << eval_file << sync_endl; -} + sync_cout << "info string NNUE evaluation using " << eval_file << sync_endl; + } } - - -// Returns a static, purely materialistic evaluation of the position from -// the point of view of the given color. It can be divided by PawnValue to get -// an approximation of the material advantage on the board in terms of pawns. -Value Eval::simple_eval(const Position& pos, Color c) { - return PawnValue * (pos.count(c) - pos.count(~c)) - + (pos.non_pawn_material(c) - pos.non_pawn_material(~c)); } - // Evaluate is the evaluator for the outer world. It returns a static evaluation // of the position from the point of view of the side to move. Value Eval::evaluate(const Position& pos) { @@ -162,18 +167,28 @@ Value Eval::evaluate(const Position& pos) { Value v; Color stm = pos.side_to_move(); int shuffling = pos.rule50_count(); - int simpleEval = simple_eval(pos, stm) + (int(pos.key() & 7) - 3); + int simpleEval = pos.simple_eval() + (int(pos.key() & 7) - 3); + + int lazyThreshold = RookValue + KnightValue + 16 * shuffling * shuffling + + abs(pos.this_thread()->bestValue) + + abs(pos.this_thread()->rootSimpleEval); - bool lazy = abs(simpleEval) >= RookValue + KnightValue + 16 * shuffling * shuffling - + abs(pos.this_thread()->bestValue) - + abs(pos.this_thread()->rootSimpleEval); + bool lazy = abs(simpleEval) > lazyThreshold * 105 / 100; if (lazy) v = Value(simpleEval); else { - int nnueComplexity; - Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); + int accBias = pos.state()->accumulatorBig.computed[0] + + pos.state()->accumulatorBig.computed[1] + - pos.state()->accumulatorSmall.computed[0] + - pos.state()->accumulatorSmall.computed[1]; + + int nnueComplexity; + bool smallNet = abs(simpleEval) > lazyThreshold * (90 + accBias) / 100; + + Value nnue = smallNet ? NNUE::evaluate(pos, true, &nnueComplexity) + : NNUE::evaluate(pos, true, &nnueComplexity); Value optimism = pos.this_thread()->optimism[stm]; @@ -216,7 +231,7 @@ std::string Eval::trace(Position& pos) { ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15); Value v; - v = NNUE::evaluate(pos, false); + v = NNUE::evaluate(pos, false); v = pos.side_to_move() == WHITE ? v : -v; ss << "NNUE evaluation " << 0.01 * UCI::to_cp(v) << " (white side)\n"; diff --git a/src/evaluate.h b/src/evaluate.h index 2ab477eced2..48ca596bc05 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -31,15 +31,15 @@ namespace Eval { std::string trace(Position& pos); -Value simple_eval(const Position& pos, Color c); Value evaluate(const Position& pos); -extern std::string currentEvalFileName; +extern std::string currentEvalFileName[2]; // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. -#define EvalFileDefaultName "nn-0000000000a0.nnue" +#define EvalFileDefaultNameBig "nn-0000000000a0.nnue" +#define EvalFileDefaultNameSmall "nn-a70fe1969e12.nnue" namespace NNUE { diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index ef6b7e91a60..26ce4bdcde2 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -40,14 +40,16 @@ namespace Stockfish::Eval::NNUE { // Input feature converter -LargePagePtr featureTransformer; +LargePagePtr> featureTransformerBig; +LargePagePtr> featureTransformerSmall; // Evaluation function -AlignedPtr network[LayerStacks]; +AlignedPtr> networkBig[LayerStacks]; +AlignedPtr> networkSmall[LayerStacks]; -// Evaluation function file name -std::string fileName; -std::string netDescription; +// Evaluation function file names +std::string fileName[2]; +std::string netDescription[2]; namespace Detail { @@ -91,11 +93,20 @@ bool write_parameters(std::ostream& stream, const T& reference) { // Initialize the evaluation function parameters -static void initialize() { +static void initialize(bool small) { - Detail::initialize(featureTransformer); - for (std::size_t i = 0; i < LayerStacks; ++i) - Detail::initialize(network[i]); + if (small) + { + Detail::initialize(featureTransformerSmall); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(networkSmall[i]); + } + else + { + Detail::initialize(featureTransformerBig); + for (std::size_t i = 0; i < LayerStacks; ++i) + Detail::initialize(networkBig[i]); + } } // Read network header @@ -122,39 +133,57 @@ static bool write_header(std::ostream& stream, std::uint32_t hashValue, const st } // Read network parameters -static bool read_parameters(std::istream& stream) { +static bool read_parameters(std::istream& stream, bool small) { std::uint32_t hashValue; - if (!read_header(stream, &hashValue, &netDescription)) + if (!read_header(stream, &hashValue, &netDescription[small])) + return false; + if (hashValue != HashValue[small]) return false; - if (hashValue != HashValue) + if (!small && !Detail::read_parameters(stream, *featureTransformerBig)) return false; - if (!Detail::read_parameters(stream, *featureTransformer)) + if ( small && !Detail::read_parameters(stream, *featureTransformerSmall)) return false; for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::read_parameters(stream, *(network[i]))) + { + if (!small && !Detail::read_parameters(stream, *(networkBig[i]))) + return false; + if ( small && !Detail::read_parameters(stream, *(networkSmall[i]))) return false; + } return stream && stream.peek() == std::ios::traits_type::eof(); } // Write network parameters -static bool write_parameters(std::ostream& stream) { +static bool write_parameters(std::ostream& stream, bool small) { - if (!write_header(stream, HashValue, netDescription)) + if (!write_header(stream, HashValue[small], netDescription[small])) + return false; + if (!small && !Detail::write_parameters(stream, *featureTransformerBig)) return false; - if (!Detail::write_parameters(stream, *featureTransformer)) + if (small && !Detail::write_parameters(stream, *featureTransformerSmall)) return false; for (std::size_t i = 0; i < LayerStacks; ++i) - if (!Detail::write_parameters(stream, *(network[i]))) + { + if (!small && !Detail::write_parameters(stream, *(networkBig[i]))) return false; + if (small && !Detail::write_parameters(stream, *(networkSmall[i]))) + return false; + } return bool(stream); } void hint_common_parent_position(const Position& pos) { - featureTransformer->hint_common_access(pos); + + int simpleEval = pos.simple_eval(); + if (abs(simpleEval) < 2500) + featureTransformerBig->hint_common_access(pos); + else + featureTransformerSmall->hint_common_access(pos); } // Evaluation function. Perform differential calculation. +template Value evaluate(const Position& pos, bool adjusted, int* complexity) { // We manually align the arrays on the stack because with gcc < 9.3 @@ -165,19 +194,24 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) { #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType - transformedFeaturesUnaligned[FeatureTransformer::BufferSize - + alignment / sizeof(TransformedFeatureType)]; + transformedFeaturesUnaligned[ + FeatureTransformer::BufferSize + + alignment / sizeof(TransformedFeatureType)]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); #else - alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + + alignas(alignment) TransformedFeatureType transformedFeatures[ + FeatureTransformer::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); const int bucket = (pos.count() - 1) / 4; - const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); + const auto psqt = Small ? featureTransformerSmall->transform(pos, transformedFeatures, bucket) + : featureTransformerBig->transform(pos, transformedFeatures, bucket); + const auto positional = Small ? networkSmall[bucket]->propagate(transformedFeatures) + : networkBig[bucket]->propagate(transformedFeatures); if (complexity) *complexity = abs(psqt - positional) / OutputScale; @@ -190,6 +224,9 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) { return static_cast((psqt + positional) / OutputScale); } +template Value evaluate(const Position& pos, bool adjusted, int* complexity); +template Value evaluate(const Position& pos, bool adjusted, int* complexity); + struct NnueEvalTrace { static_assert(LayerStacks == PSQTBuckets); @@ -206,12 +243,12 @@ static NnueEvalTrace trace_evaluate(const Position& pos) { #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType - transformedFeaturesUnaligned[FeatureTransformer::BufferSize + transformedFeaturesUnaligned[FeatureTransformer::BufferSize + alignment / sizeof(TransformedFeatureType)]; auto* transformedFeatures = align_ptr_up(&transformedFeaturesUnaligned[0]); #else - alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; + alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize]; #endif ASSERT_ALIGNED(transformedFeatures, alignment); @@ -220,8 +257,8 @@ static NnueEvalTrace trace_evaluate(const Position& pos) { t.correctBucket = (pos.count() - 1) / 4; for (IndexType bucket = 0; bucket < LayerStacks; ++bucket) { - const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket); - const auto positional = network[bucket]->propagate(transformedFeatures); + const auto materialist = featureTransformerBig->transform(pos, transformedFeatures, bucket); + const auto positional = networkBig[bucket]->propagate(transformedFeatures); t.psqt[bucket] = static_cast(materialist / OutputScale); t.positional[bucket] = static_cast(positional / OutputScale); @@ -310,7 +347,7 @@ std::string trace(Position& pos) { // We estimate the value of each piece by doing a differential evaluation from // the current base eval, simulating the removal of the piece from its square. - Value base = evaluate(pos); + Value base = evaluate(pos); base = pos.side_to_move() == WHITE ? base : -base; for (File f = FILE_A; f <= FILE_H; ++f) @@ -325,16 +362,16 @@ std::string trace(Position& pos) { auto st = pos.state(); pos.remove_piece(sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + st->accumulatorBig.computed[WHITE] = false; + st->accumulatorBig.computed[BLACK] = false; - Value eval = evaluate(pos); + Value eval = evaluate(pos); eval = pos.side_to_move() == WHITE ? eval : -eval; v = base - eval; pos.put_piece(pc, sq); - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + st->accumulatorBig.computed[WHITE] = false; + st->accumulatorBig.computed[BLACK] = false; } writeSquare(f, r, pc, v); @@ -379,24 +416,24 @@ std::string trace(Position& pos) { // Load eval, from a file stream or a memory stream -bool load_eval(std::string name, std::istream& stream) { +bool load_eval(const std::string name, std::istream& stream, bool small) { - initialize(); - fileName = name; - return read_parameters(stream); + initialize(small); + fileName[small] = name; + return read_parameters(stream, small); } // Save eval, to a file stream or a memory stream -bool save_eval(std::ostream& stream) { +bool save_eval(std::ostream& stream, bool small) { - if (fileName.empty()) + if (fileName[small].empty()) return false; - return write_parameters(stream); + return write_parameters(stream, small); } // Save eval, to a file given by its name -bool save_eval(const std::optional& filename) { +bool save_eval(const std::optional& filename, bool small) { std::string actualFilename; std::string msg; @@ -405,7 +442,7 @@ bool save_eval(const std::optional& filename) { actualFilename = filename.value(); else { - if (currentEvalFileName != EvalFileDefaultName) + if (currentEvalFileName[small] != (small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig)) { msg = "Failed to export a net. " "A non-embedded net can only be saved if the filename is specified"; @@ -413,11 +450,11 @@ bool save_eval(const std::optional& filename) { sync_cout << msg << sync_endl; return false; } - actualFilename = EvalFileDefaultName; + actualFilename = (small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig); } std::ofstream stream(actualFilename, std::ios_base::binary); - bool saved = save_eval(stream); + bool saved = save_eval(stream, small); msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net"; diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 6edc212f4d7..e5367283e14 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -39,9 +39,11 @@ enum Value : int; namespace Stockfish::Eval::NNUE { // Hash value of evaluation function structure -constexpr std::uint32_t HashValue = - FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); - +constexpr std::uint32_t HashValue[2] = + { FeatureTransformer::get_hash_value() + ^ Network::get_hash_value(), + FeatureTransformer::get_hash_value() + ^ Network::get_hash_value() }; // Deleter for automating release of memory area template @@ -67,12 +69,13 @@ template using LargePagePtr = std::unique_ptr>; std::string trace(Position& pos); -Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); -void hint_common_parent_position(const Position& pos); +template +Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); +void hint_common_parent_position(const Position& pos); -bool load_eval(std::string name, std::istream& stream); -bool save_eval(std::ostream& stream); -bool save_eval(const std::optional& filename); +bool load_eval(const std::string name, std::istream& stream, bool small); +bool save_eval(std::ostream& stream, bool small); +bool save_eval(const std::optional& filename, bool small); } // namespace Stockfish::Eval::NNUE diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 2f1b1d35e52..6d45bd40310 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,8 +29,10 @@ namespace Stockfish::Eval::NNUE { // Class that holds the result of affine transformation of input features +template struct alignas(CacheLineSize) Accumulator { - std::int16_t accumulation[2][TransformedFeatureDimensions]; + std::int16_t accumulation[2][Small ? TransformedFeatureDimensionsSmall + : TransformedFeatureDimensionsBig]; std::int32_t psqtAccumulation[2][PSQTBuckets]; bool computed[2]; }; diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index e4c308cb267..adfeaf045eb 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -38,13 +38,22 @@ namespace Stockfish::Eval::NNUE { using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensions = 2560; -constexpr IndexType PSQTBuckets = 8; -constexpr IndexType LayerStacks = 8; +constexpr IndexType TransformedFeatureDimensionsBig = 2560; +constexpr int L2Big = 15; +constexpr int L3Big = 32; +constexpr IndexType TransformedFeatureDimensionsSmall = 1024; +constexpr int L2Small = 15; +constexpr int L3Small = 32; + +constexpr IndexType PSQTBuckets = 8; +constexpr IndexType LayerStacks = 8; + +template struct Network { - static constexpr int FC_0_OUTPUTS = 15; - static constexpr int FC_1_OUTPUTS = 32; + static constexpr IndexType TransformedFeatureDimensions = L1; + static constexpr int FC_0_OUTPUTS = L2; + static constexpr int FC_1_OUTPUTS = L3; Layers::AffineTransformSparseInput fc_0; Layers::SqrClippedReLU ac_sqr_0; @@ -84,13 +93,13 @@ struct Network { std::int32_t propagate(const TransformedFeatureType* transformedFeatures) { struct alignas(CacheLineSize) Buffer { - alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out; - alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType + alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out; + alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType ac_sqr_0_out[ceil_to_multiple(FC_0_OUTPUTS * 2, 32)]; - alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out; - alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out; - alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out; - alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out; + alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out; + alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out; + alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out; + alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out; Buffer() { std::memset(this, 0, sizeof(*this)); } }; @@ -108,7 +117,7 @@ struct Network { ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out); ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out); std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out, - FC_0_OUTPUTS * sizeof(decltype(ac_0)::OutputType)); + FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType)); fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out); ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out); fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out); diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 2af80f07792..8154b2f0a1f 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -186,11 +186,6 @@ static constexpr int BestRegisterCount() { return 1; } - -static constexpr int NumRegs = - BestRegisterCount(); -static constexpr int NumPsqtRegs = - BestRegisterCount(); #if defined(__GNUC__) #pragma GCC diagnostic pop #endif @@ -198,13 +193,21 @@ static constexpr int NumPsqtRegs = // Input feature converter +template class FeatureTransformer { private: + static constexpr bool Small = TransformedFeatureDimensions == TransformedFeatureDimensionsSmall; + // Number of output dimensions for one side static constexpr IndexType HalfDimensions = TransformedFeatureDimensions; #ifdef VECTOR + static constexpr int NumRegs = + BestRegisterCount(); + static constexpr int NumPsqtRegs = + BestRegisterCount(); + static constexpr IndexType TileHeight = NumRegs * sizeof(vec_t) / 2; static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4; static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions"); @@ -247,14 +250,22 @@ class FeatureTransformer { return !stream.fail(); } + // Cast a pointer to a 2 dimensional array of width D + template + static constexpr T (*cast_2D(T* pt))[D] { + return (T(*)[D])pt; + } + // Convert input features std::int32_t transform(const Position& pos, OutputType* output, int bucket) const { update_accumulator(pos); update_accumulator(pos); const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; - const auto& accumulation = pos.state()->accumulator.accumulation; - const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation; + const auto& accumulation = + cast_2D(pos.state()->template accumulation()); + const auto& psqtAccumulation = + cast_2D(pos.state()->template psqt_accumulation()); const auto psqt = (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket]) @@ -323,7 +334,7 @@ class FeatureTransformer { // of the estimated gain in terms of features to be added/subtracted. StateInfo *st = pos.state(), *next = nullptr; int gain = FeatureSet::refresh_cost(pos); - while (st->previous && !st->accumulator.computed[Perspective]) + while (st->previous && !st->template computed()[Perspective]) { // This governs when a full feature refresh is needed and how many // updates are better than just one full refresh. @@ -381,7 +392,7 @@ class FeatureTransformer { for (; i >= 0; --i) { - states_to_update[i]->accumulator.computed[Perspective] = true; + states_to_update[i]->template computed()[Perspective] = true; const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; @@ -401,10 +412,10 @@ class FeatureTransformer { { assert(states_to_update[0]); - auto accIn = - reinterpret_cast(&st->accumulator.accumulation[Perspective][0]); - auto accOut = reinterpret_cast( - &states_to_update[0]->accumulator.accumulation[Perspective][0]); + auto accIn = reinterpret_cast(&cast_2D( + st->template accumulation())[Perspective][0]); + auto accOut = reinterpret_cast(&cast_2D( + states_to_update[0]->template accumulation())[Perspective][0]); const IndexType offsetR0 = HalfDimensions * removed[0][0]; auto columnR0 = reinterpret_cast(&weights[offsetR0]); @@ -428,10 +439,10 @@ class FeatureTransformer { vec_add_16(columnR0[k], columnR1[k])); } - auto accPsqtIn = reinterpret_cast( - &st->accumulator.psqtAccumulation[Perspective][0]); - auto accPsqtOut = reinterpret_cast( - &states_to_update[0]->accumulator.psqtAccumulation[Perspective][0]); + auto accPsqtIn = reinterpret_cast(&cast_2D( + st->template psqt_accumulation())[Perspective][0]); + auto accPsqtOut = reinterpret_cast(&cast_2D( + states_to_update[0]->template psqt_accumulation())[Perspective][0]); const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0]; auto columnPsqtR0 = reinterpret_cast(&psqtWeights[offsetPsqtR0]); @@ -462,8 +473,8 @@ class FeatureTransformer { for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) { // Load accumulator - auto accTileIn = reinterpret_cast( - &st->accumulator.accumulation[Perspective][j * TileHeight]); + auto accTileIn = reinterpret_cast(&cast_2D( + st->template accumulation())[Perspective][j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_load(&accTileIn[k]); @@ -488,8 +499,8 @@ class FeatureTransformer { } // Store accumulator - auto accTileOut = reinterpret_cast( - &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); + auto accTileOut = reinterpret_cast(&cast_2D( + states_to_update[i]->template accumulation())[Perspective][j * TileHeight]); for (IndexType k = 0; k < NumRegs; ++k) vec_store(&accTileOut[k], acc[k]); } @@ -498,8 +509,8 @@ class FeatureTransformer { for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) { // Load accumulator - auto accTilePsqtIn = reinterpret_cast( - &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto accTilePsqtIn = reinterpret_cast(&cast_2D( + st->template psqt_accumulation())[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = vec_load_psqt(&accTilePsqtIn[k]); @@ -524,9 +535,8 @@ class FeatureTransformer { } // Store accumulator - auto accTilePsqtOut = reinterpret_cast( - &states_to_update[i] - ->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto accTilePsqtOut = reinterpret_cast(&cast_2D( + states_to_update[i]->template psqt_accumulation())[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqtOut[k], psqt[k]); } @@ -535,13 +545,13 @@ class FeatureTransformer { #else for (IndexType i = 0; states_to_update[i]; ++i) { - std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], - st->accumulator.accumulation[Perspective], + std::memcpy(cast_2D(states_to_update[i]->template accumulation())[Perspective], + cast_2D(st->template accumulation())[Perspective], HalfDimensions * sizeof(BiasType)); for (std::size_t k = 0; k < PSQTBuckets; ++k) - states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = - st->accumulator.psqtAccumulation[Perspective][k]; + cast_2D(states_to_update[i]->template psqt_accumulation())[Perspective][k] = + cast_2D(st->template psqt_accumulation())[Perspective][k]; st = states_to_update[i]; @@ -551,10 +561,11 @@ class FeatureTransformer { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; + cast_2D( + st->template accumulation())[Perspective][j] -= weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] -= + cast_2D(st->template psqt_accumulation())[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; } @@ -564,10 +575,11 @@ class FeatureTransformer { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] += weights[offset + j]; + cast_2D( + st->template accumulation())[Perspective][j] += weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] += + cast_2D(st->template psqt_accumulation())[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; } } @@ -586,8 +598,8 @@ class FeatureTransformer { // Refresh the accumulator // Could be extracted to a separate function because it's done in 2 places, // but it's unclear if compilers would correctly handle register allocation. - auto& accumulator = pos.state()->accumulator; - accumulator.computed[Perspective] = true; + StateInfo* st = pos.state(); + st->template computed()[Perspective] = true; FeatureSet::IndexList active; FeatureSet::append_active_indices(pos, active); @@ -607,8 +619,8 @@ class FeatureTransformer { acc[k] = vec_add_16(acc[k], column[k]); } - auto accTile = - reinterpret_cast(&accumulator.accumulation[Perspective][j * TileHeight]); + auto accTile = reinterpret_cast(&cast_2D( + st->template accumulation())[Perspective][j * TileHeight]); for (unsigned k = 0; k < NumRegs; k++) vec_store(&accTile[k], acc[k]); } @@ -627,28 +639,30 @@ class FeatureTransformer { psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + auto accTilePsqt = reinterpret_cast(&cast_2D( + st->template psqt_accumulation())[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqt[k], psqt[k]); } #else - std::memcpy(accumulator.accumulation[Perspective], biases, + std::memcpy(cast_2D(st->template accumulation())[Perspective], + biases, HalfDimensions * sizeof(BiasType)); for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] = 0; + cast_2D(st->template psqt_accumulation())[Perspective][k] = 0; for (const auto index : active) { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[Perspective][j] += weights[offset + j]; + cast_2D( + st->template accumulation())[Perspective][j] += weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] += + cast_2D(st->template psqt_accumulation())[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; } #endif @@ -663,12 +677,12 @@ class FeatureTransformer { // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. // Fast early exit. - if (pos.state()->accumulator.computed[Perspective]) + if (pos.state()->template computed()[Perspective]) return; auto [oldest_st, _] = try_find_computed_accumulator(pos); - if (oldest_st->accumulator.computed[Perspective]) + if (oldest_st->template computed()[Perspective]) { // Only update current position accumulator to minimize work. StateInfo* states_to_update[2] = {pos.state(), nullptr}; @@ -685,7 +699,7 @@ class FeatureTransformer { auto [oldest_st, next] = try_find_computed_accumulator(pos); - if (oldest_st->accumulator.computed[Perspective]) + if (oldest_st->template computed()[Perspective]) { if (next == nullptr) return; diff --git a/src/position.cpp b/src/position.cpp index c45dd7b2e22..f03afb0e4f6 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -684,8 +684,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { ++st->pliesFromNull; // Used by NNUE - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + st->accumulatorBig.computed[WHITE] = + st->accumulatorBig.computed[BLACK] = + st->accumulatorSmall.computed[WHITE] = + st->accumulatorSmall.computed[BLACK] = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -964,15 +966,17 @@ void Position::do_null_move(StateInfo& newSt) { assert(!checkers()); assert(&newSt != st); - std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig)); newSt.previous = st; st = &newSt; st->dirtyPiece.dirty_num = 0; st->dirtyPiece.piece[0] = NO_PIECE; // Avoid checks in UpdateAccumulator() - st->accumulator.computed[WHITE] = false; - st->accumulator.computed[BLACK] = false; + st->accumulatorBig.computed[WHITE] = + st->accumulatorBig.computed[BLACK] = + st->accumulatorSmall.computed[WHITE] = + st->accumulatorSmall.computed[BLACK] = false; if (st->epSquare != SQ_NONE) { diff --git a/src/position.h b/src/position.h index ce03c34f332..6d7aa8fde5e 100644 --- a/src/position.h +++ b/src/position.h @@ -57,8 +57,22 @@ struct StateInfo { int repetition; // Used by NNUE - Eval::NNUE::Accumulator accumulator; - DirtyPiece dirtyPiece; + Eval::NNUE::Accumulator accumulatorBig; + Eval::NNUE::Accumulator accumulatorSmall; + DirtyPiece dirtyPiece; + + template constexpr std::int16_t* accumulation() { + return Small ? (std::int16_t*)accumulatorSmall.accumulation + : (std::int16_t*)accumulatorBig.accumulation; + } + template constexpr std::int32_t* psqt_accumulation() { + return Small ? (std::int32_t*)accumulatorSmall.psqtAccumulation + : (std::int32_t*)accumulatorBig.psqtAccumulation; + } + template constexpr bool* computed() { + return Small ? accumulatorSmall.computed + : accumulatorBig.computed; + } }; @@ -160,6 +174,7 @@ class Position { int rule50_count() const; Value non_pawn_material(Color c) const; Value non_pawn_material() const; + Value simple_eval() const; // Position consistency check, for debugging bool pos_is_ok() const; @@ -305,6 +320,11 @@ inline Value Position::non_pawn_material() const { return non_pawn_material(WHITE) + non_pawn_material(BLACK); } +inline Value Position::simple_eval() const { + return PawnValue * (count(sideToMove) - count(~sideToMove)) + + (non_pawn_material(sideToMove) - non_pawn_material(~sideToMove)); +} + inline int Position::game_ply() const { return gamePly; } inline int Position::rule50_count() const { return st->rule50; } diff --git a/src/thread.cpp b/src/thread.cpp index bc884dedf01..f172199d691 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -210,7 +210,7 @@ void ThreadPool::start_thinking(Position& pos, th->rootMoves = rootMoves; th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); th->rootState = setupStates->back(); - th->rootSimpleEval = Eval::simple_eval(pos, pos.side_to_move()); + th->rootSimpleEval = pos.simple_eval(); } main()->start_searching(); diff --git a/src/uci.cpp b/src/uci.cpp index 95f6f349dd3..e169e78477a 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -320,7 +320,7 @@ void UCI::loop(int argc, char* argv[]) { std::string f; if (is >> std::skipws >> f) filename = f; - Eval::NNUE::save_eval(filename); + Eval::NNUE::save_eval(filename, false); } else if (token == "--help" || token == "help" || token == "--license" || token == "license") sync_cout diff --git a/src/ucioption.cpp b/src/ucioption.cpp index d0db1c76dd2..6858a296c52 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -84,7 +84,8 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); + o["EvalFileBig"] << Option(EvalFileDefaultNameBig, on_eval_file); + o["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, on_eval_file); } From f4b75a53588332b503cf8df0d0b4dacc7a08aa79 Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 2 Dec 2023 20:13:35 -0800 Subject: [PATCH 2/7] Smaller 256 net and fixed Makefile by @linrock bench: 1380121 --- src/Makefile | 58 +++++++++++++++++++++++++++++++----- src/evaluate.h | 2 +- src/nnue/nnue_architecture.h | 2 +- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/Makefile b/src/Makefile index 68aecc21168..44814aaea69 100644 --- a/src/Makefile +++ b/src/Makefile @@ -791,6 +791,7 @@ help: @echo "profile-build > standard build with profile-guided optimization" @echo "build > skip profile-guided optimization" @echo "net > Download the default nnue net" + @echo "net2 > Download the smaller nnue net" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" @@ -857,13 +858,13 @@ endif clang-profile-use clang-profile-make FORCE \ format analyze -analyze: net config-sanity objclean +analyze: net net2 config-sanity objclean $(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS) -build: net config-sanity +build: net net2 config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all -profile-build: net config-sanity objclean profileclean +profile-build: net net2 config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) @@ -905,10 +906,9 @@ profileclean: @rm -f stockfish.res @rm -f ./-lstdc++.res -ifneq ("x","x") # set up shell variables for the net stuff netvariables: - $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultNameBig evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) $(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @@ -952,7 +952,51 @@ net: netvariables fi; \ fi; \ -endif +netvariables2: + $(eval nnuenet := $(shell grep EvalFileDefaultNameSmall evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet)) + $(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) + $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) + $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) + +# evaluation network (nnue) +net2: netvariables2 + @echo "Default net: $(nnuenet)" + @if [ "x$(curl_or_wget)" = "x" ]; then \ + echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \ + fi + @if [ "x$(shasum_command)" = "x" ]; then \ + echo "shasum / sha256sum not found, skipping net validation"; \ + elif test -f "$(nnuenet)"; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Removing invalid network"; rm -f $(nnuenet); \ + fi; \ + fi; + @for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \ + if test -f "$(nnuenet)"; then \ + echo "$(nnuenet) available : OK"; break; \ + else \ + if [ "x$(curl_or_wget)" != "x" ]; then \ + echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ + else \ + echo "No net found and download not possible"; exit 1;\ + fi; \ + fi; \ + if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Removing failed download"; rm -f $(nnuenet); \ + fi; \ + fi; \ + done + @if ! test -f "$(nnuenet)"; then \ + echo "Failed to download $(nnuenet)."; \ + fi; + @if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" = "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Network validated"; break; \ + fi; \ + fi; \ + format: $(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file @@ -1076,6 +1120,6 @@ icx-profile-use: .depend: $(SRCS) -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null -ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean config-sanity)) +ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net net2 objclean profileclean config-sanity)) -include .depend endif diff --git a/src/evaluate.h b/src/evaluate.h index 48ca596bc05..6eafdd539f9 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -39,7 +39,7 @@ extern std::string currentEvalFileName[2]; // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. #define EvalFileDefaultNameBig "nn-0000000000a0.nnue" -#define EvalFileDefaultNameSmall "nn-a70fe1969e12.nnue" +#define EvalFileDefaultNameSmall "nn-ecb35f70ff2a.nnue" namespace NNUE { diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index adfeaf045eb..0778c3f455e 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -42,7 +42,7 @@ constexpr IndexType TransformedFeatureDimensionsBig = 2560; constexpr int L2Big = 15; constexpr int L3Big = 32; -constexpr IndexType TransformedFeatureDimensionsSmall = 1024; +constexpr IndexType TransformedFeatureDimensionsSmall = 256; constexpr int L2Small = 15; constexpr int L3Small = 32; From eac74d6caf2b26a6c6b73582c5ab57c54f1c5afa Mon Sep 17 00:00:00 2001 From: mstembera Date: Sat, 2 Dec 2023 21:16:38 -0800 Subject: [PATCH 3/7] Change EvalFileBig back to EvalFile to make fishtest happy bench: 1380121 --- src/ucioption.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 6858a296c52..e9f48d4ee67 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -84,7 +84,7 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["EvalFileBig"] << Option(EvalFileDefaultNameBig, on_eval_file); + o["EvalFile"] << Option(EvalFileDefaultNameBig, on_eval_file); o["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, on_eval_file); } From 0455d0cab104f615676869e8643bba49446c535c Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 3 Dec 2023 18:46:22 -0500 Subject: [PATCH 4/7] hint big nnue below 2000 simple eval bench 1380121 --- src/nnue/evaluate_nnue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 26ce4bdcde2..bb9efa80231 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -176,7 +176,7 @@ static bool write_parameters(std::ostream& stream, bool small) { void hint_common_parent_position(const Position& pos) { int simpleEval = pos.simple_eval(); - if (abs(simpleEval) < 2500) + if (abs(simpleEval) < 2000) featureTransformerBig->hint_common_access(pos); else featureTransformerSmall->hint_common_access(pos); From 64237596a0eaf37630a71b3e4a636a9a42be5058 Mon Sep 17 00:00:00 2001 From: Linmiao Xu Date: Sun, 3 Dec 2023 20:09:57 -0500 Subject: [PATCH 5/7] big below 2200, add stochastic term bench 1440404 --- src/nnue/evaluate_nnue.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index bb9efa80231..94a40a3e6c2 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -175,8 +175,8 @@ static bool write_parameters(std::ostream& stream, bool small) { void hint_common_parent_position(const Position& pos) { - int simpleEval = pos.simple_eval(); - if (abs(simpleEval) < 2000) + int simpleEval = pos.simple_eval() + (int(pos.key() & 7) - 3); + if (abs(simpleEval) < 2200) featureTransformerBig->hint_common_access(pos); else featureTransformerSmall->hint_common_access(pos); From c23423787c49f33ee0e7856eb6befc8a10dd2e84 Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:41:18 +0800 Subject: [PATCH 6/7] Try a search tune for dual NNUE. Bench: 1440404 --- src/search.cpp | 120 ++++++++++++++++++++++++++++++------------------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index b3ca8c9afe5..b0eb1eb8a26 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -47,6 +47,32 @@ namespace Stockfish { +int +a1=125, a2=43, a3=1487, a4=976, a5=808, a6=291, a7=350, a8=1200, a9=361, a10=361, a11=1182, +b1=10, b2=15335, b3=110, b4=121, b5=14, b6=1449, b7=1449, +c1=474, c2=270, c3=174, c4=9, c5=321, c6=29462, c7=17257, c8=24, c9=281, c10=152, c11=14, c12=8, c13=168, c14=70, +d1=416, d2=7, d3=239, d4=291, d5=185, d6=6, d7=3645, d8=7836, d9=13, d10=62, d11=123, d12=77, d13=127, d14=26, +e1=24, e2=64, e3=57, e4=18, e5=11, e6=15, e7=19, e8=9, e9=4194, e10=4000, e11=7, e12=3848, e13=14200, +f1=50, f2=2, f3=2, f4=12, f5=13828, f6=11369, f7=6, f8=657, f9=10, +g1=200, g2=90, g3=168, g4=168; + +TUNE(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, + b1); +TUNE(SetRange(1, 30000), b2); +TUNE(b3, b4, b5, b6, b7, + c1, c2, c3, c4); +TUNE(SetRange(1, 600), c5); +TUNE(c6, c7, c8, c9); +TUNE(SetRange(1, 300), c10); +TUNE(c11, c12, c13, c14, + d1, d2, d3, d4, d5, d6, d7); +TUNE(SetRange(1, 15000), d8); +TUNE(d9, d10, d11, d12, d13, d14, + e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12); +TUNE(SetRange(1, 30000), e13); +TUNE(f1, f2, f3, f4, f5, f6, f7, f8, f9, + g1, g2, g3, g4); + namespace Search { LimitsType Limits; @@ -77,7 +103,7 @@ enum NodeType { // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving) { - return Value((125 - 43 * noTtCutNode) * (d - improving)); + return Value((a1 - a2 * noTtCutNode) * (d - improving)); } // Reductions lookup table initialized at startup @@ -85,8 +111,8 @@ int Reductions[MAX_MOVES]; // [depth or moveNumber] Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int reductionScale = Reductions[d] * Reductions[mn]; - return (reductionScale + 1487 - int(delta) * 976 / int(rootDelta)) / 1024 - + (!i && reductionScale > 808); + return (reductionScale + a3 - int(delta) * a4 / int(rootDelta)) / 1024 + + (!i && reductionScale > a5); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -94,10 +120,10 @@ constexpr int futility_move_count(bool improving, Depth depth) { } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::min(291 * d - 350, 1200); } +int stat_bonus(Depth d) { return std::min(a6 * d - a7, a8); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return std::min(361 * d - 361, 1182); } +int stat_malus(Depth d) { return std::min(a9 * d - a10, a11); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(const Thread* thisThread) { @@ -367,12 +393,12 @@ void Thread::search() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = Value(10) + int(avg) * avg / 15335; + delta = Value(b1) + int(avg) * avg / b2; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = 110 * avg / (std::abs(avg) + 121); + optimism[us] = b3 * avg / (std::abs(avg) + b4); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -746,7 +772,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Use static evaluation difference to improve quiet move ordering (~4 Elo) if (is_ok((ss - 1)->currentMove) && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-14 * int((ss - 1)->staticEval + ss->staticEval), -1449, 1449); + int bonus = std::clamp(-b5 * int((ss - 1)->staticEval + ss->staticEval), -b6, b7); thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && type_of((ss - 1)->currentMove) != PROMOTION) thisThread->pawnHistory[pawn_structure(pos)][pos.piece_on(prevSq)][prevSq] << bonus / 4; @@ -765,7 +791,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. // Adjust razor margin according to cutoffCnt. (~1 Elo) - if (eval < alpha - 474 - (270 - 174 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) + if (eval < alpha - c1 - (c2 - c3 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -774,24 +800,24 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < 9 + if (!ss->ttPv && depth < c4 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving) - - (ss - 1)->statScore / 321 + - (ss - 1)->statScore / c5 >= beta - && eval >= beta && eval < 29462 // smaller than TB wins + && eval >= beta && eval < c6 // smaller than TB wins && (!ttMove || ttCapture)) return (eval + beta) / 2; // Step 9. Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < 17257 && eval >= beta - && eval >= ss->staticEval && ss->staticEval >= beta - 24 * depth + 281 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < c7 && eval >= beta + && eval >= ss->staticEval && ss->staticEval >= beta - c8 * depth + c9 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / 152, 6) + depth / 3 + 4; + Depth R = std::min(int(eval - beta) / c10, 6) + depth / 3 + 4; ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -805,7 +831,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Do not return unproven mate or TB scores if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { - if (thisThread->nmpMinPly || depth < 14) + if (thisThread->nmpMinPly || depth < c11) return nullValue; assert(!thisThread->nmpMinPly); // Recursive verification is not allowed @@ -835,10 +861,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo return qsearch(pos, ss, alpha, beta); // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. - if (cutNode && depth >= 8 && !ttMove) + if (cutNode && depth >= c12 && !ttMove) depth -= 2; - probCutBeta = beta + 168 - 70 * improving; + probCutBeta = beta + c13 - c14 * improving; // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value @@ -896,7 +922,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + 416; + probCutBeta = beta + d1; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -979,18 +1005,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo if (capture || givesCheck) { // Futility pruning for captures (~2 Elo) - if (!givesCheck && lmrDepth < 7 && !ss->inCheck) + if (!givesCheck && lmrDepth < d2 && !ss->inCheck) { Piece capturedPiece = pos.piece_on(to_sq(move)); int futilityEval = - ss->staticEval + 239 + 291 * lmrDepth + PieceValue[capturedPiece] + ss->staticEval + d3 + d4 * lmrDepth + PieceValue[capturedPiece] + captureHistory[movedPiece][to_sq(move)][type_of(capturedPiece)] / 7; if (futilityEval < alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - if (!pos.see_ge(move, Value(-185) * depth)) + if (!pos.see_ge(move, Value(-d5) * depth)) continue; } else @@ -1001,25 +1027,25 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo + thisThread->pawnHistory[pawn_structure(pos)][movedPiece][to_sq(move)]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < 6 && history < -3645 * depth) + if (lmrDepth < d6 && history < -d7 * depth) continue; history += 2 * thisThread->mainHistory[us][from_to(move)]; - lmrDepth += history / 7836; + lmrDepth += history / d8; lmrDepth = std::max(lmrDepth, -1); // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < 13 - && ss->staticEval + (bestValue < ss->staticEval - 62 ? 123 : 77) - + 127 * lmrDepth + if (!ss->inCheck && lmrDepth < d9 + && ss->staticEval + (bestValue < ss->staticEval - d10 ? d11 : d12) + + d13 * lmrDepth <= alpha) continue; lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, Value(-26 * lmrDepth * lmrDepth))) + if (!pos.see_ge(move, Value(-d14 * lmrDepth * lmrDepth))) continue; } } @@ -1039,11 +1065,11 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // so changing them requires tests at these types of time controls. // Recursive singular search is avoided. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > 24) + 2 * (PvNode && tte->is_pv()) + && depth >= 4 - (thisThread->completedDepth > e1) + 2 * (PvNode && tte->is_pv()) && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (64 + 57 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttValue - (e2 + e3 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1057,10 +1083,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo singularQuietLMR = !ttCapture; // Avoid search explosion by limiting the number of double extensions - if (!PvNode && value < singularBeta - 18 && ss->doubleExtensions <= 11) + if (!PvNode && value < singularBeta - e4 && ss->doubleExtensions <= e5) { extension = 2; - depth += depth < 15; + depth += depth < e6; } } @@ -1084,7 +1110,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) else if (cutNode) - extension = depth < 19 ? -2 : -1; + extension = depth < e7 ? -2 : -1; // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo) else if (ttValue <= value) @@ -1092,18 +1118,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo } // Check extensions (~1 Elo) - else if (givesCheck && depth > 9) + else if (givesCheck && depth > e8) extension = 1; // Quiet ttMove extensions (~1 Elo) else if (PvNode && move == ttMove && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= 4194) + && (*contHist[0])[movedPiece][to_sq(move)] >= e9) extension = 1; // Recapture extensions (~1 Elo) else if (PvNode && move == ttMove && to_sq(move) == prevSq && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] - > 4000) + > e10) extension = 1; } @@ -1127,7 +1153,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo r -= cutNode && tte->depth() >= depth ? 3 : 2; // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss - 1)->moveCount > 7) + if ((ss - 1)->moveCount > e11) r--; // Increase reduction for cut nodes (~3 Elo) @@ -1162,10 +1188,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - 3848; + + (*contHist[3])[movedPiece][to_sq(move)] - e12; // Decrease/increase reduction for moves with a good/bad history (~25 Elo) - r -= ss->statScore / 14200; + r -= ss->statScore / e13; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) // We use various heuristics for the sons of a node after the first son has @@ -1188,7 +1214,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + 50 + 2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + f1 + f2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1303,7 +1329,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > 2 && depth < 12 && beta < 13828 && value > -11369) + if (depth > f3 && depth < f4 && beta < f5 && value > -f6) depth -= 2; assert(depth > 0); @@ -1342,8 +1368,8 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 657) - + ((ss - 1)->moveCount > 10); + int bonus = (depth > f7) + (PvNode || cutNode) + (bestValue < alpha - f8) + + ((ss - 1)->moveCount > f9); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] @@ -1475,7 +1501,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + 200; + futilityBase = ss->staticEval + g1; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1555,7 +1581,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, Value(-90))) + if (!pos.see_ge(move, Value(-g2))) continue; } @@ -1691,7 +1717,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + 168 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + g3 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus // Increase stats for the best move in case it was a quiet move @@ -1699,7 +1725,7 @@ void update_all_stats(const Position& pos, thisThread->pawnHistory[pawn_structure(pos)][moved_piece][to_sq(bestMove)] << quietMoveBonus; - int moveMalus = bestValue > beta + 168 ? quietMoveMalus // larger malus + int moveMalus = bestValue > beta + g4 ? quietMoveMalus // larger malus : stat_malus(depth); // smaller malus // Decrease stats for all non-best quiet moves From e15eec7221d67e52c1db3778b2772489e8955a0f Mon Sep 17 00:00:00 2001 From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com> Date: Wed, 6 Dec 2023 21:20:41 +0800 Subject: [PATCH 7/7] v1: Tuned 44k games, use latest linrock smallnet. Bench: 1216398 --- src/evaluate.h | 2 +- src/search.cpp | 120 +++++++++++++++++++------------------------------ 2 files changed, 48 insertions(+), 74 deletions(-) diff --git a/src/evaluate.h b/src/evaluate.h index 6eafdd539f9..5a105870c54 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -39,7 +39,7 @@ extern std::string currentEvalFileName[2]; // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. #define EvalFileDefaultNameBig "nn-0000000000a0.nnue" -#define EvalFileDefaultNameSmall "nn-ecb35f70ff2a.nnue" +#define EvalFileDefaultNameSmall "nn-9067e33176e8.nnue" namespace NNUE { diff --git a/src/search.cpp b/src/search.cpp index b0eb1eb8a26..132e70cd4bf 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -47,32 +47,6 @@ namespace Stockfish { -int -a1=125, a2=43, a3=1487, a4=976, a5=808, a6=291, a7=350, a8=1200, a9=361, a10=361, a11=1182, -b1=10, b2=15335, b3=110, b4=121, b5=14, b6=1449, b7=1449, -c1=474, c2=270, c3=174, c4=9, c5=321, c6=29462, c7=17257, c8=24, c9=281, c10=152, c11=14, c12=8, c13=168, c14=70, -d1=416, d2=7, d3=239, d4=291, d5=185, d6=6, d7=3645, d8=7836, d9=13, d10=62, d11=123, d12=77, d13=127, d14=26, -e1=24, e2=64, e3=57, e4=18, e5=11, e6=15, e7=19, e8=9, e9=4194, e10=4000, e11=7, e12=3848, e13=14200, -f1=50, f2=2, f3=2, f4=12, f5=13828, f6=11369, f7=6, f8=657, f9=10, -g1=200, g2=90, g3=168, g4=168; - -TUNE(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, - b1); -TUNE(SetRange(1, 30000), b2); -TUNE(b3, b4, b5, b6, b7, - c1, c2, c3, c4); -TUNE(SetRange(1, 600), c5); -TUNE(c6, c7, c8, c9); -TUNE(SetRange(1, 300), c10); -TUNE(c11, c12, c13, c14, - d1, d2, d3, d4, d5, d6, d7); -TUNE(SetRange(1, 15000), d8); -TUNE(d9, d10, d11, d12, d13, d14, - e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12); -TUNE(SetRange(1, 30000), e13); -TUNE(f1, f2, f3, f4, f5, f6, f7, f8, f9, - g1, g2, g3, g4); - namespace Search { LimitsType Limits; @@ -103,7 +77,7 @@ enum NodeType { // Futility margin Value futility_margin(Depth d, bool noTtCutNode, bool improving) { - return Value((a1 - a2 * noTtCutNode) * (d - improving)); + return Value((116 - 44 * noTtCutNode) * (d - improving)); } // Reductions lookup table initialized at startup @@ -111,8 +85,8 @@ int Reductions[MAX_MOVES]; // [depth or moveNumber] Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int reductionScale = Reductions[d] * Reductions[mn]; - return (reductionScale + a3 - int(delta) * a4 / int(rootDelta)) / 1024 - + (!i && reductionScale > a5); + return (reductionScale + 1346 - int(delta) * 896 / int(rootDelta)) / 1024 + + (!i && reductionScale > 880); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -120,10 +94,10 @@ constexpr int futility_move_count(bool improving, Depth depth) { } // History and stats update bonus, based on depth -int stat_bonus(Depth d) { return std::min(a6 * d - a7, a8); } +int stat_bonus(Depth d) { return std::min(268 * d - 352, 1153); } // History and stats update malus, based on depth -int stat_malus(Depth d) { return std::min(a9 * d - a10, a11); } +int stat_malus(Depth d) { return std::min(400 * d - 354, 1201); } // Add a small random component to draw evaluations to avoid 3-fold blindness Value value_draw(const Thread* thisThread) { @@ -393,12 +367,12 @@ void Thread::search() { // Reset aspiration window starting size Value avg = rootMoves[pvIdx].averageScore; - delta = Value(b1) + int(avg) * avg / b2; + delta = Value(9) + int(avg) * avg / 14847; alpha = std::max(avg - delta, -VALUE_INFINITE); beta = std::min(avg + delta, VALUE_INFINITE); // Adjust optimism based on root move's averageScore (~4 Elo) - optimism[us] = b3 * avg / (std::abs(avg) + b4); + optimism[us] = 121 * avg / (std::abs(avg) + 109); optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail @@ -772,7 +746,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Use static evaluation difference to improve quiet move ordering (~4 Elo) if (is_ok((ss - 1)->currentMove) && !(ss - 1)->inCheck && !priorCapture) { - int bonus = std::clamp(-b5 * int((ss - 1)->staticEval + ss->staticEval), -b6, b7); + int bonus = std::clamp(-13 * int((ss - 1)->staticEval + ss->staticEval), -1555, 1452); thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] << bonus; if (type_of(pos.piece_on(prevSq)) != PAWN && type_of((ss - 1)->currentMove) != PROMOTION) thisThread->pawnHistory[pawn_structure(pos)][pos.piece_on(prevSq)][prevSq] << bonus / 4; @@ -791,7 +765,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. // Adjust razor margin according to cutoffCnt. (~1 Elo) - if (eval < alpha - c1 - (c2 - c3 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) + if (eval < alpha - 472 - (284 - 165 * ((ss + 1)->cutoffCnt > 3)) * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) @@ -800,24 +774,24 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Step 8. Futility pruning: child node (~40 Elo) // The depth condition is important for mate finding. - if (!ss->ttPv && depth < c4 + if (!ss->ttPv && depth < 9 && eval - futility_margin(depth, cutNode && !ss->ttHit, improving) - - (ss - 1)->statScore / c5 + - (ss - 1)->statScore / 337 >= beta - && eval >= beta && eval < c6 // smaller than TB wins + && eval >= beta && eval < 29008 // smaller than TB wins && (!ttMove || ttCapture)) return (eval + beta) / 2; // Step 9. Null move search with verification search (~35 Elo) - if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < c7 && eval >= beta - && eval >= ss->staticEval && ss->staticEval >= beta - c8 * depth + c9 && !excludedMove + if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < 17496 && eval >= beta + && eval >= ss->staticEval && ss->staticEval >= beta - 23 * depth + 304 && !excludedMove && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly && beta > VALUE_TB_LOSS_IN_MAX_PLY) { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and eval - Depth R = std::min(int(eval - beta) / c10, 6) + depth / 3 + 4; + Depth R = std::min(int(eval - beta) / 144, 6) + depth / 3 + 4; ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -831,7 +805,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Do not return unproven mate or TB scores if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY) { - if (thisThread->nmpMinPly || depth < c11) + if (thisThread->nmpMinPly || depth < 15) return nullValue; assert(!thisThread->nmpMinPly); // Recursive verification is not allowed @@ -861,10 +835,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo return qsearch(pos, ss, alpha, beta); // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough. - if (cutNode && depth >= c12 && !ttMove) + if (cutNode && depth >= 8 && !ttMove) depth -= 2; - probCutBeta = beta + c13 - c14 * improving; + probCutBeta = beta + 163 - 67 * improving; // Step 11. ProbCut (~10 Elo) // If we have a good enough capture (or queen promotion) and a reduced search returns a value @@ -922,7 +896,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo moves_loop: // When in check, search starts here // Step 12. A small Probcut idea, when we are in check (~4 Elo) - probCutBeta = beta + d1; + probCutBeta = beta + 425; if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 4 && ttValue >= probCutBeta && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) @@ -1005,18 +979,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo if (capture || givesCheck) { // Futility pruning for captures (~2 Elo) - if (!givesCheck && lmrDepth < d2 && !ss->inCheck) + if (!givesCheck && lmrDepth < 7 && !ss->inCheck) { Piece capturedPiece = pos.piece_on(to_sq(move)); int futilityEval = - ss->staticEval + d3 + d4 * lmrDepth + PieceValue[capturedPiece] + ss->staticEval + 238 + 305 * lmrDepth + PieceValue[capturedPiece] + captureHistory[movedPiece][to_sq(move)][type_of(capturedPiece)] / 7; if (futilityEval < alpha) continue; } // SEE based pruning for captures and checks (~11 Elo) - if (!pos.see_ge(move, Value(-d5) * depth)) + if (!pos.see_ge(move, Value(-187) * depth)) continue; } else @@ -1027,25 +1001,25 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo + thisThread->pawnHistory[pawn_structure(pos)][movedPiece][to_sq(move)]; // Continuation history based pruning (~2 Elo) - if (lmrDepth < d6 && history < -d7 * depth) + if (lmrDepth < 6 && history < -3752 * depth) continue; history += 2 * thisThread->mainHistory[us][from_to(move)]; - lmrDepth += history / d8; + lmrDepth += history / 7838; lmrDepth = std::max(lmrDepth, -1); // Futility pruning: parent node (~13 Elo) - if (!ss->inCheck && lmrDepth < d9 - && ss->staticEval + (bestValue < ss->staticEval - d10 ? d11 : d12) - + d13 * lmrDepth + if (!ss->inCheck && lmrDepth < 14 + && ss->staticEval + (bestValue < ss->staticEval - 57 ? 124 : 71) + + 118 * lmrDepth <= alpha) continue; lmrDepth = std::max(lmrDepth, 0); // Prune moves with negative SEE (~4 Elo) - if (!pos.see_ge(move, Value(-d14 * lmrDepth * lmrDepth))) + if (!pos.see_ge(move, Value(-26 * lmrDepth * lmrDepth))) continue; } } @@ -1065,11 +1039,11 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // so changing them requires tests at these types of time controls. // Recursive singular search is avoided. if (!rootNode && move == ttMove && !excludedMove - && depth >= 4 - (thisThread->completedDepth > e1) + 2 * (PvNode && tte->is_pv()) + && depth >= 4 - (thisThread->completedDepth > 27) + 2 * (PvNode && tte->is_pv()) && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (e2 + e3 * (ss->ttPv && !PvNode)) * depth / 64; + Value singularBeta = ttValue - (66 + 58 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = newDepth / 2; ss->excludedMove = move; @@ -1083,10 +1057,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo singularQuietLMR = !ttCapture; // Avoid search explosion by limiting the number of double extensions - if (!PvNode && value < singularBeta - e4 && ss->doubleExtensions <= e5) + if (!PvNode && value < singularBeta - 17 && ss->doubleExtensions <= 11) { extension = 2; - depth += depth < e6; + depth += depth < 15; } } @@ -1110,7 +1084,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo) else if (cutNode) - extension = depth < e7 ? -2 : -1; + extension = depth < 19 ? -2 : -1; // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo) else if (ttValue <= value) @@ -1118,18 +1092,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo } // Check extensions (~1 Elo) - else if (givesCheck && depth > e8) + else if (givesCheck && depth > 10) extension = 1; // Quiet ttMove extensions (~1 Elo) else if (PvNode && move == ttMove && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= e9) + && (*contHist[0])[movedPiece][to_sq(move)] >= 4325) extension = 1; // Recapture extensions (~1 Elo) else if (PvNode && move == ttMove && to_sq(move) == prevSq && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] - > e10) + > 4146) extension = 1; } @@ -1153,7 +1127,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo r -= cutNode && tte->depth() >= depth ? 3 : 2; // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss - 1)->moveCount > e11) + if ((ss - 1)->moveCount > 7) r--; // Increase reduction for cut nodes (~3 Elo) @@ -1188,10 +1162,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - e12; + + (*contHist[3])[movedPiece][to_sq(move)] - 3817; // Decrease/increase reduction for moves with a good/bad history (~25 Elo) - r -= ss->statScore / e13; + r -= ss->statScore / 14767; // Step 17. Late moves reduction / extension (LMR, ~117 Elo) // We use various heuristics for the sons of a node after the first son has @@ -1214,7 +1188,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo { // Adjust full-depth search based on LMR results - if the result // was good enough search deeper, if it was bad enough search shallower. - const bool doDeeperSearch = value > (bestValue + f1 + f2 * newDepth); // (~1 Elo) + const bool doDeeperSearch = value > (bestValue + 53 + 2 * newDepth); // (~1 Elo) const bool doShallowerSearch = value < bestValue + newDepth; // (~2 Elo) newDepth += doDeeperSearch - doShallowerSearch; @@ -1329,7 +1303,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo else { // Reduce other moves if we have found at least one score improvement (~2 Elo) - if (depth > f3 && depth < f4 && beta < f5 && value > -f6) + if (depth > 2 && depth < 12 && beta < 13782 && value > -11541) depth -= 2; assert(depth > 0); @@ -1368,8 +1342,8 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo // Bonus for prior countermove that caused the fail low else if (!priorCapture && prevSq != SQ_NONE) { - int bonus = (depth > f7) + (PvNode || cutNode) + (bestValue < alpha - f8) - + ((ss - 1)->moveCount > f9); + int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 656) + + ((ss - 1)->moveCount > 10); update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] @@ -1501,7 +1475,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { if (bestValue > alpha) alpha = bestValue; - futilityBase = ss->staticEval + g1; + futilityBase = ss->staticEval + 182; } const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory, @@ -1581,7 +1555,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { continue; // Do not search moves with bad enough SEE values (~5 Elo) - if (!pos.see_ge(move, Value(-g2))) + if (!pos.see_ge(move, Value(-77))) continue; } @@ -1717,7 +1691,7 @@ void update_all_stats(const Position& pos, if (!pos.capture_stage(bestMove)) { - int bestMoveBonus = bestValue > beta + g3 ? quietMoveBonus // larger bonus + int bestMoveBonus = bestValue > beta + 173 ? quietMoveBonus // larger bonus : stat_bonus(depth); // smaller bonus // Increase stats for the best move in case it was a quiet move @@ -1725,7 +1699,7 @@ void update_all_stats(const Position& pos, thisThread->pawnHistory[pawn_structure(pos)][moved_piece][to_sq(bestMove)] << quietMoveBonus; - int moveMalus = bestValue > beta + g4 ? quietMoveMalus // larger malus + int moveMalus = bestValue > beta + 165 ? quietMoveMalus // larger malus : stat_malus(depth); // smaller malus // Decrease stats for all non-best quiet moves