From bdf6b44a7b82026caca84cfb20ea415e653c8f7b Mon Sep 17 00:00:00 2001
From: mstembera <m_stembera@yahoo.com>
Date: Sat, 2 Dec 2023 17:50:32 -0800
Subject: [PATCH 1/7] Dual net NNUE bench: 1449578

---
 src/Makefile                        |   3 +
 src/evaluate.cpp                    | 167 +++++++++++++++-------------
 src/evaluate.h                      |   6 +-
 src/nnue/evaluate_nnue.cpp          | 127 +++++++++++++--------
 src/nnue/evaluate_nnue.h            |  19 ++--
 src/nnue/nnue_accumulator.h         |   4 +-
 src/nnue/nnue_architecture.h        |  33 ++++--
 src/nnue/nnue_feature_transformer.h | 108 ++++++++++--------
 src/position.cpp                    |  14 ++-
 src/position.h                      |  24 +++-
 src/thread.cpp                      |   2 +-
 src/uci.cpp                         |   2 +-
 src/ucioption.cpp                   |   3 +-
 13 files changed, 310 insertions(+), 202 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index 59ea7bfe7b5..68aecc21168 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -905,6 +905,7 @@ profileclean:
 	@rm -f stockfish.res
 	@rm -f ./-lstdc++.res
 
+ifneq ("x","x")  # never true: everything up to the matching endif is skipped by make
 # set up shell variables for the net stuff
 netvariables:
 	$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
@@ -951,6 +952,8 @@ net: netvariables
 		fi; \
 	fi; \
 
+endif
+
 format:
 	$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
 
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index 9c39d4c07fb..007275ee676 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -43,11 +43,15 @@
 //     const unsigned int         gEmbeddedNNUESize;    // the size of the embedded file
 // Note that this does not work in Microsoft Visual Studio.
 #if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
-INCBIN(EmbeddedNNUE, EvalFileDefaultName);
+INCBIN(EmbeddedNNUEBig,   EvalFileDefaultNameBig);
+INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
 #else
-const unsigned char        gEmbeddedNNUEData[1] = {0x0};
-const unsigned char* const gEmbeddedNNUEEnd     = &gEmbeddedNNUEData[1];
-const unsigned int         gEmbeddedNNUESize    = 1;
+const unsigned char        gEmbeddedNNUEBigData[1]   = {0x0};
+const unsigned char* const gEmbeddedNNUEBigEnd       = &gEmbeddedNNUEBigData[1];
+const unsigned int         gEmbeddedNNUEBigSize      = 1;
+const unsigned char        gEmbeddedNNUESmallData[1] = {0x0};
+const unsigned char* const gEmbeddedNNUESmallEnd     = &gEmbeddedNNUESmallData[1];
+const unsigned int         gEmbeddedNNUESmallSize    = 1;
 #endif
 
 
@@ -55,7 +59,9 @@ namespace Stockfish {
 
 namespace Eval {
 
-std::string currentEvalFileName = "None";
+std::string currentEvalFileName[2] = {"None", "None"};
+const std::string EvFiles[2]       = {"EvalFileBig", "EvalFileSmall"};
+const std::string EvFileNames[2]   = {EvalFileDefaultNameBig, EvalFileDefaultNameSmall};
 
 // Tries to load a NNUE network at startup time, or when the engine
 // receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
@@ -66,9 +72,11 @@ std::string currentEvalFileName = "None";
 // variable to have the engine search in a special directory in their distro.
 void NNUE::init() {
 
-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (eval_file.empty())
-        eval_file = EvalFileDefaultName;
+    for (bool small : {false, true})
+    {
+        std::string eval_file = std::string(Options[EvFiles[small]]);
+        if (eval_file.empty())
+            eval_file = EvFileNames[small];
 
 #if defined(DEFAULT_NNUE_DIRECTORY)
     std::vector<std::string> dirs = {"<internal>", "", CommandLine::binaryDirectory,
@@ -77,82 +85,79 @@ void NNUE::init() {
     std::vector<std::string> dirs = {"<internal>", "", CommandLine::binaryDirectory};
 #endif
 
-    for (const std::string& directory : dirs)
-        if (currentEvalFileName != eval_file)
+        for (const std::string& directory : dirs)
         {
-            if (directory != "<internal>")
+            if (currentEvalFileName[small] != eval_file)
             {
-                std::ifstream stream(directory + eval_file, std::ios::binary);
-                if (NNUE::load_eval(eval_file, stream))
-                    currentEvalFileName = eval_file;
-            }
-
-            if (directory == "<internal>" && eval_file == EvalFileDefaultName)
-            {
-                // C++ way to prepare a buffer for a memory stream
-                class MemoryBuffer: public std::basic_streambuf<char> {
-                   public:
-                    MemoryBuffer(char* p, size_t n) {
-                        setg(p, p, p + n);
-                        setp(p, p + n);
-                    }
-                };
-
-                MemoryBuffer buffer(
-                  const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
-                  size_t(gEmbeddedNNUESize));
-                (void) gEmbeddedNNUEEnd;  // Silence warning on unused variable
-
-                std::istream stream(&buffer);
-                if (NNUE::load_eval(eval_file, stream))
-                    currentEvalFileName = eval_file;
+                if (directory != "<internal>")
+                {
+                    std::ifstream stream(directory + eval_file, std::ios::binary);
+                    if (NNUE::load_eval(eval_file, stream, small))
+                        currentEvalFileName[small] = eval_file;
+                }
+
+                if (directory == "<internal>" && eval_file == EvFileNames[small])
+                {
+                    // C++ way to prepare a buffer for a memory stream
+                    class MemoryBuffer: public std::basic_streambuf<char> {
+                       public:
+                        MemoryBuffer(char* p, size_t n) {
+                            setg(p, p, p + n);
+                            setp(p, p + n);
+                        }
+                    };
+
+                    MemoryBuffer buffer(
+                      const_cast<char*>(reinterpret_cast<const char*>(
+                        small ? gEmbeddedNNUESmallData : gEmbeddedNNUEBigData)),
+                      size_t(small ? gEmbeddedNNUESmallSize : gEmbeddedNNUEBigSize));
+                    (void) gEmbeddedNNUEBigEnd;  // Silence warning on unused variable
+                    (void) gEmbeddedNNUESmallEnd;
+
+                    std::istream stream(&buffer);
+                    if (NNUE::load_eval(eval_file, stream, small))
+                        currentEvalFileName[small] = eval_file;
+                }
             }
         }
+    }
 }
 
 // Verifies that the last net used was loaded successfully
 void NNUE::verify() {
 
-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (eval_file.empty())
-        eval_file = EvalFileDefaultName;
-
-    if (currentEvalFileName != eval_file)
+    for (bool small : {false, true})  // false = big net, true = small net
     {
+        std::string eval_file = std::string(Options[EvFiles[small]]);
+        if (eval_file.empty())
+            eval_file = EvFileNames[small];
 
-        std::string msg1 =
-          "Network evaluation parameters compatible with the engine must be available.";
-        std::string msg2 = "The network file " + eval_file + " was not loaded successfully.";
-        std::string msg3 = "The UCI option EvalFile might need to specify the full path, "
-                           "including the directory name, to the network file.";
-        std::string msg4 = "The default net can be downloaded from: "
-                           "https://tests.stockfishchess.org/api/nn/"
-                         + std::string(EvalFileDefaultName);
-        std::string msg5 = "The engine will be terminated now.";
-
-        sync_cout << "info string ERROR: " << msg1 << sync_endl;
-        sync_cout << "info string ERROR: " << msg2 << sync_endl;
-        sync_cout << "info string ERROR: " << msg3 << sync_endl;
-        sync_cout << "info string ERROR: " << msg4 << sync_endl;
-        sync_cout << "info string ERROR: " << msg5 << sync_endl;
-
-        exit(EXIT_FAILURE);
-    }
+        if (currentEvalFileName[small] != eval_file)
+        {
+            std::string msg1 =
+              "Network evaluation parameters compatible with the engine must be available.";
+            std::string msg2 = "The network file " + eval_file + " was not loaded successfully.";
+            std::string msg3 = "The UCI option " + EvFiles[small] + " might need to specify the "
+                               "full path, including the directory name, to the network file.";
+            std::string msg4 = "The default net can be downloaded from: "
+                               "https://tests.stockfishchess.org/api/nn/"
+                             + EvFileNames[small];
+            std::string msg5 = "The engine will be terminated now.";
+
+            sync_cout << "info string ERROR: " << msg1 << sync_endl;
+            sync_cout << "info string ERROR: " << msg2 << sync_endl;
+            sync_cout << "info string ERROR: " << msg3 << sync_endl;
+            sync_cout << "info string ERROR: " << msg4 << sync_endl;
+            sync_cout << "info string ERROR: " << msg5 << sync_endl;
+
+            exit(EXIT_FAILURE);
+        }
 
-    sync_cout << "info string NNUE evaluation using " << eval_file << sync_endl;
-}
+        sync_cout << "info string NNUE evaluation using " << eval_file << sync_endl;
+    }
 }
-
-
-// Returns a static, purely materialistic evaluation of the position from
-// the point of view of the given color. It can be divided by PawnValue to get
-// an approximation of the material advantage on the board in terms of pawns.
-Value Eval::simple_eval(const Position& pos, Color c) {
-    return PawnValue * (pos.count<PAWN>(c) - pos.count<PAWN>(~c))
-         + (pos.non_pawn_material(c) - pos.non_pawn_material(~c));
 }
 
-
 // Evaluate is the evaluator for the outer world. It returns a static evaluation
 // of the position from the point of view of the side to move.
 Value Eval::evaluate(const Position& pos) {
@@ -162,18 +167,28 @@ Value Eval::evaluate(const Position& pos) {
     Value v;
     Color stm        = pos.side_to_move();
     int   shuffling  = pos.rule50_count();
-    int   simpleEval = simple_eval(pos, stm) + (int(pos.key() & 7) - 3);
+    int   simpleEval = pos.simple_eval() + (int(pos.key() & 7) - 3);
+
+    int lazyThreshold = RookValue + KnightValue + 16 * shuffling * shuffling
+                                  + abs(pos.this_thread()->bestValue)
+                                  + abs(pos.this_thread()->rootSimpleEval);
 
-    bool lazy = abs(simpleEval) >= RookValue + KnightValue + 16 * shuffling * shuffling
-                                     + abs(pos.this_thread()->bestValue)
-                                     + abs(pos.this_thread()->rootSimpleEval);
+    bool lazy = abs(simpleEval) > lazyThreshold * 105 / 100;
 
     if (lazy)
         v = Value(simpleEval);
     else
     {
-        int   nnueComplexity;
-        Value nnue = NNUE::evaluate(pos, true, &nnueComplexity);
+        int accBias = pos.state()->accumulatorBig.computed[0]
+                    + pos.state()->accumulatorBig.computed[1]
+                    - pos.state()->accumulatorSmall.computed[0]
+                    - pos.state()->accumulatorSmall.computed[1];
+
+        int  nnueComplexity;
+        bool smallNet = abs(simpleEval) > lazyThreshold * (90 + accBias) / 100;
+
+        Value nnue = smallNet ? NNUE::evaluate<true>(pos, true, &nnueComplexity)
+                              : NNUE::evaluate<false>(pos, true, &nnueComplexity);
 
         Value optimism = pos.this_thread()->optimism[stm];
 
@@ -216,7 +231,7 @@ std::string Eval::trace(Position& pos) {
     ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
 
     Value v;
-    v = NNUE::evaluate(pos, false);
+    v = NNUE::evaluate<false>(pos, false);
     v = pos.side_to_move() == WHITE ? v : -v;
     ss << "NNUE evaluation        " << 0.01 * UCI::to_cp(v) << " (white side)\n";
 
diff --git a/src/evaluate.h b/src/evaluate.h
index 2ab477eced2..48ca596bc05 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -31,15 +31,15 @@ namespace Eval {
 
 std::string trace(Position& pos);
 
-Value simple_eval(const Position& pos, Color c);
 Value evaluate(const Position& pos);
 
-extern std::string currentEvalFileName;
+extern std::string currentEvalFileName[2];
 
 // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
 // for the build process (profile-build and fishtest) to work. Do not change the
 // name of the macro, as it is used in the Makefile.
-#define EvalFileDefaultName "nn-0000000000a0.nnue"
+#define EvalFileDefaultNameBig   "nn-0000000000a0.nnue"
+#define EvalFileDefaultNameSmall "nn-a70fe1969e12.nnue"
 
 namespace NNUE {
 
diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
index ef6b7e91a60..26ce4bdcde2 100644
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -40,14 +40,16 @@
 namespace Stockfish::Eval::NNUE {
 
 // Input feature converter
-LargePagePtr<FeatureTransformer> featureTransformer;
+LargePagePtr<FeatureTransformer<TransformedFeatureDimensionsBig>>   featureTransformerBig;
+LargePagePtr<FeatureTransformer<TransformedFeatureDimensionsSmall>> featureTransformerSmall;
 
 // Evaluation function
-AlignedPtr<Network> network[LayerStacks];
+AlignedPtr<Network<TransformedFeatureDimensionsBig,   L2Big,   L3Big>>   networkBig[LayerStacks];
+AlignedPtr<Network<TransformedFeatureDimensionsSmall, L2Small, L3Small>> networkSmall[LayerStacks];
 
-// Evaluation function file name
-std::string fileName;
-std::string netDescription;
+// Evaluation function file names and net descriptions; index 0 = big net, 1 = small net
+std::string fileName[2];
+std::string netDescription[2];
 
 namespace Detail {
 
@@ -91,11 +93,20 @@ bool write_parameters(std::ostream& stream, const T& reference) {
 
 
 // Initialize the evaluation function parameters
-static void initialize() {
+static void initialize(bool small) {
 
-    Detail::initialize(featureTransformer);
-    for (std::size_t i = 0; i < LayerStacks; ++i)
-        Detail::initialize(network[i]);
+    // Initialize only the requested net: its feature transformer, then each layer stack
+    if (small)
+    {
+        Detail::initialize(featureTransformerSmall);
+        for (auto& stack : networkSmall)
+            Detail::initialize(stack);
+        return;
+    }
+
+    Detail::initialize(featureTransformerBig);
+    for (auto& stack : networkBig)
+        Detail::initialize(stack);
 }
 
 // Read network header
@@ -122,39 +133,57 @@ static bool write_header(std::ostream& stream, std::uint32_t hashValue, const st
 }
 
 // Read network parameters
-static bool read_parameters(std::istream& stream) {
+static bool read_parameters(std::istream& stream, bool small) {
 
     std::uint32_t hashValue;
-    if (!read_header(stream, &hashValue, &netDescription))
+    if (!read_header(stream, &hashValue, &netDescription[small]))
+        return false;
+    if (hashValue != HashValue[small])
         return false;
-    if (hashValue != HashValue)
+    if (!small && !Detail::read_parameters(stream, *featureTransformerBig))
         return false;
-    if (!Detail::read_parameters(stream, *featureTransformer))
+    if ( small && !Detail::read_parameters(stream, *featureTransformerSmall))
         return false;
     for (std::size_t i = 0; i < LayerStacks; ++i)
-        if (!Detail::read_parameters(stream, *(network[i])))
+    {
+        if (!small && !Detail::read_parameters(stream, *(networkBig[i])))
+            return false;
+        if ( small && !Detail::read_parameters(stream, *(networkSmall[i])))
             return false;
+    }
     return stream && stream.peek() == std::ios::traits_type::eof();
 }
 
 // Write network parameters
-static bool write_parameters(std::ostream& stream) {
+static bool write_parameters(std::ostream& stream, bool small) {
 
-    if (!write_header(stream, HashValue, netDescription))
+    if (!write_header(stream, HashValue[small], netDescription[small]))
+        return false;
+    if (!small && !Detail::write_parameters(stream, *featureTransformerBig))
         return false;
-    if (!Detail::write_parameters(stream, *featureTransformer))
+    if (small && !Detail::write_parameters(stream, *featureTransformerSmall))
         return false;
     for (std::size_t i = 0; i < LayerStacks; ++i)
-        if (!Detail::write_parameters(stream, *(network[i])))
+    {
+        if (!small && !Detail::write_parameters(stream, *(networkBig[i])))
             return false;
+        if (small && !Detail::write_parameters(stream, *(networkSmall[i])))
+            return false;
+    }
     return bool(stream);
 }
 
 void hint_common_parent_position(const Position& pos) {
-    featureTransformer->hint_common_access(pos);
+
+    int simpleEval = pos.simple_eval();
+    if (abs(simpleEval) < 2500)
+        featureTransformerBig->hint_common_access(pos);
+    else
+        featureTransformerSmall->hint_common_access(pos);
 }
 
 // Evaluation function. Perform differential calculation.
+template<bool Small>
 Value evaluate(const Position& pos, bool adjusted, int* complexity) {
 
     // We manually align the arrays on the stack because with gcc < 9.3
@@ -165,19 +194,24 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
 
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
     TransformedFeatureType
-      transformedFeaturesUnaligned[FeatureTransformer::BufferSize
-                                   + alignment / sizeof(TransformedFeatureType)];
+      transformedFeaturesUnaligned[
+            FeatureTransformer<Small ? TransformedFeatureDimensionsSmall : TransformedFeatureDimensionsBig>::BufferSize
+          + alignment / sizeof(TransformedFeatureType)];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
 #else
-    alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
+        
+    alignas(alignment) TransformedFeatureType transformedFeatures[
+        FeatureTransformer<Small ? TransformedFeatureDimensionsSmall : TransformedFeatureDimensionsBig>::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
 
     const int  bucket     = (pos.count<ALL_PIECES>() - 1) / 4;
-    const auto psqt       = featureTransformer->transform(pos, transformedFeatures, bucket);
-    const auto positional = network[bucket]->propagate(transformedFeatures);
+    const auto psqt       = Small ? featureTransformerSmall->transform(pos, transformedFeatures, bucket)
+                                  : featureTransformerBig->transform(pos, transformedFeatures, bucket);
+    const auto positional = Small ? networkSmall[bucket]->propagate(transformedFeatures)
+                                  : networkBig[bucket]->propagate(transformedFeatures);
 
     if (complexity)
         *complexity = abs(psqt - positional) / OutputScale;
@@ -190,6 +224,9 @@ Value evaluate(const Position& pos, bool adjusted, int* complexity) {
         return static_cast<Value>((psqt + positional) / OutputScale);
 }
 
+template Value evaluate<false>(const Position& pos, bool adjusted, int* complexity);
+template Value evaluate<true>(const Position& pos, bool adjusted, int* complexity);
+
 struct NnueEvalTrace {
     static_assert(LayerStacks == PSQTBuckets);
 
@@ -206,12 +243,12 @@ static NnueEvalTrace trace_evaluate(const Position& pos) {
 
 #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
     TransformedFeatureType
-      transformedFeaturesUnaligned[FeatureTransformer::BufferSize
+      transformedFeaturesUnaligned[FeatureTransformer<TransformedFeatureDimensionsBig>::BufferSize
                                    + alignment / sizeof(TransformedFeatureType)];
 
     auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
 #else
-    alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer::BufferSize];
+    alignas(alignment) TransformedFeatureType transformedFeatures[FeatureTransformer<TransformedFeatureDimensionsBig>::BufferSize];
 #endif
 
     ASSERT_ALIGNED(transformedFeatures, alignment);
@@ -220,8 +257,8 @@ static NnueEvalTrace trace_evaluate(const Position& pos) {
     t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;
     for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
     {
-        const auto materialist = featureTransformer->transform(pos, transformedFeatures, bucket);
-        const auto positional  = network[bucket]->propagate(transformedFeatures);
+        const auto materialist = featureTransformerBig->transform(pos, transformedFeatures, bucket);
+        const auto positional  = networkBig[bucket]->propagate(transformedFeatures);
 
         t.psqt[bucket]       = static_cast<Value>(materialist / OutputScale);
         t.positional[bucket] = static_cast<Value>(positional / OutputScale);
@@ -310,7 +347,7 @@ std::string trace(Position& pos) {
 
     // We estimate the value of each piece by doing a differential evaluation from
     // the current base eval, simulating the removal of the piece from its square.
-    Value base = evaluate(pos);
+    Value base = evaluate<false>(pos);
     base       = pos.side_to_move() == WHITE ? base : -base;
 
     for (File f = FILE_A; f <= FILE_H; ++f)
@@ -325,16 +362,16 @@ std::string trace(Position& pos) {
                 auto st = pos.state();
 
                 pos.remove_piece(sq);
-                st->accumulator.computed[WHITE] = false;
-                st->accumulator.computed[BLACK] = false;
+                st->accumulatorBig.computed[WHITE] = false;
+                st->accumulatorBig.computed[BLACK] = false;
 
-                Value eval = evaluate(pos);
+                Value eval = evaluate<false>(pos);
                 eval       = pos.side_to_move() == WHITE ? eval : -eval;
                 v          = base - eval;
 
                 pos.put_piece(pc, sq);
-                st->accumulator.computed[WHITE] = false;
-                st->accumulator.computed[BLACK] = false;
+                st->accumulatorBig.computed[WHITE] = false;
+                st->accumulatorBig.computed[BLACK] = false;
             }
 
             writeSquare(f, r, pc, v);
@@ -379,24 +416,24 @@ std::string trace(Position& pos) {
 
 
 // Load eval, from a file stream or a memory stream
-bool load_eval(std::string name, std::istream& stream) {
+bool load_eval(const std::string name, std::istream& stream, bool small) {
 
-    initialize();
-    fileName = name;
-    return read_parameters(stream);
+    initialize(small);
+    fileName[small] = name;
+    return read_parameters(stream, small);
 }
 
 // Save eval, to a file stream or a memory stream
-bool save_eval(std::ostream& stream) {
+bool save_eval(std::ostream& stream, bool small) {
 
-    if (fileName.empty())
+    if (fileName[small].empty())
         return false;
 
-    return write_parameters(stream);
+    return write_parameters(stream, small);
 }
 
 // Save eval, to a file given by its name
-bool save_eval(const std::optional<std::string>& filename) {
+bool save_eval(const std::optional<std::string>& filename, bool small) {
 
     std::string actualFilename;
     std::string msg;
@@ -405,7 +442,7 @@ bool save_eval(const std::optional<std::string>& filename) {
         actualFilename = filename.value();
     else
     {
-        if (currentEvalFileName != EvalFileDefaultName)
+        if (currentEvalFileName[small] != (small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig))
         {
             msg = "Failed to export a net. "
                   "A non-embedded net can only be saved if the filename is specified";
@@ -413,11 +450,11 @@ bool save_eval(const std::optional<std::string>& filename) {
             sync_cout << msg << sync_endl;
             return false;
         }
-        actualFilename = EvalFileDefaultName;
+        actualFilename = (small ? EvalFileDefaultNameSmall : EvalFileDefaultNameBig);
     }
 
     std::ofstream stream(actualFilename, std::ios_base::binary);
-    bool          saved = save_eval(stream);
+    bool          saved = save_eval(stream, small);
 
     msg = saved ? "Network saved successfully to " + actualFilename : "Failed to export a net";
 
diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h
index 6edc212f4d7..e5367283e14 100644
--- a/src/nnue/evaluate_nnue.h
+++ b/src/nnue/evaluate_nnue.h
@@ -39,9 +39,11 @@ enum Value : int;
 namespace Stockfish::Eval::NNUE {
 
 // Hash value of evaluation function structure
-constexpr std::uint32_t HashValue =
-  FeatureTransformer::get_hash_value() ^ Network::get_hash_value();
-
+constexpr std::uint32_t HashValue[2] =
+  {   FeatureTransformer<TransformedFeatureDimensionsBig>::get_hash_value()
+    ^ Network<TransformedFeatureDimensionsBig, L2Big, L3Big>::get_hash_value(),
+      FeatureTransformer<TransformedFeatureDimensionsSmall>::get_hash_value()
+    ^ Network<TransformedFeatureDimensionsSmall, L2Small, L3Small>::get_hash_value() };
 
 // Deleter for automating release of memory area
 template<typename T>
@@ -67,12 +69,13 @@ template<typename T>
 using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
 
 std::string trace(Position& pos);
-Value       evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
-void        hint_common_parent_position(const Position& pos);
+template<bool Small>
+Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr);
+void  hint_common_parent_position(const Position& pos);
 
-bool load_eval(std::string name, std::istream& stream);
-bool save_eval(std::ostream& stream);
-bool save_eval(const std::optional<std::string>& filename);
+bool load_eval(const std::string name, std::istream& stream, bool small);
+bool save_eval(std::ostream& stream, bool small);
+bool save_eval(const std::optional<std::string>& filename, bool small);
 
 }  // namespace Stockfish::Eval::NNUE
 
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
index 2f1b1d35e52..6d45bd40310 100644
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@@ -29,8 +29,10 @@
 namespace Stockfish::Eval::NNUE {
 
 // Class that holds the result of affine transformation of input features
+template<bool Small>
 struct alignas(CacheLineSize) Accumulator {
-    std::int16_t accumulation[2][TransformedFeatureDimensions];
+    std::int16_t accumulation[2][Small ? TransformedFeatureDimensionsSmall
+                                       : TransformedFeatureDimensionsBig];
     std::int32_t psqtAccumulation[2][PSQTBuckets];
     bool         computed[2];
 };
diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h
index e4c308cb267..adfeaf045eb 100644
--- a/src/nnue/nnue_architecture.h
+++ b/src/nnue/nnue_architecture.h
@@ -38,13 +38,22 @@ namespace Stockfish::Eval::NNUE {
 using FeatureSet = Features::HalfKAv2_hm;
 
 // Number of input feature dimensions after conversion
-constexpr IndexType TransformedFeatureDimensions = 2560;
-constexpr IndexType PSQTBuckets                  = 8;
-constexpr IndexType LayerStacks                  = 8;
+constexpr IndexType TransformedFeatureDimensionsBig   = 2560;
+constexpr int       L2Big                             = 15;
+constexpr int       L3Big                             = 32;
 
+constexpr IndexType TransformedFeatureDimensionsSmall = 1024;
+constexpr int       L2Small                           = 15;
+constexpr int       L3Small                           = 32;
+
+constexpr IndexType PSQTBuckets                       = 8;
+constexpr IndexType LayerStacks                       = 8;
+
+template<IndexType L1, int L2, int L3>
 struct Network {
-    static constexpr int FC_0_OUTPUTS = 15;
-    static constexpr int FC_1_OUTPUTS = 32;
+    static constexpr IndexType TransformedFeatureDimensions = L1;
+    static constexpr int FC_0_OUTPUTS = L2;
+    static constexpr int FC_1_OUTPUTS = L3;
 
     Layers::AffineTransformSparseInput<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
     Layers::SqrClippedReLU<FC_0_OUTPUTS + 1>                                           ac_sqr_0;
@@ -84,13 +93,13 @@ struct Network {
 
     std::int32_t propagate(const TransformedFeatureType* transformedFeatures) {
         struct alignas(CacheLineSize) Buffer {
-            alignas(CacheLineSize) decltype(fc_0)::OutputBuffer fc_0_out;
-            alignas(CacheLineSize) decltype(ac_sqr_0)::OutputType
+            alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out;
+            alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType
               ac_sqr_0_out[ceil_to_multiple<IndexType>(FC_0_OUTPUTS * 2, 32)];
-            alignas(CacheLineSize) decltype(ac_0)::OutputBuffer ac_0_out;
-            alignas(CacheLineSize) decltype(fc_1)::OutputBuffer fc_1_out;
-            alignas(CacheLineSize) decltype(ac_1)::OutputBuffer ac_1_out;
-            alignas(CacheLineSize) decltype(fc_2)::OutputBuffer fc_2_out;
+            alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out;
+            alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out;
+            alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out;
+            alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out;
 
             Buffer() { std::memset(this, 0, sizeof(*this)); }
         };
@@ -108,7 +117,7 @@ struct Network {
         ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out);
         ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
         std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out,
-                    FC_0_OUTPUTS * sizeof(decltype(ac_0)::OutputType));
+                    FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType));
         fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out);
         ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
         fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
index 2af80f07792..8154b2f0a1f 100644
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -186,11 +186,6 @@ static constexpr int BestRegisterCount() {
 
     return 1;
 }
-
-static constexpr int NumRegs =
-  BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
-static constexpr int NumPsqtRegs =
-  BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
     #if defined(__GNUC__)
         #pragma GCC diagnostic pop
     #endif
@@ -198,13 +193,21 @@ static constexpr int NumPsqtRegs =
 
 
 // Input feature converter
+template<IndexType TransformedFeatureDimensions>
 class FeatureTransformer {
 
    private:
+    static constexpr bool Small = TransformedFeatureDimensions == TransformedFeatureDimensionsSmall;
+
     // Number of output dimensions for one side
     static constexpr IndexType HalfDimensions = TransformedFeatureDimensions;
 
 #ifdef VECTOR
+    static constexpr int NumRegs =
+      BestRegisterCount<vec_t, WeightType, TransformedFeatureDimensions, NumRegistersSIMD>();
+    static constexpr int NumPsqtRegs =
+      BestRegisterCount<psqt_vec_t, PSQTWeightType, PSQTBuckets, NumRegistersSIMD>();
+
     static constexpr IndexType TileHeight     = NumRegs * sizeof(vec_t) / 2;
     static constexpr IndexType PsqtTileHeight = NumPsqtRegs * sizeof(psqt_vec_t) / 4;
     static_assert(HalfDimensions % TileHeight == 0, "TileHeight must divide HalfDimensions");
@@ -247,14 +250,22 @@ class FeatureTransformer {
         return !stream.fail();
     }
 
+    // Cast a pointer to a 2 dimensional array of width D
+    template<int D, typename T>
+    static constexpr T (*cast_2D(T* pt))[D] {
+        return (T(*)[D])pt;
+    }
+
     // Convert input features
     std::int32_t transform(const Position& pos, OutputType* output, int bucket) const {
         update_accumulator<WHITE>(pos);
         update_accumulator<BLACK>(pos);
 
         const Color perspectives[2]  = {pos.side_to_move(), ~pos.side_to_move()};
-        const auto& accumulation     = pos.state()->accumulator.accumulation;
-        const auto& psqtAccumulation = pos.state()->accumulator.psqtAccumulation;
+        const auto& accumulation =
+          cast_2D<TransformedFeatureDimensions>(pos.state()->template accumulation<Small>());
+        const auto& psqtAccumulation =
+          cast_2D<PSQTBuckets>(pos.state()->template psqt_accumulation<Small>());
 
         const auto psqt =
           (psqtAccumulation[perspectives[0]][bucket] - psqtAccumulation[perspectives[1]][bucket])
@@ -323,7 +334,7 @@ class FeatureTransformer {
         // of the estimated gain in terms of features to be added/subtracted.
         StateInfo *st = pos.state(), *next = nullptr;
         int        gain = FeatureSet::refresh_cost(pos);
-        while (st->previous && !st->accumulator.computed[Perspective])
+        while (st->previous && !st->template computed<Small>()[Perspective])
         {
             // This governs when a full feature refresh is needed and how many
             // updates are better than just one full refresh.
@@ -381,7 +392,7 @@ class FeatureTransformer {
 
             for (; i >= 0; --i)
             {
-                states_to_update[i]->accumulator.computed[Perspective] = true;
+                states_to_update[i]->template computed<Small>()[Perspective] = true;
 
                 const StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1];
 
@@ -401,10 +412,10 @@ class FeatureTransformer {
         {
             assert(states_to_update[0]);
 
-            auto accIn =
-              reinterpret_cast<const vec_t*>(&st->accumulator.accumulation[Perspective][0]);
-            auto accOut = reinterpret_cast<vec_t*>(
-              &states_to_update[0]->accumulator.accumulation[Perspective][0]);
+            auto accIn  = reinterpret_cast<const vec_t*>(&cast_2D<TransformedFeatureDimensions>(
+              st->template accumulation<Small>())[Perspective][0]);
+            auto accOut = reinterpret_cast<vec_t*>(&cast_2D<TransformedFeatureDimensions>(
+              states_to_update[0]->template accumulation<Small>())[Perspective][0]);
 
             const IndexType offsetR0 = HalfDimensions * removed[0][0];
             auto            columnR0 = reinterpret_cast<const vec_t*>(&weights[offsetR0]);
@@ -428,10 +439,10 @@ class FeatureTransformer {
                                            vec_add_16(columnR0[k], columnR1[k]));
             }
 
-            auto accPsqtIn = reinterpret_cast<const psqt_vec_t*>(
-              &st->accumulator.psqtAccumulation[Perspective][0]);
-            auto accPsqtOut = reinterpret_cast<psqt_vec_t*>(
-              &states_to_update[0]->accumulator.psqtAccumulation[Perspective][0]);
+            auto accPsqtIn = reinterpret_cast<const psqt_vec_t*>(&cast_2D<PSQTBuckets>(
+              st->template psqt_accumulation<Small>())[Perspective][0]);
+            auto accPsqtOut = reinterpret_cast<psqt_vec_t*>(&cast_2D<PSQTBuckets>(
+              states_to_update[0]->template psqt_accumulation<Small>())[Perspective][0]);
 
             const IndexType offsetPsqtR0 = PSQTBuckets * removed[0][0];
             auto columnPsqtR0 = reinterpret_cast<const psqt_vec_t*>(&psqtWeights[offsetPsqtR0]);
@@ -462,8 +473,8 @@ class FeatureTransformer {
             for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
             {
                 // Load accumulator
-                auto accTileIn = reinterpret_cast<const vec_t*>(
-                  &st->accumulator.accumulation[Perspective][j * TileHeight]);
+                auto accTileIn = reinterpret_cast<const vec_t*>(&cast_2D<TransformedFeatureDimensions>(
+                  st->template accumulation<Small>())[Perspective][j * TileHeight]);
                 for (IndexType k = 0; k < NumRegs; ++k)
                     acc[k] = vec_load(&accTileIn[k]);
 
@@ -488,8 +499,8 @@ class FeatureTransformer {
                     }
 
                     // Store accumulator
-                    auto accTileOut = reinterpret_cast<vec_t*>(
-                      &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]);
+                    auto accTileOut = reinterpret_cast<vec_t*>(&cast_2D<TransformedFeatureDimensions>(
+                      states_to_update[i]->template accumulation<Small>())[Perspective][j * TileHeight]);
                     for (IndexType k = 0; k < NumRegs; ++k)
                         vec_store(&accTileOut[k], acc[k]);
                 }
@@ -498,8 +509,8 @@ class FeatureTransformer {
             for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
             {
                 // Load accumulator
-                auto accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>(
-                  &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+                auto accTilePsqtIn = reinterpret_cast<const psqt_vec_t*>(&cast_2D<PSQTBuckets>(
+                  st->template psqt_accumulation<Small>())[Perspective][j * PsqtTileHeight]);
                 for (std::size_t k = 0; k < NumPsqtRegs; ++k)
                     psqt[k] = vec_load_psqt(&accTilePsqtIn[k]);
 
@@ -524,9 +535,8 @@ class FeatureTransformer {
                     }
 
                     // Store accumulator
-                    auto accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(
-                      &states_to_update[i]
-                         ->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+                    auto accTilePsqtOut = reinterpret_cast<psqt_vec_t*>(&cast_2D<PSQTBuckets>(
+                      states_to_update[i]->template psqt_accumulation<Small>())[Perspective][j * PsqtTileHeight]);
                     for (std::size_t k = 0; k < NumPsqtRegs; ++k)
                         vec_store_psqt(&accTilePsqtOut[k], psqt[k]);
                 }
@@ -535,13 +545,13 @@ class FeatureTransformer {
 #else
         for (IndexType i = 0; states_to_update[i]; ++i)
         {
-            std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective],
-                        st->accumulator.accumulation[Perspective],
+            std::memcpy(cast_2D<TransformedFeatureDimensions>(states_to_update[i]->template accumulation<Small>())[Perspective],
+                        cast_2D<TransformedFeatureDimensions>(st->template accumulation<Small>())[Perspective],
                         HalfDimensions * sizeof(BiasType));
 
             for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] =
-                  st->accumulator.psqtAccumulation[Perspective][k];
+                cast_2D<PSQTBuckets>(states_to_update[i]->template psqt_accumulation<Small>())[Perspective][k] =
+                  cast_2D<PSQTBuckets>(st->template psqt_accumulation<Small>())[Perspective][k];
 
             st = states_to_update[i];
 
@@ -551,10 +561,11 @@ class FeatureTransformer {
                 const IndexType offset = HalfDimensions * index;
 
                 for (IndexType j = 0; j < HalfDimensions; ++j)
-                    st->accumulator.accumulation[Perspective][j] -= weights[offset + j];
+                    cast_2D<TransformedFeatureDimensions>(
+                      st->template accumulation<Small>())[Perspective][j] -= weights[offset + j];
 
                 for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                    st->accumulator.psqtAccumulation[Perspective][k] -=
+                    cast_2D<PSQTBuckets>(st->template psqt_accumulation<Small>())[Perspective][k] -=
                       psqtWeights[index * PSQTBuckets + k];
             }
 
@@ -564,10 +575,11 @@ class FeatureTransformer {
                 const IndexType offset = HalfDimensions * index;
 
                 for (IndexType j = 0; j < HalfDimensions; ++j)
-                    st->accumulator.accumulation[Perspective][j] += weights[offset + j];
+                    cast_2D<TransformedFeatureDimensions>(
+                      st->template accumulation<Small>())[Perspective][j] += weights[offset + j];
 
                 for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                    st->accumulator.psqtAccumulation[Perspective][k] +=
+                    cast_2D<PSQTBuckets>(st->template psqt_accumulation<Small>())[Perspective][k] +=
                       psqtWeights[index * PSQTBuckets + k];
             }
         }
@@ -586,8 +598,8 @@ class FeatureTransformer {
         // Refresh the accumulator
         // Could be extracted to a separate function because it's done in 2 places,
         // but it's unclear if compilers would correctly handle register allocation.
-        auto& accumulator                 = pos.state()->accumulator;
-        accumulator.computed[Perspective] = true;
+        StateInfo* st = pos.state();
+        st->template computed<Small>()[Perspective] = true;
         FeatureSet::IndexList active;
         FeatureSet::append_active_indices<Perspective>(pos, active);
 
@@ -607,8 +619,8 @@ class FeatureTransformer {
                     acc[k] = vec_add_16(acc[k], column[k]);
             }
 
-            auto accTile =
-              reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
+            auto accTile = reinterpret_cast<vec_t*>(&cast_2D<TransformedFeatureDimensions>(
+              st->template accumulation<Small>())[Perspective][j * TileHeight]);
             for (unsigned k = 0; k < NumRegs; k++)
                 vec_store(&accTile[k], acc[k]);
         }
@@ -627,28 +639,30 @@ class FeatureTransformer {
                     psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]);
             }
 
-            auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
-              &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
+            auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(&cast_2D<PSQTBuckets>(
+              st->template psqt_accumulation<Small>())[Perspective][j * PsqtTileHeight]);
             for (std::size_t k = 0; k < NumPsqtRegs; ++k)
                 vec_store_psqt(&accTilePsqt[k], psqt[k]);
         }
 
 #else
-        std::memcpy(accumulator.accumulation[Perspective], biases,
+        std::memcpy(cast_2D<TransformedFeatureDimensions>(st->template accumulation<Small>())[Perspective],
+                    biases,
                     HalfDimensions * sizeof(BiasType));
 
         for (std::size_t k = 0; k < PSQTBuckets; ++k)
-            accumulator.psqtAccumulation[Perspective][k] = 0;
+            cast_2D<PSQTBuckets>(st->template psqt_accumulation<Small>())[Perspective][k] = 0;
 
         for (const auto index : active)
         {
             const IndexType offset = HalfDimensions * index;
 
             for (IndexType j = 0; j < HalfDimensions; ++j)
-                accumulator.accumulation[Perspective][j] += weights[offset + j];
+                cast_2D<TransformedFeatureDimensions>(
+                  st->template accumulation<Small>())[Perspective][j] += weights[offset + j];
 
             for (std::size_t k = 0; k < PSQTBuckets; ++k)
-                accumulator.psqtAccumulation[Perspective][k] +=
+                cast_2D<PSQTBuckets>(st->template psqt_accumulation<Small>())[Perspective][k] +=
                   psqtWeights[index * PSQTBuckets + k];
         }
 #endif
@@ -663,12 +677,12 @@ class FeatureTransformer {
         // Look for a usable accumulator of an earlier position. We keep track
         // of the estimated gain in terms of features to be added/subtracted.
         // Fast early exit.
-        if (pos.state()->accumulator.computed[Perspective])
+        if (pos.state()->template computed<Small>()[Perspective])
             return;
 
         auto [oldest_st, _] = try_find_computed_accumulator<Perspective>(pos);
 
-        if (oldest_st->accumulator.computed[Perspective])
+        if (oldest_st->template computed<Small>()[Perspective])
         {
             // Only update current position accumulator to minimize work.
             StateInfo* states_to_update[2] = {pos.state(), nullptr};
@@ -685,7 +699,7 @@ class FeatureTransformer {
 
         auto [oldest_st, next] = try_find_computed_accumulator<Perspective>(pos);
 
-        if (oldest_st->accumulator.computed[Perspective])
+        if (oldest_st->template computed<Small>()[Perspective])
         {
             if (next == nullptr)
                 return;
diff --git a/src/position.cpp b/src/position.cpp
index c45dd7b2e22..f03afb0e4f6 100644
--- a/src/position.cpp
+++ b/src/position.cpp
@@ -684,8 +684,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
     ++st->pliesFromNull;
 
     // Used by NNUE
-    st->accumulator.computed[WHITE] = false;
-    st->accumulator.computed[BLACK] = false;
+    st->accumulatorBig.computed[WHITE] =
+    st->accumulatorBig.computed[BLACK] =
+    st->accumulatorSmall.computed[WHITE] =
+    st->accumulatorSmall.computed[BLACK] = false;
     auto& dp                        = st->dirtyPiece;
     dp.dirty_num                    = 1;
 
@@ -964,15 +966,17 @@ void Position::do_null_move(StateInfo& newSt) {
     assert(!checkers());
     assert(&newSt != st);
 
-    std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
+    std::memcpy(&newSt, st, offsetof(StateInfo, accumulatorBig));
 
     newSt.previous = st;
     st             = &newSt;
 
     st->dirtyPiece.dirty_num        = 0;
     st->dirtyPiece.piece[0]         = NO_PIECE;  // Avoid checks in UpdateAccumulator()
-    st->accumulator.computed[WHITE] = false;
-    st->accumulator.computed[BLACK] = false;
+    st->accumulatorBig.computed[WHITE] =
+    st->accumulatorBig.computed[BLACK] =
+    st->accumulatorSmall.computed[WHITE] =
+    st->accumulatorSmall.computed[BLACK] = false;
 
     if (st->epSquare != SQ_NONE)
     {
diff --git a/src/position.h b/src/position.h
index ce03c34f332..6d7aa8fde5e 100644
--- a/src/position.h
+++ b/src/position.h
@@ -57,8 +57,22 @@ struct StateInfo {
     int        repetition;
 
     // Used by NNUE
-    Eval::NNUE::Accumulator accumulator;
-    DirtyPiece              dirtyPiece;
+    Eval::NNUE::Accumulator<false> accumulatorBig;
+    Eval::NNUE::Accumulator<true>  accumulatorSmall;
+    DirtyPiece                     dirtyPiece;
+
+    template<bool Small> constexpr std::int16_t* accumulation() {  // flat view, net chosen by Small
+        if constexpr (Small) return reinterpret_cast<std::int16_t*>(accumulatorSmall.accumulation);
+        else                 return reinterpret_cast<std::int16_t*>(accumulatorBig.accumulation);
+    }
+    template<bool Small> constexpr std::int32_t* psqt_accumulation() {  // flat view, net chosen by Small
+        if constexpr (Small) return reinterpret_cast<std::int32_t*>(accumulatorSmall.psqtAccumulation);
+        else                 return reinterpret_cast<std::int32_t*>(accumulatorBig.psqtAccumulation);
+    }
+    template<bool Small> constexpr bool* computed() {  // per-perspective flags of the chosen net
+        if constexpr (Small) return accumulatorSmall.computed;
+        else                 return accumulatorBig.computed;
+    }
 };
 
 
@@ -160,6 +174,7 @@ class Position {
     int     rule50_count() const;
     Value   non_pawn_material(Color c) const;
     Value   non_pawn_material() const;
+    Value   simple_eval() const;
 
     // Position consistency check, for debugging
     bool pos_is_ok() const;
@@ -305,6 +320,11 @@ inline Value Position::non_pawn_material() const {
     return non_pawn_material(WHITE) + non_pawn_material(BLACK);
 }
 
+inline Value Position::simple_eval() const {  // material balance, side to move minus opponent
+    const auto pawnDiff = count<PAWN>(sideToMove) - count<PAWN>(~sideToMove);
+    return PawnValue * pawnDiff + (non_pawn_material(sideToMove) - non_pawn_material(~sideToMove));
+}
+
 inline int Position::game_ply() const { return gamePly; }
 
 inline int Position::rule50_count() const { return st->rule50; }
diff --git a/src/thread.cpp b/src/thread.cpp
index bc884dedf01..f172199d691 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -210,7 +210,7 @@ void ThreadPool::start_thinking(Position&                 pos,
         th->rootMoves                      = rootMoves;
         th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
         th->rootState      = setupStates->back();
-        th->rootSimpleEval = Eval::simple_eval(pos, pos.side_to_move());
+        th->rootSimpleEval = pos.simple_eval();
     }
 
     main()->start_searching();
diff --git a/src/uci.cpp b/src/uci.cpp
index 95f6f349dd3..e169e78477a 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -320,7 +320,7 @@ void UCI::loop(int argc, char* argv[]) {
             std::string                f;
             if (is >> std::skipws >> f)
                 filename = f;
-            Eval::NNUE::save_eval(filename);
+            Eval::NNUE::save_eval(filename, false);
         }
         else if (token == "--help" || token == "help" || token == "--license" || token == "license")
             sync_cout
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index d0db1c76dd2..6858a296c52 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -84,7 +84,8 @@ void init(OptionsMap& o) {
     o["SyzygyProbeDepth"] << Option(1, 1, 100);
     o["Syzygy50MoveRule"] << Option(true);
     o["SyzygyProbeLimit"] << Option(7, 0, 7);
-    o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file);
+    o["EvalFileBig"] << Option(EvalFileDefaultNameBig, on_eval_file);
+    o["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, on_eval_file);
 }
 
 

From f4b75a53588332b503cf8df0d0b4dacc7a08aa79 Mon Sep 17 00:00:00 2001
From: mstembera <m_stembera@yahoo.com>
Date: Sat, 2 Dec 2023 20:13:35 -0800
Subject: [PATCH 2/7] Smaller 256 net and fixed Makefile by @linrock bench:
 1380121

---
 src/Makefile                 | 58 +++++++++++++++++++++++++++++++-----
 src/evaluate.h               |  2 +-
 src/nnue/nnue_architecture.h |  2 +-
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index 68aecc21168..44814aaea69 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -791,6 +791,7 @@ help:
 	@echo "profile-build           > standard build with profile-guided optimization"
 	@echo "build                   > skip profile-guided optimization"
 	@echo "net                     > Download the default nnue net"
+	@echo "net2                    > Download the smaller nnue net"
 	@echo "strip                   > Strip executable"
 	@echo "install                 > Install executable"
 	@echo "clean                   > Clean up"
@@ -857,13 +858,13 @@ endif
 	clang-profile-use clang-profile-make FORCE \
 	format analyze
 
-analyze: net config-sanity objclean
+analyze: net net2 config-sanity objclean
 	$(MAKE) -k ARCH=$(ARCH) COMP=$(COMP) $(OBJS)
 
-build: net config-sanity
+build: net net2 config-sanity
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
 
-profile-build: net config-sanity objclean profileclean
+profile-build: net net2 config-sanity objclean profileclean
 	@echo ""
 	@echo "Step 1/4. Building instrumented executable ..."
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@@ -905,10 +906,9 @@ profileclean:
 	@rm -f stockfish.res
 	@rm -f ./-lstdc++.res
 
-ifneq ("x","x")
 # set up shell variables for the net stuff
 netvariables:
-	$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	$(eval nnuenet := $(shell grep EvalFileDefaultNameBig evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
 	$(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet))
 	$(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet))
 	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@@ -952,7 +952,51 @@ net: netvariables
 		fi; \
 	fi; \
 
-endif
+netvariables2:
+	$(eval nnuenet := $(shell grep EvalFileDefaultNameSmall evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	$(eval nnuedownloadurl1 := https://tests.stockfishchess.org/api/nn/$(nnuenet))
+	$(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet))
+	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
+	$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
+
+# small evaluation network (nnue): download if missing, validate by sha256 prefix
+net2: netvariables2
+	@echo "Default net: $(nnuenet)"
+	@if [ "x$(curl_or_wget)" = "x" ]; then \
+		echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \
+	fi
+	@if [ "x$(shasum_command)" = "x" ]; then \
+		echo "shasum / sha256sum not found, skipping net validation"; \
+	elif test -f "$(nnuenet)"; then \
+		if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+			echo "Removing invalid network"; rm -f $(nnuenet); \
+		fi; \
+	fi;
+	@for nnuedownloadurl in "$(nnuedownloadurl1)" "$(nnuedownloadurl2)"; do \
+		if test -f "$(nnuenet)"; then \
+			echo "$(nnuenet) available : OK"; break; \
+		else \
+			if [ "x$(curl_or_wget)" != "x" ]; then \
+				echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\
+			else \
+				echo "No net found and download not possible"; exit 1;\
+			fi; \
+		fi; \
+		if [ "x$(shasum_command)" != "x" ]; then \
+			if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+				echo "Removing failed download"; rm -f $(nnuenet); \
+			fi; \
+		fi; \
+	done
+	@if ! test -f "$(nnuenet)"; then \
+		echo "Failed to download $(nnuenet)."; \
+	fi;
+	@if [ "x$(shasum_command)" != "x" ] && test -f "$(nnuenet)"; then \
+		if [ "$(nnuenet)" = "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
+			echo "Network validated"; \
+		fi; \
+	fi
+
 
 format:
 	$(CLANG-FORMAT) -i $(SRCS) $(HEADERS) -style=file
@@ -1076,6 +1120,6 @@ icx-profile-use:
 .depend: $(SRCS)
 	-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
 
-ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean config-sanity))
+ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net net2 objclean profileclean config-sanity))
 -include .depend
 endif
diff --git a/src/evaluate.h b/src/evaluate.h
index 48ca596bc05..6eafdd539f9 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -39,7 +39,7 @@ extern std::string currentEvalFileName[2];
 // for the build process (profile-build and fishtest) to work. Do not change the
 // name of the macro, as it is used in the Makefile.
 #define EvalFileDefaultNameBig   "nn-0000000000a0.nnue"
-#define EvalFileDefaultNameSmall "nn-a70fe1969e12.nnue"
+#define EvalFileDefaultNameSmall "nn-ecb35f70ff2a.nnue"
 
 namespace NNUE {
 
diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h
index adfeaf045eb..0778c3f455e 100644
--- a/src/nnue/nnue_architecture.h
+++ b/src/nnue/nnue_architecture.h
@@ -42,7 +42,7 @@ constexpr IndexType TransformedFeatureDimensionsBig   = 2560;
 constexpr int       L2Big                             = 15;
 constexpr int       L3Big                             = 32;
 
-constexpr IndexType TransformedFeatureDimensionsSmall = 1024;
+constexpr IndexType TransformedFeatureDimensionsSmall = 256;
 constexpr int       L2Small                           = 15;
 constexpr int       L3Small                           = 32;
 

From eac74d6caf2b26a6c6b73582c5ab57c54f1c5afa Mon Sep 17 00:00:00 2001
From: mstembera <m_stembera@yahoo.com>
Date: Sat, 2 Dec 2023 21:16:38 -0800
Subject: [PATCH 3/7] Change EvalFileBig back to EvalFile to make fishtest
 happy bench: 1380121

---
 src/ucioption.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index 6858a296c52..e9f48d4ee67 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -84,7 +84,7 @@ void init(OptionsMap& o) {
     o["SyzygyProbeDepth"] << Option(1, 1, 100);
     o["Syzygy50MoveRule"] << Option(true);
     o["SyzygyProbeLimit"] << Option(7, 0, 7);
-    o["EvalFileBig"] << Option(EvalFileDefaultNameBig, on_eval_file);
+    o["EvalFile"] << Option(EvalFileDefaultNameBig, on_eval_file);
     o["EvalFileSmall"] << Option(EvalFileDefaultNameSmall, on_eval_file);
 }
 

From 0455d0cab104f615676869e8643bba49446c535c Mon Sep 17 00:00:00 2001
From: Linmiao Xu <linmiao.xu@gmail.com>
Date: Sun, 3 Dec 2023 18:46:22 -0500
Subject: [PATCH 4/7] hint big nnue below 2000 simple eval

bench 1380121
---
 src/nnue/evaluate_nnue.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
index 26ce4bdcde2..bb9efa80231 100644
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -176,7 +176,7 @@ static bool write_parameters(std::ostream& stream, bool small) {
 void hint_common_parent_position(const Position& pos) {
 
     int simpleEval = pos.simple_eval();
-    if (abs(simpleEval) < 2500)
+    if (abs(simpleEval) < 2000)
         featureTransformerBig->hint_common_access(pos);
     else
         featureTransformerSmall->hint_common_access(pos);

From 64237596a0eaf37630a71b3e4a636a9a42be5058 Mon Sep 17 00:00:00 2001
From: Linmiao Xu <linmiao.xu@gmail.com>
Date: Sun, 3 Dec 2023 20:09:57 -0500
Subject: [PATCH 5/7] big below 2200, add stochastic term

bench 1440404
---
 src/nnue/evaluate_nnue.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
index bb9efa80231..94a40a3e6c2 100644
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -175,8 +175,8 @@ static bool write_parameters(std::ostream& stream, bool small) {
 
 void hint_common_parent_position(const Position& pos) {
 
-    int simpleEval = pos.simple_eval();
-    if (abs(simpleEval) < 2000)
+    int simpleEval = pos.simple_eval() + (int(pos.key() & 7) - 3);
+    if (abs(simpleEval) < 2200)
         featureTransformerBig->hint_common_access(pos);
     else
         featureTransformerSmall->hint_common_access(pos);

From c23423787c49f33ee0e7856eb6befc8a10dd2e84 Mon Sep 17 00:00:00 2001
From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com>
Date: Tue, 5 Dec 2023 16:41:18 +0800
Subject: [PATCH 6/7] Try a search tune for dual NNUE. Bench: 1440404

---
 src/search.cpp | 120 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 73 insertions(+), 47 deletions(-)

diff --git a/src/search.cpp b/src/search.cpp
index b3ca8c9afe5..b0eb1eb8a26 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -47,6 +47,32 @@
 
 namespace Stockfish {
 
+int
+a1=125, a2=43, a3=1487, a4=976, a5=808, a6=291, a7=350, a8=1200, a9=361, a10=361, a11=1182,
+b1=10, b2=15335, b3=110, b4=121, b5=14, b6=1449, b7=1449,
+c1=474, c2=270, c3=174, c4=9, c5=321, c6=29462, c7=17257, c8=24, c9=281, c10=152, c11=14, c12=8, c13=168, c14=70,
+d1=416, d2=7, d3=239, d4=291, d5=185, d6=6, d7=3645, d8=7836, d9=13, d10=62, d11=123, d12=77, d13=127, d14=26,
+e1=24, e2=64, e3=57, e4=18, e5=11, e6=15, e7=19, e8=9, e9=4194, e10=4000, e11=7, e12=3848, e13=14200,
+f1=50, f2=2, f3=2, f4=12, f5=13828, f6=11369, f7=6, f8=657, f9=10,
+g1=200, g2=90, g3=168, g4=168;
+
+TUNE(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
+     b1);
+TUNE(SetRange(1, 30000), b2);
+TUNE(b3, b4, b5, b6, b7,
+     c1, c2, c3, c4);
+TUNE(SetRange(1, 600), c5);
+TUNE(c6, c7, c8, c9);
+TUNE(SetRange(1, 300), c10);
+TUNE(c11, c12, c13, c14,
+     d1, d2, d3, d4, d5, d6, d7);
+TUNE(SetRange(1, 15000), d8);
+TUNE(d9, d10, d11, d12, d13, d14,
+     e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12);
+TUNE(SetRange(1, 30000), e13);
+TUNE(f1, f2, f3, f4, f5, f6, f7, f8, f9,
+     g1, g2, g3, g4);
+
 namespace Search {
 
 LimitsType Limits;
@@ -77,7 +103,7 @@ enum NodeType {
 
 // Futility margin
 Value futility_margin(Depth d, bool noTtCutNode, bool improving) {
-    return Value((125 - 43 * noTtCutNode) * (d - improving));
+    return Value((a1 - a2 * noTtCutNode) * (d - improving));
 }
 
 // Reductions lookup table initialized at startup
@@ -85,8 +111,8 @@ int Reductions[MAX_MOVES];  // [depth or moveNumber]
 
 Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) {
     int reductionScale = Reductions[d] * Reductions[mn];
-    return (reductionScale + 1487 - int(delta) * 976 / int(rootDelta)) / 1024
-         + (!i && reductionScale > 808);
+    return (reductionScale + a3 - int(delta) * a4 / int(rootDelta)) / 1024
+         + (!i && reductionScale > a5);
 }
 
 constexpr int futility_move_count(bool improving, Depth depth) {
@@ -94,10 +120,10 @@ constexpr int futility_move_count(bool improving, Depth depth) {
 }
 
 // History and stats update bonus, based on depth
-int stat_bonus(Depth d) { return std::min(291 * d - 350, 1200); }
+int stat_bonus(Depth d) { return std::min(a6 * d - a7, a8); }
 
 // History and stats update malus, based on depth
-int stat_malus(Depth d) { return std::min(361 * d - 361, 1182); }
+int stat_malus(Depth d) { return std::min(a9 * d - a10, a11); }
 
 // Add a small random component to draw evaluations to avoid 3-fold blindness
 Value value_draw(const Thread* thisThread) {
@@ -367,12 +393,12 @@ void Thread::search() {
 
             // Reset aspiration window starting size
             Value avg = rootMoves[pvIdx].averageScore;
-            delta     = Value(10) + int(avg) * avg / 15335;
+            delta     = Value(b1) + int(avg) * avg / b2;
             alpha     = std::max(avg - delta, -VALUE_INFINITE);
             beta      = std::min(avg + delta, VALUE_INFINITE);
 
             // Adjust optimism based on root move's averageScore (~4 Elo)
-            optimism[us]  = 110 * avg / (std::abs(avg) + 121);
+            optimism[us]  = b3 * avg / (std::abs(avg) + b4);
             optimism[~us] = -optimism[us];
 
             // Start with a small aspiration window and, in the case of a fail
@@ -746,7 +772,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // Use static evaluation difference to improve quiet move ordering (~4 Elo)
     if (is_ok((ss - 1)->currentMove) && !(ss - 1)->inCheck && !priorCapture)
     {
-        int bonus = std::clamp(-14 * int((ss - 1)->staticEval + ss->staticEval), -1449, 1449);
+        int bonus = std::clamp(-b5 * int((ss - 1)->staticEval + ss->staticEval), -b6, b7);
         thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] << bonus;
         if (type_of(pos.piece_on(prevSq)) != PAWN && type_of((ss - 1)->currentMove) != PROMOTION)
             thisThread->pawnHistory[pawn_structure(pos)][pos.piece_on(prevSq)][prevSq] << bonus / 4;
@@ -765,7 +791,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // If eval is really low check with qsearch if it can exceed alpha, if it can't,
     // return a fail low.
     // Adjust razor margin according to cutoffCnt. (~1 Elo)
-    if (eval < alpha - 474 - (270 - 174 * ((ss + 1)->cutoffCnt > 3)) * depth * depth)
+    if (eval < alpha - c1 - (c2 - c3 * ((ss + 1)->cutoffCnt > 3)) * depth * depth)
     {
         value = qsearch<NonPV>(pos, ss, alpha - 1, alpha);
         if (value < alpha)
@@ -774,24 +800,24 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 
     // Step 8. Futility pruning: child node (~40 Elo)
     // The depth condition is important for mate finding.
-    if (!ss->ttPv && depth < 9
+    if (!ss->ttPv && depth < c4
         && eval - futility_margin(depth, cutNode && !ss->ttHit, improving)
-               - (ss - 1)->statScore / 321
+               - (ss - 1)->statScore / c5
              >= beta
-        && eval >= beta && eval < 29462  // smaller than TB wins
+        && eval >= beta && eval < c6  // smaller than TB wins
         && (!ttMove || ttCapture))
         return (eval + beta) / 2;
 
     // Step 9. Null move search with verification search (~35 Elo)
-    if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < 17257 && eval >= beta
-        && eval >= ss->staticEval && ss->staticEval >= beta - 24 * depth + 281 && !excludedMove
+    if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < c7 && eval >= beta
+        && eval >= ss->staticEval && ss->staticEval >= beta - c8 * depth + c9 && !excludedMove
         && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly
         && beta > VALUE_TB_LOSS_IN_MAX_PLY)
     {
         assert(eval - beta >= 0);
 
         // Null move dynamic reduction based on depth and eval
-        Depth R = std::min(int(eval - beta) / 152, 6) + depth / 3 + 4;
+        Depth R = std::min(int(eval - beta) / c10, 6) + depth / 3 + 4;
 
         ss->currentMove         = MOVE_NULL;
         ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -805,7 +831,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         // Do not return unproven mate or TB scores
         if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY)
         {
-            if (thisThread->nmpMinPly || depth < 14)
+            if (thisThread->nmpMinPly || depth < c11)
                 return nullValue;
 
             assert(!thisThread->nmpMinPly);  // Recursive verification is not allowed
@@ -835,10 +861,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         return qsearch<PV>(pos, ss, alpha, beta);
 
     // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough.
-    if (cutNode && depth >= 8 && !ttMove)
+    if (cutNode && depth >= c12 && !ttMove)
         depth -= 2;
 
-    probCutBeta = beta + 168 - 70 * improving;
+    probCutBeta = beta + c13 - c14 * improving;
 
     // Step 11. ProbCut (~10 Elo)
     // If we have a good enough capture (or queen promotion) and a reduced search returns a value
@@ -896,7 +922,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 moves_loop:  // When in check, search starts here
 
     // Step 12. A small Probcut idea, when we are in check (~4 Elo)
-    probCutBeta = beta + 416;
+    probCutBeta = beta + d1;
     if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER)
         && tte->depth() >= depth - 4 && ttValue >= probCutBeta
         && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY)
@@ -979,18 +1005,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             if (capture || givesCheck)
             {
                 // Futility pruning for captures (~2 Elo)
-                if (!givesCheck && lmrDepth < 7 && !ss->inCheck)
+                if (!givesCheck && lmrDepth < d2 && !ss->inCheck)
                 {
                     Piece capturedPiece = pos.piece_on(to_sq(move));
                     int   futilityEval =
-                      ss->staticEval + 239 + 291 * lmrDepth + PieceValue[capturedPiece]
+                      ss->staticEval + d3 + d4 * lmrDepth + PieceValue[capturedPiece]
                       + captureHistory[movedPiece][to_sq(move)][type_of(capturedPiece)] / 7;
                     if (futilityEval < alpha)
                         continue;
                 }
 
                 // SEE based pruning for captures and checks (~11 Elo)
-                if (!pos.see_ge(move, Value(-185) * depth))
+                if (!pos.see_ge(move, Value(-d5) * depth))
                     continue;
             }
             else
@@ -1001,25 +1027,25 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                             + thisThread->pawnHistory[pawn_structure(pos)][movedPiece][to_sq(move)];
 
                 // Continuation history based pruning (~2 Elo)
-                if (lmrDepth < 6 && history < -3645 * depth)
+                if (lmrDepth < d6 && history < -d7 * depth)
                     continue;
 
                 history += 2 * thisThread->mainHistory[us][from_to(move)];
 
-                lmrDepth += history / 7836;
+                lmrDepth += history / d8;
                 lmrDepth = std::max(lmrDepth, -1);
 
                 // Futility pruning: parent node (~13 Elo)
-                if (!ss->inCheck && lmrDepth < 13
-                    && ss->staticEval + (bestValue < ss->staticEval - 62 ? 123 : 77)
-                           + 127 * lmrDepth
+                if (!ss->inCheck && lmrDepth < d9
+                    && ss->staticEval + (bestValue < ss->staticEval - d10 ? d11 : d12)
+                           + d13 * lmrDepth
                          <= alpha)
                     continue;
 
                 lmrDepth = std::max(lmrDepth, 0);
 
                 // Prune moves with negative SEE (~4 Elo)
-                if (!pos.see_ge(move, Value(-26 * lmrDepth * lmrDepth)))
+                if (!pos.see_ge(move, Value(-d14 * lmrDepth * lmrDepth)))
                     continue;
             }
         }
@@ -1039,11 +1065,11 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             // so changing them requires tests at these types of time controls.
             // Recursive singular search is avoided.
             if (!rootNode && move == ttMove && !excludedMove
-                && depth >= 4 - (thisThread->completedDepth > 24) + 2 * (PvNode && tte->is_pv())
+                && depth >= 4 - (thisThread->completedDepth > e1) + 2 * (PvNode && tte->is_pv())
                 && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER)
                 && tte->depth() >= depth - 3)
             {
-                Value singularBeta  = ttValue - (64 + 57 * (ss->ttPv && !PvNode)) * depth / 64;
+                Value singularBeta  = ttValue - (e2 + e3 * (ss->ttPv && !PvNode)) * depth / 64;
                 Depth singularDepth = newDepth / 2;
 
                 ss->excludedMove = move;
@@ -1057,10 +1083,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                     singularQuietLMR = !ttCapture;
 
                     // Avoid search explosion by limiting the number of double extensions
-                    if (!PvNode && value < singularBeta - 18 && ss->doubleExtensions <= 11)
+                    if (!PvNode && value < singularBeta - e4 && ss->doubleExtensions <= e5)
                     {
                         extension = 2;
-                        depth += depth < 15;
+                        depth += depth < e6;
                     }
                 }
 
@@ -1084,7 +1110,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 
                 // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo)
                 else if (cutNode)
-                    extension = depth < 19 ? -2 : -1;
+                    extension = depth < e7 ? -2 : -1;
 
                 // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo)
                 else if (ttValue <= value)
@@ -1092,18 +1118,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             }
 
             // Check extensions (~1 Elo)
-            else if (givesCheck && depth > 9)
+            else if (givesCheck && depth > e8)
                 extension = 1;
 
             // Quiet ttMove extensions (~1 Elo)
             else if (PvNode && move == ttMove && move == ss->killers[0]
-                     && (*contHist[0])[movedPiece][to_sq(move)] >= 4194)
+                     && (*contHist[0])[movedPiece][to_sq(move)] >= e9)
                 extension = 1;
 
             // Recapture extensions (~1 Elo)
             else if (PvNode && move == ttMove && to_sq(move) == prevSq
                      && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))]
-                          > 4000)
+                          > e10)
                 extension = 1;
         }
 
@@ -1127,7 +1153,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             r -= cutNode && tte->depth() >= depth ? 3 : 2;
 
         // Decrease reduction if opponent's move count is high (~1 Elo)
-        if ((ss - 1)->moveCount > 7)
+        if ((ss - 1)->moveCount > e11)
             r--;
 
         // Increase reduction for cut nodes (~3 Elo)
@@ -1162,10 +1188,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)]
                       + (*contHist[0])[movedPiece][to_sq(move)]
                       + (*contHist[1])[movedPiece][to_sq(move)]
-                      + (*contHist[3])[movedPiece][to_sq(move)] - 3848;
+                      + (*contHist[3])[movedPiece][to_sq(move)] - e12;
 
         // Decrease/increase reduction for moves with a good/bad history (~25 Elo)
-        r -= ss->statScore / 14200;
+        r -= ss->statScore / e13;
 
         // Step 17. Late moves reduction / extension (LMR, ~117 Elo)
         // We use various heuristics for the sons of a node after the first son has
@@ -1188,7 +1214,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             {
                 // Adjust full-depth search based on LMR results - if the result
                 // was good enough search deeper, if it was bad enough search shallower.
-                const bool doDeeperSearch    = value > (bestValue + 50 + 2 * newDepth);  // (~1 Elo)
+                const bool doDeeperSearch    = value > (bestValue + f1 + f2 * newDepth);  // (~1 Elo)
                 const bool doShallowerSearch = value < bestValue + newDepth;             // (~2 Elo)
 
                 newDepth += doDeeperSearch - doShallowerSearch;
@@ -1303,7 +1329,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                 else
                 {
                     // Reduce other moves if we have found at least one score improvement (~2 Elo)
-                    if (depth > 2 && depth < 12 && beta < 13828 && value > -11369)
+                    if (depth > f3 && depth < f4 && beta < f5 && value > -f6)
                         depth -= 2;
 
                     assert(depth > 0);
@@ -1342,8 +1368,8 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // Bonus for prior countermove that caused the fail low
     else if (!priorCapture && prevSq != SQ_NONE)
     {
-        int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 657)
-                  + ((ss - 1)->moveCount > 10);
+        int bonus = (depth > f7) + (PvNode || cutNode) + (bestValue < alpha - f8)
+                  + ((ss - 1)->moveCount > f9);
         update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq,
                                       stat_bonus(depth) * bonus);
         thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)]
@@ -1475,7 +1501,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
         if (bestValue > alpha)
             alpha = bestValue;
 
-        futilityBase = ss->staticEval + 200;
+        futilityBase = ss->staticEval + g1;
     }
 
     const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory,
@@ -1555,7 +1581,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
                 continue;
 
             // Do not search moves with bad enough SEE values (~5 Elo)
-            if (!pos.see_ge(move, Value(-90)))
+            if (!pos.see_ge(move, Value(-g2)))
                 continue;
         }
 
@@ -1691,7 +1717,7 @@ void update_all_stats(const Position& pos,
 
     if (!pos.capture_stage(bestMove))
     {
-        int bestMoveBonus = bestValue > beta + 168 ? quietMoveBonus      // larger bonus
+        int bestMoveBonus = bestValue > beta + g3 ? quietMoveBonus      // larger bonus
                                                    : stat_bonus(depth);  // smaller bonus
 
         // Increase stats for the best move in case it was a quiet move
@@ -1699,7 +1725,7 @@ void update_all_stats(const Position& pos,
         thisThread->pawnHistory[pawn_structure(pos)][moved_piece][to_sq(bestMove)]
           << quietMoveBonus;
 
-        int moveMalus = bestValue > beta + 168 ? quietMoveMalus      // larger malus
+        int moveMalus = bestValue > beta + g4 ? quietMoveMalus      // larger malus
                                                : stat_malus(depth);  // smaller malus
 
         // Decrease stats for all non-best quiet moves

From e15eec7221d67e52c1db3778b2772489e8955a0f Mon Sep 17 00:00:00 2001
From: Muzhen Gaming <61100393+XInTheDark@users.noreply.github.com>
Date: Wed, 6 Dec 2023 21:20:41 +0800
Subject: [PATCH 7/7] v1: Tuned 44k games, use latest linrock smallnet. Bench:
 1216398

---
 src/evaluate.h |   2 +-
 src/search.cpp | 120 +++++++++++++++++++------------------------------
 2 files changed, 48 insertions(+), 74 deletions(-)

diff --git a/src/evaluate.h b/src/evaluate.h
index 6eafdd539f9..5a105870c54 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -39,7 +39,7 @@ extern std::string currentEvalFileName[2];
 // for the build process (profile-build and fishtest) to work. Do not change the
 // name of the macro, as it is used in the Makefile.
 #define EvalFileDefaultNameBig   "nn-0000000000a0.nnue"
-#define EvalFileDefaultNameSmall "nn-ecb35f70ff2a.nnue"
+#define EvalFileDefaultNameSmall "nn-9067e33176e8.nnue"
 
 namespace NNUE {
 
diff --git a/src/search.cpp b/src/search.cpp
index b0eb1eb8a26..132e70cd4bf 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -47,32 +47,6 @@
 
 namespace Stockfish {
 
-int
-a1=125, a2=43, a3=1487, a4=976, a5=808, a6=291, a7=350, a8=1200, a9=361, a10=361, a11=1182,
-b1=10, b2=15335, b3=110, b4=121, b5=14, b6=1449, b7=1449,
-c1=474, c2=270, c3=174, c4=9, c5=321, c6=29462, c7=17257, c8=24, c9=281, c10=152, c11=14, c12=8, c13=168, c14=70,
-d1=416, d2=7, d3=239, d4=291, d5=185, d6=6, d7=3645, d8=7836, d9=13, d10=62, d11=123, d12=77, d13=127, d14=26,
-e1=24, e2=64, e3=57, e4=18, e5=11, e6=15, e7=19, e8=9, e9=4194, e10=4000, e11=7, e12=3848, e13=14200,
-f1=50, f2=2, f3=2, f4=12, f5=13828, f6=11369, f7=6, f8=657, f9=10,
-g1=200, g2=90, g3=168, g4=168;
-
-TUNE(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
-     b1);
-TUNE(SetRange(1, 30000), b2);
-TUNE(b3, b4, b5, b6, b7,
-     c1, c2, c3, c4);
-TUNE(SetRange(1, 600), c5);
-TUNE(c6, c7, c8, c9);
-TUNE(SetRange(1, 300), c10);
-TUNE(c11, c12, c13, c14,
-     d1, d2, d3, d4, d5, d6, d7);
-TUNE(SetRange(1, 15000), d8);
-TUNE(d9, d10, d11, d12, d13, d14,
-     e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12);
-TUNE(SetRange(1, 30000), e13);
-TUNE(f1, f2, f3, f4, f5, f6, f7, f8, f9,
-     g1, g2, g3, g4);
-
 namespace Search {
 
 LimitsType Limits;
@@ -103,7 +77,7 @@ enum NodeType {
 
 // Futility margin
 Value futility_margin(Depth d, bool noTtCutNode, bool improving) {
-    return Value((a1 - a2 * noTtCutNode) * (d - improving));
+    return Value((116 - 44 * noTtCutNode) * (d - improving));
 }
 
 // Reductions lookup table initialized at startup
@@ -111,8 +85,8 @@ int Reductions[MAX_MOVES];  // [depth or moveNumber]
 
 Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) {
     int reductionScale = Reductions[d] * Reductions[mn];
-    return (reductionScale + a3 - int(delta) * a4 / int(rootDelta)) / 1024
-         + (!i && reductionScale > a5);
+    return (reductionScale + 1346 - int(delta) * 896 / int(rootDelta)) / 1024
+         + (!i && reductionScale > 880);
 }
 
 constexpr int futility_move_count(bool improving, Depth depth) {
@@ -120,10 +94,10 @@ constexpr int futility_move_count(bool improving, Depth depth) {
 }
 
 // History and stats update bonus, based on depth
-int stat_bonus(Depth d) { return std::min(a6 * d - a7, a8); }
+int stat_bonus(Depth d) { return std::min(268 * d - 352, 1153); }
 
 // History and stats update malus, based on depth
-int stat_malus(Depth d) { return std::min(a9 * d - a10, a11); }
+int stat_malus(Depth d) { return std::min(400 * d - 354, 1201); }
 
 // Add a small random component to draw evaluations to avoid 3-fold blindness
 Value value_draw(const Thread* thisThread) {
@@ -393,12 +367,12 @@ void Thread::search() {
 
             // Reset aspiration window starting size
             Value avg = rootMoves[pvIdx].averageScore;
-            delta     = Value(b1) + int(avg) * avg / b2;
+            delta     = Value(9) + int(avg) * avg / 14847;
             alpha     = std::max(avg - delta, -VALUE_INFINITE);
             beta      = std::min(avg + delta, VALUE_INFINITE);
 
             // Adjust optimism based on root move's averageScore (~4 Elo)
-            optimism[us]  = b3 * avg / (std::abs(avg) + b4);
+            optimism[us]  = 121 * avg / (std::abs(avg) + 109);
             optimism[~us] = -optimism[us];
 
             // Start with a small aspiration window and, in the case of a fail
@@ -772,7 +746,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // Use static evaluation difference to improve quiet move ordering (~4 Elo)
     if (is_ok((ss - 1)->currentMove) && !(ss - 1)->inCheck && !priorCapture)
     {
-        int bonus = std::clamp(-b5 * int((ss - 1)->staticEval + ss->staticEval), -b6, b7);
+        int bonus = std::clamp(-13 * int((ss - 1)->staticEval + ss->staticEval), -1555, 1452);
         thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)] << bonus;
         if (type_of(pos.piece_on(prevSq)) != PAWN && type_of((ss - 1)->currentMove) != PROMOTION)
             thisThread->pawnHistory[pawn_structure(pos)][pos.piece_on(prevSq)][prevSq] << bonus / 4;
@@ -791,7 +765,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // If eval is really low check with qsearch if it can exceed alpha, if it can't,
     // return a fail low.
     // Adjust razor margin according to cutoffCnt. (~1 Elo)
-    if (eval < alpha - c1 - (c2 - c3 * ((ss + 1)->cutoffCnt > 3)) * depth * depth)
+    if (eval < alpha - 472 - (284 - 165 * ((ss + 1)->cutoffCnt > 3)) * depth * depth)
     {
         value = qsearch<NonPV>(pos, ss, alpha - 1, alpha);
         if (value < alpha)
@@ -800,24 +774,24 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 
     // Step 8. Futility pruning: child node (~40 Elo)
     // The depth condition is important for mate finding.
-    if (!ss->ttPv && depth < c4
+    if (!ss->ttPv && depth < 9
         && eval - futility_margin(depth, cutNode && !ss->ttHit, improving)
-               - (ss - 1)->statScore / c5
+               - (ss - 1)->statScore / 337
              >= beta
-        && eval >= beta && eval < c6  // smaller than TB wins
+        && eval >= beta && eval < 29008  // smaller than TB wins
         && (!ttMove || ttCapture))
         return (eval + beta) / 2;
 
     // Step 9. Null move search with verification search (~35 Elo)
-    if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < c7 && eval >= beta
-        && eval >= ss->staticEval && ss->staticEval >= beta - c8 * depth + c9 && !excludedMove
+    if (!PvNode && (ss - 1)->currentMove != MOVE_NULL && (ss - 1)->statScore < 17496 && eval >= beta
+        && eval >= ss->staticEval && ss->staticEval >= beta - 23 * depth + 304 && !excludedMove
         && pos.non_pawn_material(us) && ss->ply >= thisThread->nmpMinPly
         && beta > VALUE_TB_LOSS_IN_MAX_PLY)
     {
         assert(eval - beta >= 0);
 
         // Null move dynamic reduction based on depth and eval
-        Depth R = std::min(int(eval - beta) / c10, 6) + depth / 3 + 4;
+        Depth R = std::min(int(eval - beta) / 144, 6) + depth / 3 + 4;
 
         ss->currentMove         = MOVE_NULL;
         ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -831,7 +805,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         // Do not return unproven mate or TB scores
         if (nullValue >= beta && nullValue < VALUE_TB_WIN_IN_MAX_PLY)
         {
-            if (thisThread->nmpMinPly || depth < c11)
+            if (thisThread->nmpMinPly || depth < 15)
                 return nullValue;
 
             assert(!thisThread->nmpMinPly);  // Recursive verification is not allowed
@@ -861,10 +835,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         return qsearch<PV>(pos, ss, alpha, beta);
 
     // For cutNodes without a ttMove, we decrease depth by 2 if depth is high enough.
-    if (cutNode && depth >= c12 && !ttMove)
+    if (cutNode && depth >= 8 && !ttMove)
         depth -= 2;
 
-    probCutBeta = beta + c13 - c14 * improving;
+    probCutBeta = beta + 163 - 67 * improving;
 
     // Step 11. ProbCut (~10 Elo)
     // If we have a good enough capture (or queen promotion) and a reduced search returns a value
@@ -922,7 +896,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 moves_loop:  // When in check, search starts here
 
     // Step 12. A small Probcut idea, when we are in check (~4 Elo)
-    probCutBeta = beta + d1;
+    probCutBeta = beta + 425;
     if (ss->inCheck && !PvNode && ttCapture && (tte->bound() & BOUND_LOWER)
         && tte->depth() >= depth - 4 && ttValue >= probCutBeta
         && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY)
@@ -1005,18 +979,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             if (capture || givesCheck)
             {
                 // Futility pruning for captures (~2 Elo)
-                if (!givesCheck && lmrDepth < d2 && !ss->inCheck)
+                if (!givesCheck && lmrDepth < 7 && !ss->inCheck)
                 {
                     Piece capturedPiece = pos.piece_on(to_sq(move));
                     int   futilityEval =
-                      ss->staticEval + d3 + d4 * lmrDepth + PieceValue[capturedPiece]
+                      ss->staticEval + 238 + 305 * lmrDepth + PieceValue[capturedPiece]
                       + captureHistory[movedPiece][to_sq(move)][type_of(capturedPiece)] / 7;
                     if (futilityEval < alpha)
                         continue;
                 }
 
                 // SEE based pruning for captures and checks (~11 Elo)
-                if (!pos.see_ge(move, Value(-d5) * depth))
+                if (!pos.see_ge(move, Value(-187) * depth))
                     continue;
             }
             else
@@ -1027,25 +1001,25 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                             + thisThread->pawnHistory[pawn_structure(pos)][movedPiece][to_sq(move)];
 
                 // Continuation history based pruning (~2 Elo)
-                if (lmrDepth < d6 && history < -d7 * depth)
+                if (lmrDepth < 6 && history < -3752 * depth)
                     continue;
 
                 history += 2 * thisThread->mainHistory[us][from_to(move)];
 
-                lmrDepth += history / d8;
+                lmrDepth += history / 7838;
                 lmrDepth = std::max(lmrDepth, -1);
 
                 // Futility pruning: parent node (~13 Elo)
-                if (!ss->inCheck && lmrDepth < d9
-                    && ss->staticEval + (bestValue < ss->staticEval - d10 ? d11 : d12)
-                           + d13 * lmrDepth
+                if (!ss->inCheck && lmrDepth < 14
+                    && ss->staticEval + (bestValue < ss->staticEval - 57 ? 124 : 71)
+                           + 118 * lmrDepth
                          <= alpha)
                     continue;
 
                 lmrDepth = std::max(lmrDepth, 0);
 
                 // Prune moves with negative SEE (~4 Elo)
-                if (!pos.see_ge(move, Value(-d14 * lmrDepth * lmrDepth)))
+                if (!pos.see_ge(move, Value(-26 * lmrDepth * lmrDepth)))
                     continue;
             }
         }
@@ -1065,11 +1039,11 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             // so changing them requires tests at these types of time controls.
             // Recursive singular search is avoided.
             if (!rootNode && move == ttMove && !excludedMove
-                && depth >= 4 - (thisThread->completedDepth > e1) + 2 * (PvNode && tte->is_pv())
+                && depth >= 4 - (thisThread->completedDepth > 27) + 2 * (PvNode && tte->is_pv())
                 && abs(ttValue) < VALUE_TB_WIN_IN_MAX_PLY && (tte->bound() & BOUND_LOWER)
                 && tte->depth() >= depth - 3)
             {
-                Value singularBeta  = ttValue - (e2 + e3 * (ss->ttPv && !PvNode)) * depth / 64;
+                Value singularBeta  = ttValue - (66 + 58 * (ss->ttPv && !PvNode)) * depth / 64;
                 Depth singularDepth = newDepth / 2;
 
                 ss->excludedMove = move;
@@ -1083,10 +1057,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                     singularQuietLMR = !ttCapture;
 
                     // Avoid search explosion by limiting the number of double extensions
-                    if (!PvNode && value < singularBeta - e4 && ss->doubleExtensions <= e5)
+                    if (!PvNode && value < singularBeta - 17 && ss->doubleExtensions <= 11)
                     {
                         extension = 2;
-                        depth += depth < e6;
+                        depth += depth < 15;
                     }
                 }
 
@@ -1110,7 +1084,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
 
                 // If we are on a cutNode but the ttMove is not assumed to fail high over current beta (~1 Elo)
                 else if (cutNode)
-                    extension = depth < e7 ? -2 : -1;
+                    extension = depth < 19 ? -2 : -1;
 
                 // If the ttMove is assumed to fail low over the value of the reduced search (~1 Elo)
                 else if (ttValue <= value)
@@ -1118,18 +1092,18 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             }
 
             // Check extensions (~1 Elo)
-            else if (givesCheck && depth > e8)
+            else if (givesCheck && depth > 10)
                 extension = 1;
 
             // Quiet ttMove extensions (~1 Elo)
             else if (PvNode && move == ttMove && move == ss->killers[0]
-                     && (*contHist[0])[movedPiece][to_sq(move)] >= e9)
+                     && (*contHist[0])[movedPiece][to_sq(move)] >= 4325)
                 extension = 1;
 
             // Recapture extensions (~1 Elo)
             else if (PvNode && move == ttMove && to_sq(move) == prevSq
                      && captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))]
-                          > e10)
+                          > 4146)
                 extension = 1;
         }
 
@@ -1153,7 +1127,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             r -= cutNode && tte->depth() >= depth ? 3 : 2;
 
         // Decrease reduction if opponent's move count is high (~1 Elo)
-        if ((ss - 1)->moveCount > e11)
+        if ((ss - 1)->moveCount > 7)
             r--;
 
         // Increase reduction for cut nodes (~3 Elo)
@@ -1188,10 +1162,10 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
         ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)]
                       + (*contHist[0])[movedPiece][to_sq(move)]
                       + (*contHist[1])[movedPiece][to_sq(move)]
-                      + (*contHist[3])[movedPiece][to_sq(move)] - e12;
+                      + (*contHist[3])[movedPiece][to_sq(move)] - 3817;
 
         // Decrease/increase reduction for moves with a good/bad history (~25 Elo)
-        r -= ss->statScore / e13;
+        r -= ss->statScore / 14767;
 
         // Step 17. Late moves reduction / extension (LMR, ~117 Elo)
         // We use various heuristics for the sons of a node after the first son has
@@ -1214,7 +1188,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
             {
                 // Adjust full-depth search based on LMR results - if the result
                 // was good enough search deeper, if it was bad enough search shallower.
-                const bool doDeeperSearch    = value > (bestValue + f1 + f2 * newDepth);  // (~1 Elo)
+                const bool doDeeperSearch    = value > (bestValue + 53 + 2 * newDepth);  // (~1 Elo)
                 const bool doShallowerSearch = value < bestValue + newDepth;             // (~2 Elo)
 
                 newDepth += doDeeperSearch - doShallowerSearch;
@@ -1329,7 +1303,7 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
                 else
                 {
                     // Reduce other moves if we have found at least one score improvement (~2 Elo)
-                    if (depth > f3 && depth < f4 && beta < f5 && value > -f6)
+                    if (depth > 2 && depth < 12 && beta < 13782 && value > -11541)
                         depth -= 2;
 
                     assert(depth > 0);
@@ -1368,8 +1342,8 @@ Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, boo
     // Bonus for prior countermove that caused the fail low
     else if (!priorCapture && prevSq != SQ_NONE)
     {
-        int bonus = (depth > f7) + (PvNode || cutNode) + (bestValue < alpha - f8)
-                  + ((ss - 1)->moveCount > f9);
+        int bonus = (depth > 6) + (PvNode || cutNode) + (bestValue < alpha - 656)
+                  + ((ss - 1)->moveCount > 10);
         update_continuation_histories(ss - 1, pos.piece_on(prevSq), prevSq,
                                       stat_bonus(depth) * bonus);
         thisThread->mainHistory[~us][from_to((ss - 1)->currentMove)]
@@ -1501,7 +1475,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
         if (bestValue > alpha)
             alpha = bestValue;
 
-        futilityBase = ss->staticEval + g1;
+        futilityBase = ss->staticEval + 182;
     }
 
     const PieceToHistory* contHist[] = {(ss - 1)->continuationHistory,
@@ -1581,7 +1555,7 @@ Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) {
                 continue;
 
             // Do not search moves with bad enough SEE values (~5 Elo)
-            if (!pos.see_ge(move, Value(-g2)))
+            if (!pos.see_ge(move, Value(-77)))
                 continue;
         }
 
@@ -1717,7 +1691,7 @@ void update_all_stats(const Position& pos,
 
     if (!pos.capture_stage(bestMove))
     {
-        int bestMoveBonus = bestValue > beta + g3 ? quietMoveBonus      // larger bonus
+        int bestMoveBonus = bestValue > beta + 173 ? quietMoveBonus      // larger bonus
                                                    : stat_bonus(depth);  // smaller bonus
 
         // Increase stats for the best move in case it was a quiet move
@@ -1725,7 +1699,7 @@ void update_all_stats(const Position& pos,
         thisThread->pawnHistory[pawn_structure(pos)][moved_piece][to_sq(bestMove)]
           << quietMoveBonus;
 
-        int moveMalus = bestValue > beta + g4 ? quietMoveMalus      // larger malus
+        int moveMalus = bestValue > beta + 165 ? quietMoveMalus      // larger malus
                                                : stat_malus(depth);  // smaller malus
 
         // Decrease stats for all non-best quiet moves