From 7f24a11a85b331b855735d3d56d95ada731da1d7 Mon Sep 17 00:00:00 2001
From: Alexander Lyashuk
Date: Tue, 3 Dec 2019 08:01:04 +0100
Subject: [PATCH 001/151] Master is now v0.24.0-dev (#1011)

* Master is now v0.24.0-dev

* update changelog.txt
---
 changelog.txt   | 52 ++++++++++++++++++++++++++++++++++++++++++++++++-
 src/version.inc |  2 +-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/changelog.txt b/changelog.txt
index 8b48a739d8..0a1cb50ced 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,4 +1,54 @@
-v0.22.0-rc1 (2019-08-03)
+v0.23.0 (2019-12-01)
+~~~~~~~
+
+* Fixed the order of BLAS options so that Eigen is lower priority, to match
+  the assumption in the check_opencl patch introduced in v0.23.0-rc2.
+
+v0.23.0-rc2 (2019-11-27)
+~~~~~~~~~~~
+
+* Fixes in nps and time reporting during search.
+* Introduced a DNNL BLAS build for modern CPUs in addition to OpenBLAS.
+* Build fixes on macOS without OpenCL.
+* Fixed smart pruning and KLDGain trying to stop the search in `go infinite` mode.
+* The OpenCL package now has a check_opencl tool to verify that computation behaves sanely.
+* Fixed a bug in the interaction between short-sightedness and certainty propagation.
+
+v0.23.0-rc1 (2019-11-21)
+~~~~~~~~~~~
+
+* Support for Fischer Random Chess (`UCI_Chess960` option to enable FRC-style
+  castling). Also added support for FRC-compatible weight files, but no training
+  code yet.
+* New option `--logit-q` (UCI: `LogitQ`). Changes the subtree selection algorithm a
+  bit, possibly making it stronger (experimental, default off).
+* Lc0 now reports the WDL score. To enable it, use the `--show-wdl` command-line
+  argument or the `UCI_ShowWdl` UCI option.
+* Added a "Badgame split" mode during training. After the engine makes an
+  inferior move due to temperature, the game is branched and later replayed
+  from the position of the branch.
+* Added the experimental `--short-sightedness` (UCI: `ShortSightedness`) parameter.
+  It treats longer variations as more "drawish".
+* Lc0 can now open Fat Fritz weight files.
+* Time management code refactoring. No functional changes, but it will make time
+  management changes easier.
+* The Lc0 logo is now printed in red! \o/
+* The command-line argument `-v` is now short for `--verbose-move-stats`.
+* Errors in the `--backend-opts` parameter syntax are now reported.
+* The most basic version of the "certainty propagation" feature (actually without
+  "propagation"). If the engine sees a checkmate, it plays it!
+  (Before, it could play another good move.)
+* Benchmark mode no longer supports smart pruning.
+* Various small changes: hidden options to control Dirichlet noise, floating
+  point optimizations, better error reporting if there is an exception in a
+  worker thread, and better error messages in the CUDA backend.
+
+v0.22.0 (2019-08-05)
+~~~~~~~
+
+(no changes)
+
+v0.22.0-rc1 (2019-08-03)
 ~~~~~~~~~~~
 
 * Remove softmax calculation from backends and apply it after filtering for
diff --git a/src/version.inc b/src/version.inc
index 3799309afd..351665c5ab 100644
--- a/src/version.inc
+++ b/src/version.inc
@@ -1,4 +1,4 @@
 #define LC0_VERSION_MAJOR 0
-#define LC0_VERSION_MINOR 23
+#define LC0_VERSION_MINOR 24
 #define LC0_VERSION_PATCH 0
 #define LC0_VERSION_POSTFIX "dev"

From 5130f3914a83459ad3f09e4181822659f4f1b0a7 Mon Sep 17 00:00:00 2001
From: borg323 <39573933+borg323@users.noreply.github.com>
Date: Tue, 3 Dec 2019 20:08:16 +0200
Subject: [PATCH 002/151] save training data with no visits for single legal move (#1033)

---
 src/mcts/node.cc | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/mcts/node.cc b/src/mcts/node.cc
index 05e06938c3..c5a89c524c 100644
--- a/src/mcts/node.cc
+++ b/src/mcts/node.cc
@@ -336,16 +336,20 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result,
   result.version = 4;
 
   // Populate probabilities.
-  const float total_n = static_cast<float>(GetChildrenVisits());
+  auto total_n = GetChildrenVisits();
   // Prevent garbage/invalid training data from being uploaded to server.
+  // It's possible to have N=0 when there is only one legal move in a position
+  // (due to smart pruning).
-  if (total_n <= 0.0f) throw Exception("Search generated invalid data!");
+  if (total_n == 0 && GetNumEdges() != 1) {
+    throw Exception("Search generated invalid data!");
+  }
   // Set illegal moves to have -1 probability.
   std::fill(std::begin(result.probabilities), std::end(result.probabilities),
             -1);
   // Set move probabilities according to their relative number of visits.
   for (const auto& child : Edges()) {
     result.probabilities[child.edge()->GetMove().as_nn_index()] =
-        child.GetN() / total_n;
+        total_n > 0 ? child.GetN() / static_cast<float>(total_n) : 1;
   }
 
   // Populate planes.
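The hunk above changes the probability fill so that a position whose only legal move never received a visit (smart pruning can stop the search immediately in that case) is still written out with probability 1 for that move, instead of being rejected. Below is a minimal standalone sketch of that rule; the types and names are illustrative stand-ins, not lc0's actual Node/Edge API.

```
#include <cstddef>
#include <stdexcept>
#include <vector>

// Illustrative stand-in for lc0's per-edge visit bookkeeping.
struct ChildVisits {
  std::size_t nn_index;  // index of the move in the policy head output
  unsigned visits;       // visit count accumulated by the search
};

// Mirrors the patched probability logic: -1 marks illegal moves, visited moves
// get their visit fraction, and a single legal move with zero visits gets 1.
std::vector<float> FillProbabilities(const std::vector<ChildVisits>& children,
                                     std::size_t policy_size) {
  unsigned total = 0;
  for (const auto& c : children) total += c.visits;
  // Zero total visits is only acceptable when there is exactly one legal move.
  if (total == 0 && children.size() != 1) {
    throw std::runtime_error("Search generated invalid data!");
  }
  std::vector<float> probs(policy_size, -1.0f);
  for (const auto& c : children) {
    probs[c.nn_index] =
        total > 0 ? static_cast<float>(c.visits) / total : 1.0f;
  }
  return probs;
}
```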
From 97cae9ac9346d181d71fa00de45ce1edb19998a8 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Tue, 3 Dec 2019 20:52:08 +0200 Subject: [PATCH 003/151] also upload a cuda zip without the dlls (#1032) --- appveyor.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 5f928fa4cc..5b19e808fb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -77,13 +77,14 @@ after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe - cmd: IF %APPVEYOR_REPO_TAG%==true appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows" - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe +- cmd: IF %APPVEYOR_REPO_TAG%==true type COPYING |more /P > dist\COPYING +- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\COPYING +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%-nodll.zip - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-openblas 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%CUDA_PATH%\bin\cudart64_100.dll" "%CUDA_PATH%\bin\cublas64_100.dll" - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%PKG_FOLDER%\cuda\bin\cudnn64_7.dll" -- cmd: IF %APPVEYOR_REPO_TAG%==true type COPYING |more /P > dist\COPYING -- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\COPYING - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl copy "%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp\LICENSE" dist\DNNL-LICENSE - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\DNNL-LICENSE - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true type scripts\check_opencl.bat |more /P > dist\check_opencl.bat @@ -95,13 +96,13 @@ after_build: artifacts: - path: build/lc0.exe name: lc0-$(NAME) - - path: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME).zip + - path: /*.zip/ name: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME)-zip - path: build/lc0.pdb name: lc0-debug-symbols deploy: - provider: GitHub - artifact: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME).zip + artifact: /.*\.zip/ auth_token: secure: USFAdwQKTXqOXQjCYQfzWvzRpUhvqJLBkN4hbOg+j876vDxGZHt9bMYayb5evePp on: From ca82e0ad5a66d179e8339b89c6c7b8e262889508 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 9 Dec 2019 14:31:10 +0200 Subject: [PATCH 004/151] cudnn-auto backend (#1026) --- src/neural/cuda/network_cudnn.cc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 295dbb62ea..78af0551d7 100644 --- a/src/neural/cuda/network_cudnn.cc +++ 
b/src/neural/cuda/network_cudnn.cc
@@ -819,6 +819,25 @@ std::unique_ptr<Network> MakeCudnnNetwork(const WeightsFile& weights,
   return std::make_unique<CudnnNetwork<DataType>>(weights, options);
 }
 
+std::unique_ptr<Network> MakeCudnnNetworkAuto(const WeightsFile& weights,
+                                              const OptionsDict& options) {
+  int gpu_id = options.GetOrDefault<int>("gpu", 0);
+  cudaDeviceProp deviceProp = {};
+  // No error checking here, this will be repeated later.
+  cudaGetDeviceProperties(&deviceProp, gpu_id);
+
+  // Check if the GPU supports FP16.
+  if (deviceProp.major >= 7 ||
+      (deviceProp.major == 6 && deviceProp.minor != 1) ||
+      (deviceProp.major == 5 && deviceProp.minor == 3)) {
+    CERR << "Switching to [cudnn-fp16]...";
+    return MakeCudnnNetwork<half>(weights, options);
+  }
+  CERR << "Switching to [cudnn]...";
+  return MakeCudnnNetwork<float>(weights, options);
+}
+
+REGISTER_NETWORK("cudnn-auto", MakeCudnnNetworkAuto, 120)
 REGISTER_NETWORK("cudnn", MakeCudnnNetwork<float>, 110)
 REGISTER_NETWORK("cudnn-fp16", MakeCudnnNetwork<half>, 105)

From 108c11bfa47aee30d2eced66d05f0a59b098a39f Mon Sep 17 00:00:00 2001
From: borg323 <39573933+borg323@users.noreply.github.com>
Date: Thu, 12 Dec 2019 12:29:07 +0200
Subject: [PATCH 005/151] fix various meson warnings (#1030)

---
 meson.build | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/meson.build b/meson.build
index e9bba27b03..be75f12437 100644
--- a/meson.build
+++ b/meson.build
@@ -15,8 +15,8 @@
 # along with Leela Chess.  If not, see <http://www.gnu.org/licenses/>.
 
 project('lc0', 'cpp',
-        default_options : ['cpp_std=c++14', 'b_ndebug=if-release'],
-        meson_version: '>=0.45')
+        default_options : ['cpp_std=c++14', 'b_ndebug=if-release', 'warning_level=3'],
+        meson_version: '>=0.46')
 
 cc = meson.get_compiler('cpp')
 if cc.get_id() == 'clang'
@@ -24,9 +24,6 @@ if cc.get_id() == 'clang'
   add_project_arguments('-Wthread-safety', language : 'cpp')
 endif
 if cc.get_id() == 'clang' or cc.get_id() == 'gcc'
-  add_project_arguments('-Wextra', language : 'cpp')
-  add_project_arguments('-pedantic', language : 'cpp')
-
   if get_option('buildtype') == 'release'
     add_project_arguments('-march=native', language : 'cpp')
   endif

From 1b971dc29a7ffac293cd5be3f09986fa45a7a6c4 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Tue, 31 Dec 2019 13:43:20 +1100
Subject: [PATCH 006/151] Fix training data for odd book lengths. (#1046)

* Fix training data for odd book lengths.

* Simplify based on Mardak's suggestion.
---
 src/selfplay/game.cc | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc
index 3234ee74e4..aba8bce95e 100644
--- a/src/selfplay/game.cc
+++ b/src/selfplay/game.cc
@@ -262,10 +262,8 @@ void SelfPlayGame::Abort() {
 }
 
 void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
-  assert(!training_data_.empty());
-  bool black_to_move =
-      tree_[0]->GetPositionHistory().Starting().IsBlackToMove();
   for (auto chunk : training_data_) {
+    const bool black_to_move = chunk.side_to_move;
     if (game_result_ == GameResult::WHITE_WON) {
       chunk.result = black_to_move ? -1 : 1;
     } else if (game_result_ == GameResult::BLACK_WON) {
@@ -274,7 +272,6 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
       chunk.result = 0;
     }
     writer->WriteChunk(chunk);
-    black_to_move = !black_to_move;
   }
 }

From f83f81de68cf94585ac786662d63998169a837e8 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Tue, 31 Dec 2019 18:00:02 +1100
Subject: [PATCH 007/151] Don't discard games that are in positions that are already done. (#1048)

* Don't discard games that are in positions that are already done.

* Minor optimization.
* Minor tweak for consistency with surrounding code. --- src/selfplay/game.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index aba8bce95e..95c948595b 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -206,9 +206,18 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, kMinimumAllowedVistsId.GetId())) { break; } - auto move_list_to_discard = GetMoves(); - move_list_to_discard.push_back(move); - options_[idx].discarded_callback(move_list_to_discard); + PositionHistory history_copy = tree_[idx]->GetPositionHistory(); + Move move_for_history = move; + if (tree_[idx]->IsBlackToMove()) { + move_for_history.Mirror(); + } + history_copy.Append(move_for_history); + // Ensure not to discard games that are already decided. + if (history_copy.ComputeGameResult() == GameResult::UNDECIDED) { + auto move_list_to_discard = GetMoves(); + move_list_to_discard.push_back(move); + options_[idx].discarded_callback(move_list_to_discard); + } search_->ResetBestMove(); } // Add best move to the tree. From 7a05b0e87c35f573bfa58cb0faffde7e715528dc Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Thu, 16 Jan 2020 14:28:11 +0200 Subject: [PATCH 008/151] update readme (#1055) --- README.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 3b030d5ccf..2d01fe5f0a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Lc0 is a UCI-compliant chess engine designed to play chess via neural network, s Lc0 can be acquired either via a git clone or an archive download from GitHub. Be aware that there is a required submodule which isn't included in source archives. -For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.19`), which is equivalent to using the latest version tag. +For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.23`), which is equivalent to using the latest version tag. Versioning follows the Semantic Versioning guidelines, with major, minor and patch sections. The training server enforces game quality using the versions output by the client and engine. 
@@ -17,11 +17,11 @@ Versioning follows the Semantic Versioning guidelines, with major, minor and pat Download using git: ``` -git clone -b release/0.21 --recurse-submodules https://github.com/LeelaChessZero/lc0.git +git clone -b release/0.23 --recurse-submodules https://github.com/LeelaChessZero/lc0.git ``` If you prefer to download an archive, you need to also download and place the submodule: - * Download the [.zip](https://api.github.com/repos/LeelaChessZero/lc0/zipball/release/0.21) file ([.tar.gz](https://api.github.com/repos/LeelaChessZero/lc0/tarball/release/0.21) archive is also available) + * Download the [.zip](https://api.github.com/repos/LeelaChessZero/lc0/zipball/release/0.23) file ([.tar.gz](https://api.github.com/repos/LeelaChessZero/lc0/tarball/release/0.23) archive is also available) * Extract * Download https://github.com/LeelaChessZero/lczero-common/archive/master.zip (also available as [.tar.gz](https://github.com/LeelaChessZero/lczero-common/archive/master.tar.gz)) * Move the second archive into the first archive's `libs/lczero-common/` folder and extract @@ -103,10 +103,8 @@ Make sure that `~/.local/bin` is in your `PATH` environment variable. You can no #### openSUSE (all versions) -Instructions, packages and tools for building on openSUSE are at the following page +Instructions, packages and tools for building on openSUSE are at [openSUSE_install.md](openSUSE_install.md) -[openSUSE_install.md](openSUSE_install.md) -======= #### Docker Use https://github.com/vochicong/lc0-docker @@ -141,13 +139,18 @@ Or. ### Mac +First you need to install some required packages: 1. Install brew as per the instructions at https://brew.sh/ 2. Install python3: `brew install python3` 3. Install meson: `brew install meson` 4. Install ninja: `brew install ninja` 5. When using Mojave install SDK headers: installer -pkg /Library/Developer/CommandLineTools/Packages/macOS_SDK_headers_for_macOS_10.14.pkg -target / -6. Run `./build.sh` -7. The resulting binary will be in build/release + +Now download the lc0 source, if you haven't already done so, following the instructions earlier in the page. + +6. Go to the lc0 directory. +7. Run `./build.sh` +8. 
The resulting binary will be in build/release ### Raspberry Pi From 78d9cc1c5eb2dc9e3fa0778c0b6f3f4f09fbd00a Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Thu, 16 Jan 2020 16:40:43 +0200 Subject: [PATCH 009/151] bypass wrapdb (#1056) --- subprojects/eigen.wrap | 2 +- subprojects/gtest.wrap | 2 +- subprojects/zlib.wrap | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/subprojects/eigen.wrap b/subprojects/eigen.wrap index 230c22fc74..fb30893c6c 100644 --- a/subprojects/eigen.wrap +++ b/subprojects/eigen.wrap @@ -4,6 +4,6 @@ source_filename=eigen-eigen-b3f3d4950030.zip directory=eigen-eigen-b3f3d4950030 source_hash=35fa84bc23114b9d37c4597745f8b4e03354a5077579fdba597019f595a602b6 -patch_url = https://wrapdb.mesonbuild.com/v1/projects/eigen/3.3.5/1/get_zip +patch_url = https://github.com/mesonbuild/eigen/releases/download/3.3.5-1/eigen.zip patch_filename = eigen-3.3.5-1-wrap.zip patch_hash = ef83f81b932ad2d9491648881feeaf422a58edc072f54db32cd08d48660c8e97 diff --git a/subprojects/gtest.wrap b/subprojects/gtest.wrap index c7c51c2876..f8f3868715 100644 --- a/subprojects/gtest.wrap +++ b/subprojects/gtest.wrap @@ -5,7 +5,7 @@ source_url = https://github.com/google/googletest/archive/release-1.8.1.zip source_filename = gtest-1.8.1.zip source_hash = 927827c183d01734cc5cfef85e0ff3f5a92ffe6188e0d18e909c5efebf28a0c7 -patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.1/1/get_zip +patch_url = https://github.com/mesonbuild/gtest/releases/download/1.8.1-1/gtest.zip patch_filename = gtest-1.8.1-1-wrap.zip patch_hash = f79f5fd46e09507b3f2e09a51ea6eb20020effe543335f5aee59f30cc8d15805 diff --git a/subprojects/zlib.wrap b/subprojects/zlib.wrap index d774c0717a..308de43e83 100644 --- a/subprojects/zlib.wrap +++ b/subprojects/zlib.wrap @@ -5,6 +5,6 @@ source_url = http://zlib.net/fossils/zlib-1.2.11.tar.gz source_filename = zlib-1.2.11.tar.gz source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1 -patch_url = https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.11/3/get_zip +patch_url = https://github.com/mesonbuild/zlib/releases/download/1.2.11-3/zlib.zip patch_filename = zlib-1.2.11-3-wrap.zip -patch_hash = f07dc491ab3d05daf00632a0591e2ae61b470615b5b73bcf9b3f061fff65cff0 \ No newline at end of file +patch_hash = f07dc491ab3d05daf00632a0591e2ae61b470615b5b73bcf9b3f061fff65cff0 From 6ccfe082af7a586d4d85725a428addc2f9a703b2 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 26 Jan 2020 12:10:25 +0200 Subject: [PATCH 010/151] use std::exp() in blas backend (#933) --- src/neural/blas/se_unit.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neural/blas/se_unit.cc b/src/neural/blas/se_unit.cc index 57033a5562..f051d04e6b 100644 --- a/src/neural/blas/se_unit.cc +++ b/src/neural/blas/se_unit.cc @@ -47,7 +47,7 @@ static void apply_se(const size_t channels, const size_t batch_size, }; const auto lambda_sigmoid = [](const auto val) { - return 1.0f / (1.0f + exp(-val)); + return 1.0f / (1.0f + std::exp(-val)); }; for (auto c = size_t{0}; c < channels * batch_size; c++) { From 40161e9fa70f1e08c94030f7b7ddaa2abded6f77 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 27 Jan 2020 09:08:17 +1100 Subject: [PATCH 011/151] Pgn tournament (#1060) * Add opening book support to selfplay * Some cleanup. * Make book reader return 'normal' rather than black mirrored moves * Remove unneeded line missed in merge. 
* Add some more options for how to choose openings from the book. * Some formatting. * Simplify non-random opening book modes to wrap if specific game count is set. * Some small cleanup. * Fix compile with clang, maybe? * Initial pass based on feedback. * Extract pgn logic to its own header. Also fix a bug I introduced changing the wrong 9 to an 8. * Do some OOification of the pgn reading code. * Fix missing newline. --- src/chess/pgn.h | 262 +++++++++++++++++++++++++++++++++++++ src/selfplay/tournament.cc | 62 ++++++++- src/selfplay/tournament.h | 5 +- src/utils/random.h | 9 ++ 4 files changed, 330 insertions(+), 8 deletions(-) create mode 100644 src/chess/pgn.h diff --git a/src/chess/pgn.h b/src/chess/pgn.h new file mode 100644 index 0000000000..61f6cb556f --- /dev/null +++ b/src/chess/pgn.h @@ -0,0 +1,262 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +#include + +#include "chess/bitboard.h" +#include "chess/board.h" + +namespace lczero { + +class PgnReader { + public: + void AddPgnFile(const std::string& filepath) { + std::ifstream file(filepath); + std::string line; + bool in_comment = false; + while (std::getline(file, line)) { + // TODO: support line breaks in tags to ensure they are properly ignored. + if (line.empty() || line[0] == '[') { + Flush(); + continue; + } + // Handle braced comments. + int cur_offset = 0; + while (in_comment && line.find('}', cur_offset) != std::string::npos || + !in_comment && line.find('{', cur_offset) != std::string::npos) { + if (in_comment && line.find('}', cur_offset) != std::string::npos) { + line = line.substr(0, cur_offset) + + line.substr(line.find('}', cur_offset) + 1); + in_comment = false; + } else { + cur_offset = line.find('{', cur_offset); + in_comment = true; + } + } + if (in_comment) { + line = line.substr(0, cur_offset); + } + // Trim trailing comment. + if (line.find(';') != std::string::npos) { + line = line.substr(0, line.find(';')); + } + if (line.empty()) continue; + std::istringstream iss(line); + std::string word; + while (!iss.eof()) { + word.clear(); + iss >> word; + if (word.size() < 2) continue; + // Trim move numbers from front. 
+ const auto idx = word.find('.'); + if (idx != std::string::npos) { + bool all_nums = true; + for (int i = 0; i < idx; i++) { + if (word[i] < '0' || word[i] > '9') { + all_nums = false; + break; + } + } + if (all_nums) { + word = word.substr(idx + 1); + } + } + // Pure move numbers can be skipped. + if (word.size() < 2) continue; + // Ignore score line. + if (word == "1/2-1/2" || word == "1-0" || word == "0-1" || word == "*") + continue; + cur_game_.push_back(SanToMove(word, cur_board_)); + cur_board_.ApplyMove(cur_game_.back()); + // Board ApplyMove wants mirrored for black, but outside code wants + // normal, so mirror it back again. + // Check equal to 0 since we've already added the position. + if ((cur_game_.size() % 2) == 0) { + cur_game_.back().Mirror(); + } + cur_board_.Mirror(); + } + } + Flush(); + } + std::vector GetGames() const { return games_; } + std::vector&& ReleaseGames() { return std::move(games_); } + + private: + void Flush() { + if (cur_game_.empty()) return; + games_.push_back(cur_game_); + cur_game_.clear(); + cur_board_.SetFromFen(ChessBoard::kStartposFen); + } + + Move::Promotion PieceToPromotion(int p) { + switch (p) { + case -1: + return Move::Promotion::None; + case 2: + return Move::Promotion::Queen; + case 3: + return Move::Promotion::Bishop; + case 4: + return Move::Promotion::Knight; + case 5: + return Move::Promotion::Rook; + default: + // 0 and 1 are pawn and king, which are not legal promotions, other + // numbers don't correspond to a known piece type. + std::cerr << "Unexpected promotion!!" << std::endl; + throw Exception("Trying to create a move with illegal promotion."); + } + } + + Move SanToMove(const std::string& san, const ChessBoard& board) { + int p = 0; + int idx = 0; + if (san[0] == 'K') { + p = 1; + } else if (san[0] == 'Q') { + p = 2; + } else if (san[0] == 'B') { + p = 3; + } else if (san[0] == 'N') { + p = 4; + } else if (san[0] == 'R') { + p = 5; + } else if (san[0] == 'O' && san.size() > 2 && san[1] == '-' && + san[2] == 'O') { + Move m; + if (san.size() > 4 && san[3] == '-' && san[4] == 'O') { + m = Move(BoardSquare(0, 4), BoardSquare(0, 2)); + } else { + m = Move(BoardSquare(0, 4), BoardSquare(0, 6)); + } + return m; + } + if (p != 0) idx++; + // Formats e4 1e5 de5 d1e5 - with optional x's - followed by =Q for + // promotions, and even more characters after that also optional. + int r1 = -1; + int c1 = -1; + int r2 = -1; + int c2 = -1; + int p2 = -1; + bool pPending = false; + for (; idx < san.size(); idx++) { + if (san[idx] == 'x') continue; + if (san[idx] == '=') { + pPending = true; + continue; + } + if (san[idx] >= '1' && san[idx] <= '8') { + r1 = r2; + r2 = san[idx] - '1'; + continue; + } + if (san[idx] >= 'a' && san[idx] <= 'h') { + c1 = c2; + c2 = san[idx] - 'a'; + continue; + } + if (pPending) { + if (san[idx] == 'Q') { + p2 = 2; + } else if (san[idx] == 'B') { + p2 = 3; + } else if (san[idx] == 'N') { + p2 = 4; + } else if (san[idx] == 'R') { + p2 = 5; + } + pPending = false; + break; + } + break; + } + if (r1 == -1 || c1 == -1) { + // Need to find the from cell based on piece. 
+ int sr1 = r1; + int sr2 = r2; + if (board.flipped()) { + if (sr1 != -1) sr1 = 7 - sr1; + sr2 = 7 - sr2; + } + BitBoard searchBits; + if (p == 0) { + searchBits = (board.pawns() & board.ours()); + } else if (p == 1) { + searchBits = board.our_king(); + } else if (p == 2) { + searchBits = (board.queens() & board.ours()); + } else if (p == 3) { + searchBits = (board.bishops() & board.ours()); + } else if (p == 4) { + searchBits = board.our_knights(); + } else if (p == 5) { + searchBits = (board.rooks() & board.ours()); + } + auto plm = board.GenerateLegalMoves(); + int pr1 = -1; + int pc1 = -1; + for (BoardSquare sq : searchBits) { + if (sr1 != -1 && sq.row() != sr1) continue; + if (c1 != -1 && sq.col() != c1) continue; + if (std::find(plm.begin(), plm.end(), + Move(sq, BoardSquare(sr2, c2), PieceToPromotion(p2))) == + plm.end()) { + continue; + } + if (pc1 != -1) { + std::cerr << "Ambiguous!!" << std::endl; + throw Exception("Opening book move seems ambiguous."); + } + pr1 = sq.row(); + pc1 = sq.col(); + } + if (pc1 == -1) { + std::cerr << "No Match!!" << std::endl; + throw Exception("Opening book move seems illegal."); + } + r1 = pr1; + c1 = pc1; + if (board.flipped()) { + r1 = 7 - r1; + } + } + Move m(BoardSquare(r1, c1), BoardSquare(r2, c2), PieceToPromotion(p2)); + if (board.flipped()) m.Mirror(); + return m; + } + + ChessBoard cur_board_{ChessBoard::kStartposFen}; + MoveList cur_game_; + std::vector games_; +}; + +} // namespace lczero diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 5738938d4f..a1a7b582f6 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -27,6 +27,7 @@ #include "selfplay/tournament.h" +#include "chess/pgn.h" #include "mcts/search.h" #include "mcts/stoppers/factory.h" #include "neural/factory.h" @@ -39,7 +40,10 @@ namespace { const OptionId kShareTreesId{"share-trees", "ShareTrees", "When on, game tree is shared for two players; " "when off, each side has a separate tree."}; -const OptionId kTotalGamesId{"games", "Games", "Number of games to play."}; +const OptionId kTotalGamesId{ + "games", "Games", + "Number of games to play. -1 to play forever, -2 to play equal to book " + "length, or double book length if mirrored."}; const OptionId kParallelGamesId{"parallelism", "Parallelism", "Number of games to play in parallel."}; const OptionId kThreadsId{ @@ -57,6 +61,8 @@ const OptionId kTrainingId{ "temporary subdirectory that the engine creates."}; const OptionId kVerboseThinkingId{"verbose-thinking", "VerboseThinking", "Show verbose thinking messages."}; +const OptionId kMoveThinkingId{"move-thinking", "MoveThinking", + "Show all the per-move thinking."}; const OptionId kResignPlaythroughId{ "resign-playthrough", "ResignPlaythrough", "The percentage of games which ignore resign."}; @@ -64,6 +70,15 @@ const OptionId kDiscardedStartChanceId{ "discarded-start-chance", "DiscardedStartChance", "The percentage chance each game will attempt to start from a position " "discarded due to not getting enough visits."}; +const OptionId kOpeningsFileId{ + "openings-pgn", "OpeningsPgnFile", + "A path name to a pgn file containing openings to use."}; +const OptionId kOpeningsMirroredId{ + "mirror-openings", "MirrorOpenings", + "If true, each opening will be played in pairs. 
" + "Not really compatible with openings mode random."}; +const OptionId kOpeningsModeId{"openings-mode", "OpeningsMode", + "A choice of sequential, shuffled, or random."}; } // namespace @@ -84,8 +99,14 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { options->Add(kTimeMsId, -1, 999999999) = -1; options->Add(kTrainingId) = false; options->Add(kVerboseThinkingId) = false; + options->Add(kMoveThinkingId) = false; options->Add(kResignPlaythroughId, 0.0f, 100.0f) = 0.0f; options->Add(kDiscardedStartChanceId, 0.0f, 100.0f) = 0.0f; + options->Add(kOpeningsFileId) = ""; + options->Add(kOpeningsMirroredId) = false; + std::vector openings_modes = {"sequential", "shuffled", + "random"}; + options->Add(kOpeningsModeId, openings_modes) = "sequential"; SelfPlayGame::PopulateUciParams(options); @@ -130,9 +151,18 @@ SelfPlayTournament::SelfPlayTournament( kResignPlaythrough(options.Get(kResignPlaythroughId.GetId())), kDiscardedStartChance( options.Get(kDiscardedStartChanceId.GetId())) { + std::string book = options.Get(kOpeningsFileId.GetId()); + if (!book.empty()) { + PgnReader book_reader; + book_reader.AddPgnFile(book); + openings_ = book_reader.ReleaseGames(); + if (options.Get(kOpeningsModeId.GetId()) == "shuffled") { + Random::Get().Shuffle(openings_.begin(), openings_.end()); + } + } // If playing just one game, the player1 is white, otherwise randomize. if (kTotalGames != 1) { - next_game_black_ = Random::Get().GetBool(); + first_game_black_ = Random::Get().GetBool(); } static const char* kPlayerNames[2] = {"player1", "player2"}; @@ -179,8 +209,17 @@ void SelfPlayTournament::PlayOneGame(int game_number) { MoveList opening; { Mutex::Lock lock(mutex_); - player1_black = next_game_black_; - next_game_black_ = !next_game_black_; + player1_black = ((game_number % 2) == 1) != first_game_black_; + if (!openings_.empty()) { + if (player_options_[0].Get(kOpeningsMirroredId.GetId())) { + opening = openings_[(game_number / 2) % openings_.size()]; + } else if (player_options_[0].Get(kOpeningsModeId.GetId()) == + "random") { + opening = openings_[Random::Get().GetInt(0, openings_.size() - 1)]; + } else { + opening = openings_[game_number % openings_.size()]; + } + } if (discard_pile_.size() > 0 && Random::Get().GetFloat(100.0f) < kDiscardedStartChance) { const size_t idx = Random::Get().GetInt(0, discard_pile_.size() - 1); @@ -199,6 +238,8 @@ void SelfPlayTournament::PlayOneGame(int game_number) { for (int pl_idx : {0, 1}) { const bool verbose_thinking = player_options_[pl_idx].Get(kVerboseThinkingId.GetId()); + const bool move_thinking = + player_options_[pl_idx].Get(kMoveThinkingId.GetId()); // Populate per-player options. PlayerOptions& opt = options[color_idx[pl_idx]]; opt.network = networks_[pl_idx].get(); @@ -208,8 +249,12 @@ void SelfPlayTournament::PlayOneGame(int game_number) { // "bestmove" callback. opt.best_move_callback = [this, game_number, pl_idx, player1_black, - verbose_thinking, + verbose_thinking, move_thinking, &last_thinking_info](const BestMoveInfo& info) { + if (!move_thinking) { + last_thinking_info.clear(); + return; + } // In non-verbose mode, output the last "info" message. 
if (!verbose_thinking && !last_thinking_info.empty()) { info_callback_(last_thinking_info); @@ -324,7 +369,12 @@ void SelfPlayTournament::Worker() { { Mutex::Lock lock(mutex_); if (abort_) break; - if (kTotalGames != -1 && games_count_ >= kTotalGames) break; + bool mirrored = player_options_[0].Get(kOpeningsMirroredId.GetId()); + if (kTotalGames >= 0 && games_count_ >= kTotalGames || + kTotalGames == -2 && !openings_.empty() && + games_count_ >= + static_cast(openings_.size()) * (mirrored ? 2 : 1)) + break; game_id = games_count_++; } PlayOneGame(game_id); diff --git a/src/selfplay/tournament.h b/src/selfplay/tournament.h index 0168cc700c..48201d1833 100644 --- a/src/selfplay/tournament.h +++ b/src/selfplay/tournament.h @@ -71,12 +71,13 @@ class SelfPlayTournament { void PlayOneGame(int game_id); Mutex mutex_; - // Whether next game will be black for player1. - bool next_game_black_ GUARDED_BY(mutex_) = false; + // Whether first game will be black for player1. + bool first_game_black_ GUARDED_BY(mutex_) = false; std::vector discard_pile_ GUARDED_BY(mutex_); // Number of games which already started. int games_count_ GUARDED_BY(mutex_) = 0; bool abort_ GUARDED_BY(mutex_) = false; + std::vector openings_ GUARDED_BY(mutex_); // Games in progress. Exposed here to be able to abort them in case if // Abort(). Stored as list and not vector so that threads can keep iterators // to them and not worry that it becomes invalid. diff --git a/src/utils/random.h b/src/utils/random.h index d5005cc3ed..4434f9f0e2 100644 --- a/src/utils/random.h +++ b/src/utils/random.h @@ -27,6 +27,7 @@ #pragma once +#include #include #include #include "utils/mutex.h" @@ -43,6 +44,8 @@ class Random { int GetInt(int min, int max); std::string GetString(int length); bool GetBool(); + template + void Shuffle(RandomAccessIterator s, RandomAccessIterator e); private: Random(); @@ -51,4 +54,10 @@ class Random { std::mt19937 gen_ GUARDED_BY(mutex_); }; +template +void Random::Shuffle(RandomAccessIterator s, RandomAccessIterator e) { + Mutex::Lock lock(mutex_); + std::shuffle(s, e, gen_); +} + } // namespace lczero From 6da1a471d86fcbe38f0100d99368733d252438ef Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Tue, 28 Jan 2020 20:32:58 +0530 Subject: [PATCH 012/151] DirectX12 Backend for Lc0 (#1045) * Uses DirectCompute shaders and Metacommand APIs (for matrix multiply and/or convolution). * Default is to try fp16 precision/datatype for running the network (can use backend-opts=fp16=false to run with fp32). * Default is to attempt using winograd algorithm for the convolutions using GEMM metacommand for the matrix multiply. If GEMM metacommand isn't supported, we try using Convolution Metacommand directly. If neither of the Metacommands is supported, we use winograd algorithm with a compute shader to do the matrix multiply (slow!). * Both FP16 and FP32 modes are supported. * Performance on AMD significantly faster than OpenCL on all tested GPUs. * Performance on Nvidia on par with cudnn/cudnn-fp16 backend (slower in some cases, faster in some). 
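The fallback order described in the bullets above (winograd with the GEMM metacommand preferred, then the convolution metacommand, then a plain compute-shader matrix multiply) can be summarized as a small selection function. The sketch below is only a schematic restatement of that order; the enum and the two capability flags are illustrative placeholders, not types from the actual DirectX backend.

```
// Schematic of the convolution-path fallback described above (illustrative
// names only, not the backend's real types).
enum class ConvPath {
  kWinogradGemmMetacommand,  // preferred: winograd conv via GEMM metacommand
  kConvolutionMetacommand,   // fallback: direct convolution metacommand
  kWinogradComputeShader     // last resort: compute-shader GEMM (slow)
};

ConvPath ChooseConvPath(bool gemm_metacommand_supported,
                        bool conv_metacommand_supported) {
  if (gemm_metacommand_supported) return ConvPath::kWinogradGemmMetacommand;
  if (conv_metacommand_supported) return ConvPath::kConvolutionMetacommand;
  return ConvPath::kWinogradComputeShader;
}
```

Precision is chosen independently of this cascade: fp16 is the default, and as noted above `backend-opts=fp16=false` switches the backend to fp32.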
--- .gitignore | 3 +- appveyor.yml | 5 +- build-cuda-ninja.cmd | 14 + meson.build | 26 +- meson_options.txt | 5 + src/neural/cuda/network_cudnn.cc | 17 +- src/neural/dx/MetaCommand.h | 140 ++ src/neural/dx/dx_common.h | 73 + src/neural/dx/fp16_utils.cc | 105 ++ src/neural/dx/fp16_utils.h | 33 + src/neural/dx/layers_dx.cc | 764 ++++++++ src/neural/dx/layers_dx.h | 212 +++ src/neural/dx/network_dx.cc | 971 +++++++++++ src/neural/dx/network_dx.h | 238 +++ src/neural/dx/shader_wrapper.cc | 485 ++++++ src/neural/dx/shader_wrapper.h | 140 ++ src/neural/dx/shaders/AddVectors.hlsl | 87 + src/neural/dx/shaders/Conv1x1.hlsl | 93 + src/neural/dx/shaders/ExpandPlanes.hlsl | 130 ++ src/neural/dx/shaders/Gemm.hlsl | 206 +++ src/neural/dx/shaders/PolicyMap.hlsl | 63 + src/neural/dx/shaders/SE.hlsl | 193 +++ src/neural/dx/shaders/WinogradCommon.h | 290 ++++ src/neural/dx/shaders/WinogradTransform.hlsl | 451 +++++ .../dx/shaders/WinogradTransformSE.hlsl | 320 ++++ src/neural/dx/shaders/dxc_helper.py | 14 + src/neural/dx/shaders/meson.build | 183 ++ src/neural/dx/shaders/shader_shared.h | 52 + src/neural/dx/shaders/shaders.h | 57 + third_party/d3dx12.h | 1534 +++++++++++++++++ 30 files changed, 6894 insertions(+), 10 deletions(-) create mode 100644 build-cuda-ninja.cmd create mode 100644 src/neural/dx/MetaCommand.h create mode 100644 src/neural/dx/dx_common.h create mode 100644 src/neural/dx/fp16_utils.cc create mode 100644 src/neural/dx/fp16_utils.h create mode 100644 src/neural/dx/layers_dx.cc create mode 100644 src/neural/dx/layers_dx.h create mode 100644 src/neural/dx/network_dx.cc create mode 100644 src/neural/dx/network_dx.h create mode 100644 src/neural/dx/shader_wrapper.cc create mode 100644 src/neural/dx/shader_wrapper.h create mode 100644 src/neural/dx/shaders/AddVectors.hlsl create mode 100644 src/neural/dx/shaders/Conv1x1.hlsl create mode 100644 src/neural/dx/shaders/ExpandPlanes.hlsl create mode 100644 src/neural/dx/shaders/Gemm.hlsl create mode 100644 src/neural/dx/shaders/PolicyMap.hlsl create mode 100644 src/neural/dx/shaders/SE.hlsl create mode 100644 src/neural/dx/shaders/WinogradCommon.h create mode 100644 src/neural/dx/shaders/WinogradTransform.hlsl create mode 100644 src/neural/dx/shaders/WinogradTransformSE.hlsl create mode 100755 src/neural/dx/shaders/dxc_helper.py create mode 100644 src/neural/dx/shaders/meson.build create mode 100644 src/neural/dx/shaders/shader_shared.h create mode 100644 src/neural/dx/shaders/shaders.h create mode 100644 third_party/d3dx12.h diff --git a/.gitignore b/.gitignore index 68f058248c..7e1728d1f4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ subprojects/* !subprojects/*.wrap lc0.xcodeproj/ *.swp -.clang_complete \ No newline at end of file +.clang_complete +src/.vs/ \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index 5b19e808fb..9409083910 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,16 +6,19 @@ image: environment: matrix: - NAME: gpu-nvidia-cuda + - NAME: gpu-dx - NAME: gpu-opencl - NAME: cpu-dnnl - NAME: cpu-openblas clone_folder: c:\projects\lc0 install: - cmd: set CUDA=false +- cmd: set DX=false - cmd: set OPENCL=false - cmd: set BLAS=false - cmd: set GTEST=false - cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true +- cmd: IF %NAME%==gpu-dx set DX=true - cmd: IF %NAME%==gpu-opencl set OPENCL=true - cmd: IF %NAME%==cpu-dnnl set BLAS=true - cmd: IF %NAME%==cpu-openblas set BLAS=true @@ -60,7 +63,7 @@ before_build: - cmd: git submodule update --init --recursive - cmd: SET BUILD_BLAS=%BLAS% - cmd: IF %OPENCL%==true SET 
BUILD_BLAS=true -- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static +- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static build_script: - cmd: IF %APPVEYOR_REPO_TAG%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" - cmd: IF %APPVEYOR_REPO_TAG%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" diff --git a/build-cuda-ninja.cmd b/build-cuda-ninja.cmd new file mode 100644 index 0000000000..e326c9b3be --- /dev/null +++ b/build-cuda-ninja.cmd @@ -0,0 +1,14 @@ +rd /s build + +call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 +meson.py build --buildtype release ^ +-Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^ +-Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^ +-Ddefault_library=static + +pause + + +cd build + +ninja \ No newline at end of file diff --git a/meson.build b/meson.build index be75f12437..1f49503744 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,5 @@ # This file is part of Leela Chess Zero. 
-# Copyright (C) 2018-2019 The LCZero Authors +# Copyright (C) 2018-2020 The LCZero Authors # # Leela Chess is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -441,6 +441,30 @@ if get_option('build_backends') files += cuda_gen.process(cuda_files_nvcc_fp16, extra_args: ['-arch=compute_70', '-code=sm_70']) has_backends = true endif + + ## ~~~~~~~~ + ## DirectX + ## ~~~~~~~~ + + # we should always be able to build DirectX12 backend on windows platform + if host_machine.system() == 'windows' and get_option('dx') + dx_d3d12 = cc.find_library('d3d12') + dx_dxgi = cc.find_library('dxgi') + + dx_files = [ + 'src/neural/dx/network_dx.cc', + 'src/neural/dx/shader_wrapper.cc', + 'src/neural/dx/layers_dx.cc', + 'src/neural/dx/fp16_utils.cc', + ] + files += dx_files + deps += [dx_d3d12, dx_dxgi] + + subdir('src/neural/dx/shaders') + + has_backends = true + endif + endif # if get_option('build_backends') diff --git a/meson_options.txt b/meson_options.txt index 2fd4a05b25..70991af912 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -93,6 +93,11 @@ option('opencl', value: true, description: 'Enable OpenCL backend') +option('dx', + type: 'boolean', + value: true, + description: 'Enable DirectX12 backend') + option('tensorflow', type: 'boolean', value: false, diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 78af0551d7..94a6d46bd6 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -70,7 +70,8 @@ void dumpTensor(void *memory, int elements, char *message, bool fp16 = false) float *arr = (float *)temp; val = arr[i]; } - printf("%10.4f ", val); + printf("%8.4f ", val); + if ((i % 8) == 7) printf("\n"); } free(temp); printf("\n"); @@ -484,7 +485,7 @@ class CudnnNetwork : public Network { } // debug code example - // dumpTensor(tensor_mem_[0], 512, "After expand Planes", fp16); + // dumpTensor(tensor_mem_[0], 1024, "After expand Planes", fp16); float* opPol = io->op_policy_mem_gpu_; float* opVal = io->op_value_mem_gpu_; @@ -524,22 +525,22 @@ class CudnnNetwork : public Network { if (conv_policy_) { network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, scratch_mem_, scratch_size_, cudnn_, - cublas_); // conv1 + cublas_); // policy conv1 network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, scratch_mem_, scratch_size_, cudnn_, - cublas_); // conv1 + cublas_); // policy conv2 if (fp16) { network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, - cublas_); // pol FC + cublas_); // policy map layer copyTypeConverted(opPol, (half*)(tensor_mem_[0]), - batchSize * kNumOutputPolicy); // POLICY + batchSize * kNumOutputPolicy); // POLICY output } else { network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, - cublas_); // pol FC // POLICY + cublas_); //policy map layer // POLICY output } } else { network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, @@ -550,6 +551,7 @@ class CudnnNetwork : public Network { network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // pol FC + copyTypeConverted(opPol, (half*)(tensor_mem_[1]), batchSize * kNumOutputPolicy); // POLICY } else { @@ -597,6 +599,7 @@ class CudnnNetwork : public Network { network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); 
// value FC2 + copyTypeConverted(opVal, (half*)(tensor_mem_[2]), batchSize); // VALUE } else { network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[1], diff --git a/src/neural/dx/MetaCommand.h b/src/neural/dx/MetaCommand.h new file mode 100644 index 0000000000..c7bf003e66 --- /dev/null +++ b/src/neural/dx/MetaCommand.h @@ -0,0 +1,140 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +namespace lczero { +// Obtained by using EnumerateMetaCommands and EnumerateMetaCommandParameters +// calls. Simplified a bit. + +struct TensorDesc { + uint64_t DataType; + uint64_t Flags; + uint64_t DimensionCount; + uint64_t Size[5]; + uint64_t Stride[5]; + uint64_t StrideAlignment[5]; + uint64_t BaseAlignmentInBytes; + uint64_t PhysicalSizeInElements; +}; + +//----------------------------------------------------------------------------------// +// GEMM (Matrix multiply) +//----------------------------------------------------------------------------------// + +constexpr GUID GemmGuid = {0x1e52ebab, + 0x25ba, + 0x463a, + {0xa2, 0x85, 0x0a, 0x78, 0x8e, 0xef, 0x5d, 0x01}}; + +struct GemmCreateDesc { + TensorDesc DescA; + TensorDesc DescB; + TensorDesc DescC; + uint64_t cMatrixNull; + TensorDesc DescOut; + + uint64_t Precision; + uint64_t TransA; + uint64_t TransB; + float Alpha; + float Beta; + + uint64_t ActivationFunction; + float ActivationParam1, ActivationParam2; + uint64_t ActivationIsNull; + uint64_t BindFlags; +}; + +struct GemmInitDesc { + D3D12_GPU_DESCRIPTOR_HANDLE AResource; + D3D12_GPU_DESCRIPTOR_HANDLE BResource; + D3D12_GPU_DESCRIPTOR_HANDLE CResource; + D3D12_GPU_DESCRIPTOR_HANDLE PersistentResource; + D3D12_GPU_DESCRIPTOR_HANDLE TemporaryResource; +}; + +struct GemmExecuteDesc { + D3D12_GPU_DESCRIPTOR_HANDLE AResource; + D3D12_GPU_DESCRIPTOR_HANDLE BResource; + D3D12_GPU_DESCRIPTOR_HANDLE CResource; + D3D12_GPU_DESCRIPTOR_HANDLE OutputResource; + + D3D12_GPU_DESCRIPTOR_HANDLE PersistentResource; + D3D12_GPU_DESCRIPTOR_HANDLE TemporaryResource; +}; + +//----------------------------------------------------------------------------------// +// Convolution +//----------------------------------------------------------------------------------// + +constexpr GUID ConvGuid = {0x17804d6b, + 0xebfe, + 0x426f, + {0x88, 0xfc, 0xfe, 0xa7, 0x2e, 0x3f, 0x33, 0x56}}; + +struct ConvCreateDesc { + TensorDesc InputDesc; + TensorDesc FilterDesc; + TensorDesc BiasDesc; + uint64_t BiasNull; + 
TensorDesc OutputDesc; + + uint64_t Mode; + uint64_t Direction; + uint64_t Precision; + uint64_t Stride[3]; + uint64_t Dilation[3]; + uint64_t StartPadding[3]; + uint64_t EndPadding[3]; + uint64_t DimensionCount; + uint64_t OutputPadding[5]; + uint64_t GroupCount; + uint64_t ActivationFunction; + float ActivationParam1, ActivationParam2; + uint64_t ActivationIsNull; + uint64_t BindFlags; +}; + +struct InitConvDesc { + D3D12_GPU_DESCRIPTOR_HANDLE InputResource; + D3D12_GPU_DESCRIPTOR_HANDLE FilterResource; + D3D12_GPU_DESCRIPTOR_HANDLE BiasResource; + D3D12_GPU_DESCRIPTOR_HANDLE PersistentResource; + D3D12_GPU_DESCRIPTOR_HANDLE TemporaryResource; +}; + +struct ExecuteConvDesc { + D3D12_GPU_DESCRIPTOR_HANDLE InputResource; + D3D12_GPU_DESCRIPTOR_HANDLE FilterResource; + D3D12_GPU_DESCRIPTOR_HANDLE BiasResource; + D3D12_GPU_DESCRIPTOR_HANDLE OutputResource; + D3D12_GPU_DESCRIPTOR_HANDLE PersistentResource; + D3D12_GPU_DESCRIPTOR_HANDLE TemporaryResource; +}; + +}; // namespace lczero \ No newline at end of file diff --git a/src/neural/dx/dx_common.h b/src/neural/dx/dx_common.h new file mode 100644 index 0000000000..5365f08eed --- /dev/null +++ b/src/neural/dx/dx_common.h @@ -0,0 +1,73 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ +#pragma once +#include +#include +#include +#include +#include "d3dx12.h" +#include "fp16_utils.h" + +#define DEFAULT_FP16 true + +// To debug in case some GPUs can't read from sysmem directly in shader. +//#define COPY_BEFORE_SHADER_READ +// Dump per-layer debug data to find where things go wrong. +//#define DEBUG_DUMP_PER_LAYER_DATA + +namespace lczero { + +namespace dx_backend { + +void DxError(HRESULT status, const char* file, const int& line); +#define ReportDxErrors(status) DxError(status, __FILE__, __LINE__) + +struct DXAlloc { + ID3D12Resource* resource; + uint32_t offset; + // Various ways of binding an allocation to shader: + // 1. RAW/Structured buffer bound as root UAV, use gpu_va directly. + // 2. Typed buffer UAV bound as 4-component typed format (e.g: + // R16G16B16A16_FLOAT) + // 3. Typed buffer UAV bound as single component scalar typed format (e.g: + // R16_FLOAT) + + uint64_t gpu_va; + + // Handle of UAV created as 4-component vector type. + D3D12_GPU_DESCRIPTOR_HANDLE desc_handle_vector; + + // Handle of UAV created as scalar type. 
+ D3D12_GPU_DESCRIPTOR_HANDLE desc_handle_scalar; +}; + +typedef uint16_t dx_half; + +inline int DivUp(int a, int b) { return (a + b - 1) / b; } + +} // namespace dx_backend +} // namespace lczero diff --git a/src/neural/dx/fp16_utils.cc b/src/neural/dx/fp16_utils.cc new file mode 100644 index 0000000000..c60b95375c --- /dev/null +++ b/src/neural/dx/fp16_utils.cc @@ -0,0 +1,105 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include +#include + +// Define NO_F16C to avoid the F16C intrinsics. Also disabled with NO_POPCNT +// since it catches most processors without F16C instructions. + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__) +#include +#else +#define NO_F16C +#endif + +namespace lczero { + +uint16_t FP32toFP16(float f32) { +#if defined(NO_POPCNT) || defined(NO_F16C) + unsigned int x; + unsigned int sign = 0; + memcpy(&x, &f32, sizeof(float)); + if (x & 0x80000000) sign = 0x8000; + x &= 0x7fffffff; + if (x >= 0x477ff000) { + if ((x & 0x7f800000) == 0x7f800000 && (x & 0x7fffff)) { + x = ((x >> 13) - 0x38000) | 0x200; + } else { + x = 0x7c00; + } + } else if (x <= 0x33000000) + x = 0; + else if (x <= 0x387fefff) { + int shift = 126 - ((x >> 23) & 0xff); + x = (x & 0x7fffff) | 0x800000; + if (x & (0x17fffff >> (24 - shift))) x += 0x800000 >> (24 - shift); + x >>= shift; + } else { + // Adjust exponent and round to nearest even. 
+ if (x & 0x2fff) { + x -= 0x37fff000; + } else { + x -= 0x38000000; + } + x >>= 13; + } + return x | sign; +#else + __m128 A = _mm_set_ss(f32); + __m128i H = _mm_cvtps_ph(A, 0); + return _mm_extract_epi16(H, 0); +#endif +} + +float FP16toFP32(uint16_t f16) { +#if defined(NO_POPCNT) || defined(NO_F16C) + unsigned int x; + float f; + x = f16 & 0x7fff; + if ((x & 0x7c00) == 0) { + f = 5.9604645e-8f * x; + memcpy(&x, &f, sizeof(float)); + } else if (x >= 0x7c00) { + if (x & 0x1ff) x |= 0x200; + x = (x + 0x38000) << 13; + } else { + x = (x + 0x1c000) << 13; + } + if (f16 & 0x8000) x |= 0x80000000; + memcpy(&f, &x, sizeof(float)); + return f; +#else + __m128i H = _mm_setzero_si128(); + H = _mm_insert_epi16(H, f16, 0); + __m128 A = _mm_cvtph_ps(H); + return _mm_cvtss_f32(A); +#endif +} + +}; // namespace lczero diff --git a/src/neural/dx/fp16_utils.h b/src/neural/dx/fp16_utils.h new file mode 100644 index 0000000000..6c03c84408 --- /dev/null +++ b/src/neural/dx/fp16_utils.h @@ -0,0 +1,33 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ +#pragma once +namespace lczero { + +uint16_t FP32toFP16(float f32); +float FP16toFP32(uint16_t f16); + +}; // namespace lczero diff --git a/src/neural/dx/layers_dx.cc b/src/neural/dx/layers_dx.cc new file mode 100644 index 0000000000..c9d58a753f --- /dev/null +++ b/src/neural/dx/layers_dx.cc @@ -0,0 +1,764 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "layers_dx.h" +#include +#include +#include +#include "comdef.h" +#include "utils/exception.h" + +#include "MetaCommand.h" +#include "network_dx.h" + +#include + +namespace lczero { +namespace dx_backend { + +// Utility functions used only in this file. +namespace { + +static void CopyFloatToHalf(dx_half* out, const float* in, size_t elements) { + for (int i = 0; i < elements; i++) { + out[i] = FP32toFP16(in[i]); + } +} + +static void CpuTranspose(float* op, float* ip, size_t rows, size_t cols) { + for (size_t i = 0; i < rows; i++) + for (size_t j = 0; j < cols; j++) op[j * rows + i] = ip[i * cols + j]; +} + + +template +static void MatrixMulCPU(float* c, const float* a, const float* b) { + for (int i = 0; i < M; ++i) + for (int j = 0; j < N; ++j) { + float S = 0; + for (int k = 0; k < K; ++k) + S += a[i * K + k] * b[k * N + j]; + c[i * N + j] = S; + } +} + +static void FilterTransform4x4(float* transformed_filter, const float* filter) { + // transform applied to filter (of size 3x3) + float G[6 * 3] = { 1.0f / 4, 0, 0, -1.0f / 6, -1.0f / 6, -1.0f / 6, + -1.0f / 6, 1.0f / 6, -1.0f / 6, 1.0f / 24, 1.0f / 12, 1.0f / 6, + 1.0f / 24, -1.0f / 12, 1.0f / 6, 0, 0, 1}; + + float Gt[3 * 6] = {1.0f / 4, -1.0f / 6, -1.0f / 6, 1.0f / 24, 1.0f / 24, 0, + 0, -1.0f / 6, 1.0f / 6, 1.0f / 12, -1.0f / 12, 0, + 0, -1.0f / 6, -1.0f / 6, 1.0f / 6, 1.0f / 6, 1}; + + float temp_filter[6 * 3]; + MatrixMulCPU<6, 3, 3>(temp_filter, G, filter); + MatrixMulCPU<6, 6, 3>(transformed_filter, temp_filter, Gt); +} + +#define FILTER_IDX_NCHW(k, c, h, w) ((k)*C * S * R + (c)*S * R + (h)*R + w) + +// Transform filter for winograd. +// (e.g: for K C H W - 256x256x3x3, filter output is 6x6x256x256 - H W K C) +static void TransformFilterTensor_Winograd4x4(int K, int C, float* transformed_filter, + const float* weight) { + constexpr int S = 3; + constexpr int R = 3; + + for (int k = 0; k < K; k++) { + for (int c = 0; c < C; c++) { + // 1. read single filter from memory + float filter_tile[3][3]; + for (int s = 0; s < S; s++) + for (int r = 0; r < R; r++) { + filter_tile[s][r] = weight[FILTER_IDX_NCHW(k, c, s, r)]; + } + + // 2. transform it + float transformed_tile[6][6]; + FilterTransform4x4(&(transformed_tile[0][0]), &(filter_tile[0][0])); + + // 3. write it back to memory (in HWCK layout) + for (int i = 0; i < 6; i++) + for (int j = 0; j < 6; j++) { + transformed_filter[i * 6 * C * K + j * C * K + c * K + k] = + transformed_tile[i][j]; + } + } + } +} + + +static void GetGemmTensorDesc(TensorDesc* out_desc, int batch_size, int rows, + int cols, bool fp16) { + memset(out_desc, 0, sizeof(TensorDesc)); + out_desc->DimensionCount = 4; + out_desc->DataType = fp16 ? 
1 : 0; + + out_desc->Size[0] = batch_size; + out_desc->Size[1] = 1; + out_desc->Size[2] = rows; + out_desc->Size[3] = cols; + + // row-major by default + out_desc->Stride[3] = 1; + out_desc->Stride[2] = cols; + out_desc->Stride[1] = rows * cols; + out_desc->Stride[0] = rows * cols; + + for (int i = 0; i < 4; i++) out_desc->StrideAlignment[i] = 1; + + out_desc->BaseAlignmentInBytes = 4096; // arbitary + out_desc->PhysicalSizeInElements = batch_size * rows * cols; +} + +static void GetConvTensorDesc(TensorDesc* out_desc, int N, int C, + int H, int W, bool fp16) { + memset(out_desc, 0, sizeof(TensorDesc)); + out_desc->DimensionCount = 4; + out_desc->DataType = fp16 ? 1 : 0; + + out_desc->Size[0] = N; + out_desc->Size[1] = C; + out_desc->Size[2] = H; + out_desc->Size[3] = W; + + // NCHW layout + out_desc->Stride[3] = 1; + out_desc->Stride[2] = W; + out_desc->Stride[1] = H * W; + out_desc->Stride[0] = C * H * W; + + for (int i = 0; i < 4; i++) out_desc->StrideAlignment[i] = 1; + + out_desc->BaseAlignmentInBytes = 4096; // arbitary + out_desc->PhysicalSizeInElements = N * C * H * W; +} + +}; // namespace + +GemmMetaCommand::GemmMetaCommand(DxContext* dx_context, int rows, int cols, int K, + int gemm_batch, bool fp16, bool a_transpose, + bool b_transpose) { + memset(scratch_data_persistent_, 0, sizeof(scratch_data_persistent_)); + memset(scratch_data_temporary_, 0, sizeof(scratch_data_temporary_)); + memset(meta_commands_, 0, sizeof(meta_commands_)); + + // Note: the way GEMM is used, the 'rows'/M - dimension is a function of + // batch size. gemm_batch is different and unrelated (either 36 for Winograd, + // or 1 for other FC layers) + int num_meta_commands = 1; + if (rows == 0) { + // Create metacommands for each 'rows' that is multiple of 8. + num_meta_commands = kMaxMetacommands; + rows_known_ = false; + } else { + rows_known_ = true; + } + + for (int i = 0; i < num_meta_commands; i++) { + int num_rows = rows ? rows : (i + 1) * kMetacommandGranulity; + + GemmCreateDesc createDesc = {}; + GetGemmTensorDesc(&createDesc.DescOut, gemm_batch, num_rows, cols, fp16); + GetGemmTensorDesc(&createDesc.DescA, gemm_batch, a_transpose ? K : num_rows, + a_transpose ? num_rows : K, fp16); + GetGemmTensorDesc(&createDesc.DescB, gemm_batch, b_transpose ? cols : K, + b_transpose ? K : cols, fp16); + createDesc.cMatrixNull = 1; + createDesc.ActivationIsNull = 1; + createDesc.Alpha = 1.0; + createDesc.Beta = 0.0; + createDesc.Precision = fp16 ? 
1 : 0; // 0 - fp32, 1 - fp16 + createDesc.TransA = a_transpose; + createDesc.TransB = b_transpose; + + ID3D12MetaCommand* pMetacommand = nullptr; + HRESULT hr = dx_context->getDevice()->CreateMetaCommand( + GemmGuid, 1, &createDesc, sizeof(createDesc), + IID_PPV_ARGS(&pMetacommand)); + + if (hr != S_OK) { +#ifdef DEBUG_DUMP_PER_LAYER_DATA + printf( + "\nCan't create GEMM Metacommand for " + "rows: %d, cols: %d, K: %d, batch: " + "%d\n", + num_rows, cols, K, gemm_batch); +#endif + create_succeeded_ = false; + return; + } + + meta_commands_[i] = pMetacommand; + + size_t persistent_size = pMetacommand->GetRequiredParameterResourceSize( + D3D12_META_COMMAND_PARAMETER_STAGE_EXECUTION, 4); + size_t temp_size = pMetacommand->GetRequiredParameterResourceSize( + D3D12_META_COMMAND_PARAMETER_STAGE_EXECUTION, 5); + + if (persistent_size) { + dx_context->CreateAlloc(persistent_size, D3D12_HEAP_TYPE_DEFAULT, + scratch_data_persistent_[i], fp16); + } + + if (temp_size) { + dx_context->CreateAlloc(temp_size, D3D12_HEAP_TYPE_DEFAULT, + scratch_data_temporary_[i], fp16); + } + + GemmInitDesc initDesc = {}; + initDesc.PersistentResource = scratch_data_persistent_[i].desc_handle_scalar; + initDesc.TemporaryResource = scratch_data_temporary_[i].desc_handle_scalar; + + dx_context->getCommandList()->InitializeMetaCommand( + meta_commands_[i], &initDesc, sizeof(initDesc)); + } + + create_succeeded_ = true; +} + +void GemmMetaCommand::PerformGemm(int rows, DXAlloc A, DXAlloc B, + DXAlloc output, + ID3D12GraphicsCommandList4* command_list) { + if (!create_succeeded_) + throw Exception("Metacommand not created"); + + int index = 0; + if (!rows_known_) { + index = DivUp(rows, 8) - 1; + } + + ID3D12MetaCommand* meta_command = meta_commands_[index]; + DXAlloc& scratch_persistent = scratch_data_persistent_[index]; + DXAlloc& scratch_temporary = scratch_data_temporary_[index]; + + GemmExecuteDesc exec_desc = {}; + exec_desc.AResource = A.desc_handle_scalar; + exec_desc.BResource = B.desc_handle_scalar; + exec_desc.OutputResource = output.desc_handle_scalar; + exec_desc.PersistentResource = scratch_persistent.desc_handle_scalar; + exec_desc.TemporaryResource = scratch_temporary.desc_handle_scalar; + + command_list->ExecuteMetaCommand(meta_command, &exec_desc, sizeof(exec_desc)); +} + +GemmMetaCommand::~GemmMetaCommand() { + for (int i = 0; i < kMaxMetacommands; i++) { + if (scratch_data_temporary_[i].resource) + scratch_data_temporary_[i].resource->Release(); + if (scratch_data_persistent_[i].resource) + scratch_data_persistent_[i].resource->Release(); + if (meta_commands_[i]) meta_commands_[i]->Release(); + } +} + +ConvMetaCommand::ConvMetaCommand(DxContext* dx_context, int C, int K, int H, + int W, int F, bool relu, bool bias, + bool fp16) { + memset(scratch_data_persistent_, 0, sizeof(scratch_data_persistent_)); + memset(scratch_data_temporary_, 0, sizeof(scratch_data_temporary_)); + memset(meta_commands_, 0, sizeof(meta_commands_)); + + for (int i = 0; i < kMaxMetacommands; i++) { + int n = (i + 1) * kMetacommandGranulity; + + ConvCreateDesc createDesc = {}; + GetConvTensorDesc(&createDesc.InputDesc, n, C, H, W, fp16); + GetConvTensorDesc(&createDesc.OutputDesc, n, K, H, W, fp16); + GetConvTensorDesc(&createDesc.FilterDesc, K, C, F, F, fp16); + GetConvTensorDesc(&createDesc.BiasDesc, K, 1, 1, 1, fp16); + createDesc.BiasNull = bias ? 
0 : 1; + createDesc.Mode = 1; // 1 is for cross-correlation (0 - conv) + + createDesc.Direction = 0; // forward + createDesc.DimensionCount = 2; // 2D conv + createDesc.Stride[0] = 1; + createDesc.Stride[1] = 1; + createDesc.Dilation[0] = 1; + createDesc.Dilation[1] = 1; + + int pad = (F - 1) / 2; + createDesc.StartPadding[0] = pad; + createDesc.StartPadding[1] = pad; + createDesc.EndPadding[0] = pad; + createDesc.EndPadding[1] = pad; + createDesc.GroupCount = 1; + if (relu) { + createDesc.ActivationFunction = 9; // relu (guess?) + createDesc.ActivationIsNull = 0; + } else { + createDesc.ActivationIsNull = 1; + } + createDesc.Precision = fp16 ? 1 : 0; // 0 - fp32, 1 - fp16 + + ID3D12MetaCommand* pMetacommand = nullptr; + HRESULT hr = dx_context->getDevice()->CreateMetaCommand( + ConvGuid, 1, &createDesc, sizeof(createDesc), + IID_PPV_ARGS(&pMetacommand)); + + if (hr != S_OK) { +#ifdef DEBUG_DUMP_PER_LAYER_DATA + printf( + "\nCan't create Conv Metacommand for " + "N, C, K, H, W, f: %d %d %d %d %d %d\n", + n, C, K, H, W, F); +#endif + create_succeeded_ = false; + return; + } + + meta_commands_[i] = pMetacommand; + + size_t persistent_size = pMetacommand->GetRequiredParameterResourceSize( + D3D12_META_COMMAND_PARAMETER_STAGE_EXECUTION, 4); + size_t temp_size = pMetacommand->GetRequiredParameterResourceSize( + D3D12_META_COMMAND_PARAMETER_STAGE_EXECUTION, 5); + + if (persistent_size) { + dx_context->CreateAlloc(persistent_size, D3D12_HEAP_TYPE_DEFAULT, + scratch_data_persistent_[i], fp16); + } + + if (temp_size) { + dx_context->CreateAlloc(temp_size, D3D12_HEAP_TYPE_DEFAULT, + scratch_data_temporary_[i], fp16); + } + + InitConvDesc initDesc = {}; + initDesc.PersistentResource = scratch_data_persistent_[i].desc_handle_scalar; + initDesc.TemporaryResource = scratch_data_temporary_[i].desc_handle_scalar; + + dx_context->getCommandList()->InitializeMetaCommand( + meta_commands_[i], &initDesc, sizeof(initDesc)); + } + use_bias_ = bias; + create_succeeded_ = true; +} + +void ConvMetaCommand::PerformConv(int batch, DXAlloc input, DXAlloc filter, + DXAlloc bias, DXAlloc output, + ID3D12GraphicsCommandList4* command_list) { + if (!create_succeeded_) throw Exception("Metacommand not created"); + + int index = DivUp(batch, 8) - 1; + + ID3D12MetaCommand* meta_command = meta_commands_[index]; + DXAlloc& scratch_persistent = scratch_data_persistent_[index]; + DXAlloc& scratch_temporary = scratch_data_temporary_[index]; + + ExecuteConvDesc exec_desc = {}; + exec_desc.InputResource = input.desc_handle_scalar; + exec_desc.FilterResource = filter.desc_handle_scalar; + if (use_bias_) + exec_desc.BiasResource = bias.desc_handle_scalar; + exec_desc.OutputResource = output.desc_handle_scalar; + exec_desc.PersistentResource = scratch_persistent.desc_handle_scalar; + exec_desc.TemporaryResource = scratch_temporary.desc_handle_scalar; + + command_list->ExecuteMetaCommand(meta_command, &exec_desc, sizeof(exec_desc)); +} + +ConvMetaCommand::~ConvMetaCommand() { + for (int i = 0; i < kMaxMetacommands; i++) { + if (scratch_data_temporary_[i].resource) + scratch_data_temporary_[i].resource->Release(); + if (scratch_data_persistent_[i].resource) + scratch_data_persistent_[i].resource->Release(); + if (meta_commands_[i]) meta_commands_[i]->Release(); + } +} + + +BaseLayer::BaseLayer(int c, int h, int w, BaseLayer* ip, DxContext* dx_context, + bool fp16) + : input_(ip), C(c), H(h), W(w), dx_context_(dx_context), fp16_(fp16) {} + +ConvLayer::ConvLayer(bool fp16, GemmMetaCommand* pMetaCommandGemm, + ConvMetaCommand* 
pMetaCommandConv, + DxContext* dx_context, BaseLayer* ip, int C, int H, int W, + int filter, int Cin, bool bias, bool relu, bool skipAdd, + bool se, int se_k) + : BaseLayer(C, H, W, ip, dx_context, fp16), + meta_command_gemm_(pMetaCommandGemm), + meta_command_conv_(pMetaCommandConv), + c_input_(Cin), + filter_size_(filter), + use_relu_(relu), + use_bias_(bias), + skip_add_(skipAdd), + has_se_(se), + se_k_(se_k), + weights_(), + transformed_weights_(), + biases_(), + w1_(), + w2_(), + b1_(), + b2_() { + size_t element_size = fp16 ? sizeof(dx_half) : sizeof(float); + size_t weight_size = element_size * C * Cin * filter * filter; + size_t blas_size = element_size * C; + + dx_context->CreateAlloc(weight_size, D3D12_HEAP_TYPE_DEFAULT, weights_, fp16); + + if (filter == 3) { + // 6x6 transformed filter size, for 3x3 convolution + dx_context->CreateAlloc(weight_size * 4, D3D12_HEAP_TYPE_DEFAULT, + transformed_weights_, fp16); + } + + if (use_bias_) { + dx_context->CreateAlloc(blas_size, D3D12_HEAP_TYPE_DEFAULT, biases_, fp16); + } + + if (has_se_) + { + const size_t num_weights1 = C * se_k_; + const size_t num_weights2 = num_weights1 * 2; + const size_t num_biases1 = se_k_; + const size_t num_biases2 = 2 * C; + + const size_t weight_size1 = element_size * num_weights1; + const size_t weight_size2 = element_size * num_weights2; + const size_t biases_size1 = element_size * num_biases1; + const size_t biases_size2 = element_size * num_biases2; + + dx_context->CreateAlloc(weight_size1, D3D12_HEAP_TYPE_DEFAULT, w1_, fp16); + dx_context->CreateAlloc(weight_size2, D3D12_HEAP_TYPE_DEFAULT, w2_, fp16); + dx_context->CreateAlloc(biases_size1, D3D12_HEAP_TYPE_DEFAULT, b1_, fp16); + dx_context->CreateAlloc(biases_size2, D3D12_HEAP_TYPE_DEFAULT, b2_, fp16); + } + + shader_wrapper_ = dx_context->getShaderWrapper(); +} + +void ConvLayer::LoadWeights(float* cpu_filter, float* cpu_bias, DxContext* dx_context) { + int num_weights = c_input_ * C * filter_size_ * filter_size_; + size_t element_size = fp16_ ? sizeof(dx_half) : sizeof(float); + size_t weight_size = element_size * num_weights; + size_t bias_size = element_size * C; + + std::vector temp(num_weights); + if (fp16_) { + CopyFloatToHalf(temp.data(), cpu_filter, num_weights); + dx_context->ScheduleUpload(weights_, temp.data(), weight_size); + } else { + dx_context->ScheduleUpload(weights_, cpu_filter, weight_size); + } + + if (cpu_bias) { + if (fp16_) { + CopyFloatToHalf(temp.data(), cpu_bias, C); + dx_context->ScheduleUpload(biases_, temp.data(), bias_size); + } else { + dx_context->ScheduleUpload(biases_, cpu_bias, bias_size); + } + } + + if (filter_size_ == 3) { + std::vector temp_transformed(num_weights * 4); + TransformFilterTensor_Winograd4x4(C, c_input_, temp_transformed.data(), + cpu_filter); + if (fp16_) { + std::vector temp_transformed_half(num_weights * 4); + CopyFloatToHalf(temp_transformed_half.data(), temp_transformed.data(), + num_weights * 4); + dx_context->ScheduleUpload(transformed_weights_, + temp_transformed_half.data(), weight_size * 4); + } else { + dx_context->ScheduleUpload(transformed_weights_, temp_transformed.data(), + weight_size * 4); + } + } +} + +void ConvLayer::LoadSEWeights(float* w1, float* b1, float* w2, float* b2) { + size_t element_size = fp16_ ? 
sizeof(dx_half) : sizeof(float); + const size_t num_weights1 = C * se_k_; + const size_t num_weights2 = num_weights1 * 2; + const size_t weight_size1 = element_size * num_weights1; + const size_t weight_size2 = element_size * num_weights2; + + const size_t num_biases1 = se_k_; + const size_t biases_size1 = element_size * num_biases1; + const size_t num_biases2 = 2*C; + const size_t biases_size2 = element_size * num_biases2; + + // The shader uses transposed weight matrices. + + std::vector temp_transposed(num_weights2); + std::vector temp_half(num_weights2); + + CpuTranspose(temp_transposed.data(), w1, se_k_, C); + if (fp16_) { + CopyFloatToHalf(temp_half.data(), temp_transposed.data(), num_weights1); + dx_context_->ScheduleUpload(w1_, temp_half.data(), weight_size1); + } else { + dx_context_->ScheduleUpload(w1_, temp_transposed.data(), weight_size1); + } + + CpuTranspose(temp_transposed.data(), w2, 2*C, se_k_); + if (fp16_) { + CopyFloatToHalf(temp_half.data(), temp_transposed.data(), num_weights2); + dx_context_->ScheduleUpload(w2_, temp_half.data(), weight_size2); + } else { + dx_context_->ScheduleUpload(w2_, temp_transposed.data(), weight_size2); + } + + if (fp16_) { + CopyFloatToHalf(temp_half.data(), b1, num_biases1); + dx_context_->ScheduleUpload(b1_, temp_half.data(), biases_size1); + } else { + dx_context_->ScheduleUpload(b1_, b1, biases_size1); + } + + if (fp16_) { + CopyFloatToHalf(temp_half.data(), b2, num_biases2); + dx_context_->ScheduleUpload(b2_, temp_half.data(), biases_size2); + } else { + dx_context_->ScheduleUpload(b2_, b2, biases_size2); + } +} + +void ConvLayer::Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, + DXAlloc scratch, DXAlloc scratch2, + ID3D12GraphicsCommandList4* command_list) { + + // Use winograd for filter size of 3, when GEMM metacommand is available, + // Or when GEMM metacommand isn't available but Convolution metacommand is + // also not available (compute shader matrix multiply path). + bool useWinograd = + (filter_size_ == 3) && + ((meta_command_gemm_ && meta_command_gemm_->IsAvailable()) || + !meta_command_conv_ || !meta_command_conv_->IsAvailable()); + + if (useWinograd) { + // Need to pad up the input to gemm too (i.e, the transformed Input tensor)! + // It's in HWNC layout, and 'N'/GemmN needs to be padded up (HW = 6x6) + // to make it simple, just pad up N to multiple of 2 here (so that gemmN is + // multiple of 8). + // TODO: figure out why padding up by 4 is needed (instead of 2!) + //N = ((N + 1) / 2) * 2; + N = ((N + 3) / 4) * 4; + + // 1. Input transform (input->scratch) + shader_wrapper_->InputTransform(command_list, scratch, input, N, c_input_, + fp16_); + + dx_context_->UavBarrier(command_list); + + // 2. Gemm (scratch -> scratch2) + if (meta_command_gemm_ && meta_command_gemm_->IsAvailable()) + meta_command_gemm_->PerformGemm(N * 4, scratch, transformed_weights_, scratch2, + command_list); + else + shader_wrapper_->MatrixMultiply(command_list, scratch2, scratch, + transformed_weights_, N * 4, C, c_input_, + 36, fp16_); + + dx_context_->UavBarrier(command_list); + + // 3. 
Output transform (scratch2 -> output) + shader_wrapper_->OutputTransform( + command_list, output, scratch2, input2, biases_, w1_, b1_, w2_, b2_, N, + C, use_relu_, use_bias_, skip_add_, has_se_, se_k_, fp16_); + + } + else if (meta_command_conv_ && meta_command_conv_->IsAvailable()) { + if (skip_add_ || has_se_) + meta_command_conv_->PerformConv(N, input, weights_, biases_, scratch, + command_list); + else + meta_command_conv_->PerformConv(N, input, weights_, biases_, output, + command_list); + if (has_se_) { + dx_context_->UavBarrier(command_list); + shader_wrapper_->Se(command_list, output, scratch, input2, biases_, w1_, + b1_, w2_, b2_, N, C, use_relu_, false, skip_add_, + se_k_, fp16_); + } else if (skip_add_) { + // Need seperate pass for skip connection addition as Metacommand API + // doesn't allow it to be fused with convolution. + dx_context_->UavBarrier(command_list); + shader_wrapper_->AddVectors(command_list, output, scratch, input2, + N * C * H * W, N * C * H * W, N * C * H * W, + use_relu_, false, fp16_); + } + } + else if (filter_size_ == 1) { + shader_wrapper_->Conv1x1(command_list, output, input, weights_, biases_, N, + c_input_, C, use_relu_, use_bias_, fp16_); + } else { + throw Exception("Unsupported filter shape for convolution! "); + } +} + +ConvLayer::~ConvLayer() { + if (weights_.resource) weights_.resource->Release(); + if (biases_.resource) biases_.resource->Release(); + if (transformed_weights_.resource) transformed_weights_.resource->Release(); + + if (w1_.resource) w1_.resource->Release(); + if (w2_.resource) w2_.resource->Release(); + if (b1_.resource) b1_.resource->Release(); + if (b2_.resource) b2_.resource->Release(); +} + +FCLayer::FCLayer(bool fp16, DxContext* dx_context, BaseLayer* ip, int C, int H, + int W, bool bias, bool relu, bool tanh) + : BaseLayer(C, H, W, ip, dx_context, fp16), + use_bias_(bias), + use_relu_(relu), + meta_command_(), + use_tanh_(tanh) { + size_t element_size = fp16_ ? sizeof(dx_half) : sizeof(float); + size_t weight_size = + element_size * C * H * W * ip->GetC() * ip->GetH() * ip->GetW(); + size_t blas_size = element_size * C * H * W; + + dx_context->CreateAlloc(weight_size, D3D12_HEAP_TYPE_DEFAULT, weights_, fp16); + if (use_bias_) + dx_context->CreateAlloc(blas_size, D3D12_HEAP_TYPE_DEFAULT, biases_, fp16); + + shader_wrapper_ = dx_context->getShaderWrapper(); + + // Create metacommand object + int rows = 0; // batch size + int cols = C * H * W; // cols of the output matrix + int K = ip->GetC() * ip->GetH() * ip->GetW(); // cols of input matrix + // We do Out = A * weight. + // The weight matrix need to be transpsoed before it can be multiplied. + // The transpose is done on CPU when loading weights + meta_command_ = std::make_unique(dx_context, rows, cols, K, 1, + fp16, false, false); +} + +void FCLayer::LoadWeights(float* cpuWeight, float* cpuBias, + DxContext* dx_context) { + size_t rows = C * H * W; + size_t cols = input_->GetC() * input_->GetH() * input_->GetW(); + size_t num_weights = + rows * cols; + + size_t element_size = fp16_ ? 
sizeof(dx_half) : sizeof(float); + size_t weight_size = element_size * num_weights; + size_t num_biases = C * H * W; + size_t bias_size = element_size * num_biases; + + std::vector temp_transposed(num_weights); + CpuTranspose(temp_transposed.data(), cpuWeight, rows, cols); + std::vector temp(num_weights); + if (fp16_) { + CopyFloatToHalf(temp.data(), temp_transposed.data(), num_weights); + dx_context->ScheduleUpload(weights_, temp.data(), weight_size); + } else { + dx_context->ScheduleUpload(weights_, temp_transposed.data(), weight_size); + } + + if (cpuBias) { + if (fp16_) { + CopyFloatToHalf(temp.data(), cpuBias, C); + dx_context->ScheduleUpload(biases_, temp.data(), bias_size); + } else { + dx_context->ScheduleUpload(biases_, cpuBias, bias_size); + } + } +} + +void FCLayer::Eval(int N, DXAlloc output, DXAlloc input, DXAlloc /*input2*/, + DXAlloc /*scratch*/, DXAlloc /*scratch2*/, + ID3D12GraphicsCommandList4* command_list) { + int num_outputs = C * H * W; + int num_inputs = input_->GetC() * input_->GetH() * input_->GetW(); + + if (meta_command_->IsAvailable()) + meta_command_->PerformGemm(N, input, weights_, output, command_list); + else + shader_wrapper_->MatrixMultiply(command_list, output, input, weights_, + DivUp(N, 8) * 8, num_outputs, num_inputs, 1, + fp16_); + + if (use_bias_ || use_relu_ || use_tanh_) { + dx_context_->UavBarrier(command_list); + shader_wrapper_->AddVectors(command_list, output, output, biases_, + N * num_outputs, N * num_outputs, num_outputs, + use_relu_, use_tanh_, fp16_); + } +} + +FCLayer::~FCLayer() { + if (weights_.resource) weights_.resource->Release(); + if (biases_.resource) biases_.resource->Release(); +} + + +PolicyMapLayer::PolicyMapLayer(bool fp16, DxContext* dx_context, BaseLayer* ip, + int C, int H, int W, int usedSize) + : BaseLayer(C, H, W, ip, dx_context, fp16), + used_size_(usedSize) { + size_t weight_size = sizeof(int) * used_size_; + dx_context->CreateAlloc(weight_size, D3D12_HEAP_TYPE_DEFAULT, weights_, fp16); +} + +void PolicyMapLayer::LoadWeights(const short* cpuWeights) { + // convert from short to int (as HLSL might have trouble reading short) + std::vector temp(used_size_); + for (int i = 0; i < used_size_; i++) temp[i] = (int)cpuWeights[i]; + dx_context_->ScheduleUpload(weights_, temp.data(), sizeof(int) * used_size_); +} + +void PolicyMapLayer::Eval(int N, DXAlloc output, DXAlloc input, DXAlloc /*input2*/, + DXAlloc /*scratch*/, DXAlloc /*scratch2*/, + ID3D12GraphicsCommandList4* command_list) { + int inputSize = + this->input_->GetC() * this->input_->GetH() * this->input_->GetW(); + int outputSize = this->C * this->H * this->W; + dx_context_->getShaderWrapper()->PolicyMap(command_list, output, input, + weights_, N, inputSize, outputSize, + used_size_, fp16_); +} + +PolicyMapLayer::~PolicyMapLayer() { + if (weights_.resource) weights_.resource->Release(); +} + + +void DxError(HRESULT status, const char* file, const int& line) { + if (FAILED(status)) { + assert(0); + char message[512]; + _com_error err(status); + LPCTSTR errMsg = err.ErrorMessage(); + sprintf_s(message, "Dx error: %s (%s:%d) ", errMsg, file, line); + throw Exception(message); + } +} + +} // namespace dx_backend +} // namespace lczero diff --git a/src/neural/dx/layers_dx.h b/src/neural/dx/layers_dx.h new file mode 100644 index 0000000000..1503f23d98 --- /dev/null +++ b/src/neural/dx/layers_dx.h @@ -0,0 +1,212 @@ +/* + This file is part of Leela Chess Zero. 
+ Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include +#include "dx_common.h" +#include "shader_wrapper.h" +#include + +namespace lczero { +class DxContext; + +namespace dx_backend { +constexpr int kMaxSupportedBatchSize = 256; + +// The Layer objects only hold memory for weights, biases, etc +// memory for input and output tensors is provided by caller of Eval. + +class BaseLayer { + public: + int GetC() const { return C; } + int GetH() const { return H; } + int GetW() const { return W; } + + BaseLayer(int c, int h, int w, BaseLayer* ip, DxContext* dx_context, bool fp16); + virtual ~BaseLayer() = default; + size_t GetOutputSize(int N) const { + return (fp16_ ? sizeof(dx_half) : sizeof(float)) * N * C * H * W; + } + + // input2 is optional (skip connection). + virtual void Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, + DXAlloc scratch, DXAlloc scratch2, + ID3D12GraphicsCommandList4* command_list) = 0; + + protected: + BaseLayer* input_; + DxContext* dx_context_; + + bool fp16_; + + // Output tensor dimensions. + int C; + int H; + int W; +}; + +// Holds Metacommand objects and their scratch space for all allowed batch +// sizes. +class GemmMetaCommand { + private: + // Need to create a Metacommand object for each batch size unfortunately! + // Some hw vendors don't support arbitary sizes anyway, so we create only + // multiples of 8 in no. of rows (when M is 0). 
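+  // For example, an Eval that needs 13 rows selects index
+  // DivUp(13, 8) - 1 == 1 in PerformGemm, i.e. the metacommand that was
+  // created for 16 rows.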
+ + static constexpr int kMetacommandGranulity = 8; + static constexpr int kMaxMetacommands = + (kMaxSupportedBatchSize * 4) / kMetacommandGranulity; + ID3D12MetaCommand* meta_commands_[kMaxMetacommands]; + + DXAlloc scratch_data_persistent_[kMaxMetacommands]; + DXAlloc scratch_data_temporary_[kMaxMetacommands]; + + bool rows_known_; + bool create_succeeded_; + + public: + GemmMetaCommand(DxContext* dx_context, int M, int N, int K, int gemm_batch, + bool fp16, bool a_transpose, bool b_transpose); + ~GemmMetaCommand(); + + void PerformGemm(int rows, DXAlloc A, DXAlloc B, DXAlloc Output, + ID3D12GraphicsCommandList4* command_list); + + bool IsAvailable() { return create_succeeded_; } +}; + +class ConvMetaCommand { + private: + // Metacommand objects for each multiple of 8 batch size + static constexpr int kMetacommandGranulity = 8; + static constexpr int kMaxMetacommands = + kMaxSupportedBatchSize / kMetacommandGranulity; + ID3D12MetaCommand* meta_commands_[kMaxMetacommands]; + + DXAlloc scratch_data_persistent_[kMaxMetacommands]; + DXAlloc scratch_data_temporary_[kMaxMetacommands]; + bool create_succeeded_; + bool use_bias_; + + public: + ConvMetaCommand(DxContext* dx_context, int C, int K, int H, int W, int F, + bool relu, bool bias, bool fp16); + ~ConvMetaCommand(); + + void PerformConv(int batch, DXAlloc input, DXAlloc filter, DXAlloc bias, + DXAlloc output, ID3D12GraphicsCommandList4* command_list); + + bool IsAvailable() { return create_succeeded_; } +}; + + +class ConvLayer : public BaseLayer { + using BaseLayer::C; + using BaseLayer::GetC; + using BaseLayer::GetH; + using BaseLayer::GetW; + using BaseLayer::H; + using BaseLayer::W; + + public: + ConvLayer(bool fp16, GemmMetaCommand* meta_command_gemm, + ConvMetaCommand* meta_command_conv, DxContext* dx_context, + BaseLayer* ip, int C, int H, int W, int size, int Cin, bool bias, + bool relu, bool skipAdd = false, bool se = false, int se_k = 0); + ~ConvLayer(); + + // returns space in uploadBuffer used for loading weights + void LoadWeights(float* filter, float* bias, DxContext* dx_context); + void LoadSEWeights(float* w1, float* b1, float* w2, float* b2); + void Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, + DXAlloc scratch, DXAlloc scratch2, + ID3D12GraphicsCommandList4* command_list) override; + + private: + const int c_input_; + const int filter_size_; + const bool use_relu_; + const bool use_bias_; + const bool skip_add_; + const bool has_se_; + const int se_k_; + + DXAlloc biases_; + DXAlloc weights_; + DXAlloc transformed_weights_; // After winograd transform. + + // Weights and Biases for (optional) SE. + DXAlloc w1_; + DXAlloc w2_; + DXAlloc b1_; + DXAlloc b2_; + + ShaderWrapper* shader_wrapper_; + GemmMetaCommand* meta_command_gemm_; + ConvMetaCommand* meta_command_conv_; +}; + +class FCLayer : public BaseLayer { + public: + FCLayer(bool fp16, DxContext* dx_context, BaseLayer* ip, int C, int H, int W, + bool bias, bool relu, bool tanh); + ~FCLayer(); + + // returns space in uploadBuffer used for loading weights + void LoadWeights(float* cpu_weight, float* cpu_bias, DxContext* dx_context); + void Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, + DXAlloc scratch, DXAlloc scratch2, + ID3D12GraphicsCommandList4* command_list) override; + + private: + const bool use_bias_; + + // Only one of the below 2 activation functions should be enabled. 
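+  // (In practice the value head's first FC uses relu, its final FC uses tanh
+  //  for non-WDL nets, and the policy FC uses neither; see the DxNetwork
+  //  constructor.)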
+ const bool use_relu_; + const bool use_tanh_; + + DXAlloc biases_; + DXAlloc weights_; + ShaderWrapper* shader_wrapper_; + std::unique_ptr meta_command_; +}; + +class PolicyMapLayer : public BaseLayer { + public: + PolicyMapLayer(bool fp16, DxContext* dx_context, BaseLayer* ip, int C, int H, + int W, int used_size); + ~PolicyMapLayer(); + void LoadWeights(const short* cpu_weights); + void Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, + DXAlloc scratch, DXAlloc scratch2, + ID3D12GraphicsCommandList4* command_list) override; + private: + const int used_size_; + DXAlloc weights_; +}; + +} // namespace dx_backend +} // namespace dx_backend diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc new file mode 100644 index 0000000000..45d61dcf4e --- /dev/null +++ b/src/neural/dx/network_dx.cc @@ -0,0 +1,971 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ +#include +#include +#include +#include +#include +#include +#include + +#include "layers_dx.h" +#include "network_dx.h" +#include "neural/shared/policy_map.h" +#include "shader_wrapper.h" +#include "utils/bititer.h" +#include "utils/exception.h" + +namespace lczero { + +using namespace dx_backend; + +uint64_t DxContext::FlushCL(ID3D12GraphicsCommandList4* cl) { + if (!cl) cl = command_list_; + cl->Close(); + command_queue_->ExecuteCommandLists(1, (ID3D12CommandList**)&cl); + command_queue_->Signal(fence_, ++fence_val_); + return fence_val_; +} + +void DxContext::WaitForGpu(uint64_t fence_val) { + if (!fence_val) fence_val = fence_val_; + // Wait for commands to finish on GPU. + // (spinloop has lowest latency, we can try event based signal if CPU + // overhead becomes a bottleneck). 
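+  // An event-based wait, if ever needed, would look roughly like this
+  // (sketch only, not used here):
+  //   HANDLE event = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+  //   fence_->SetEventOnCompletion(fence_val, event);
+  //   WaitForSingleObject(event, INFINITE);
+  //   CloseHandle(event);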
+ while (fence_->GetCompletedValue() != fence_val) + ; + upload_scratch_mem_.offset = 0; +} + +void DxContext::ResetCL(ID3D12GraphicsCommandList4* cl, + ID3D12CommandAllocator* ca, bool reset) { + if (!cl) cl = command_list_; + if (!ca) ca = command_allocator_; + if (reset) { + ca->Reset(); + cl->Reset(ca, NULL); + } + cl->SetDescriptorHeaps(1, &desc_heap_); +} + +void DxContext::FlushAndWait() { + FlushCL(); + WaitForGpu(); + ResetCL(); +} + +void DxContext::UavBarrier(ID3D12GraphicsCommandList4* command_list) { + if (!command_list) command_list = command_list_; + CD3DX12_RESOURCE_BARRIER uav_barrier = CD3DX12_RESOURCE_BARRIER::UAV(nullptr); + command_list->ResourceBarrier(1, &uav_barrier); +} + +void DxContext::DumpFp32(float* buf, int elements) { + printf("\n"); + for (int i = 0; i < elements; i++) { + printf("%8.4f ", buf[i]); + if ((i % 8) == 7) printf("\n"); + } + printf("\n"); +} + +void DxContext::CopyTensor(DXAlloc dst, DXAlloc src, int bytes) { + CD3DX12_RESOURCE_BARRIER barrier; + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + src.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + command_list_->ResourceBarrier(1, &barrier); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + dst.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + command_list_->ResourceBarrier(1, &barrier); + + command_list_->CopyBufferRegion(dst.resource, dst.offset, src.resource, + src.offset, bytes); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + src.resource, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + command_list_->ResourceBarrier(1, &barrier); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + dst.resource, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + command_list_->ResourceBarrier(1, &barrier); +} + +void DxContext::DumpCpuTensor(void* data, int size, bool fp16, + bool allnewline) { + printf("\n"); + float* fp32arr = (float*)data; + uint16_t* arr = (uint16_t*)data; + + for (int i = 0; i < size; i++) { + printf("%8.4f ", fp16 ? FP16toFP32(arr[i]) : fp32arr[i]); + if (allnewline || ((i % 8) == 7)) printf("\n"); + } + printf("\n"); +} + +#ifdef DEBUG_DUMP_PER_LAYER_DATA +void DxContext::DumpTensor(const char* message, DXAlloc alloc, int size, + bool fp16, bool allnewline) { + printf("\n%s", message); + int bytes = size * (fp16 ? 
sizeof(dx_half) : sizeof(float)); + CD3DX12_RESOURCE_BARRIER barrier; + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + alloc.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + command_list_->ResourceBarrier(1, &barrier); + + command_list_->CopyBufferRegion(readback_scratch_mem_.resource, + readback_scratch_mem_.offset, alloc.resource, + alloc.offset, bytes); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + alloc.resource, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + command_list_->ResourceBarrier(1, &barrier); + + FlushAndWait(); + void* cpuPtr; + readback_scratch_mem_.resource->Map(0, nullptr, &cpuPtr); + DumpCpuTensor(cpuPtr, size, fp16, allnewline); + readback_scratch_mem_.resource->Unmap(0, nullptr); +} +#else +void DxContext::DumpTensor(const char*, DXAlloc, int, bool, bool) {} +#endif + +DxContext::DxContext(const OptionsDict& options) { + gpu_id_ = options.GetOrDefault("gpu", 0); + + IDXGIFactory4* pFactory = nullptr; + IDXGIAdapter* pAdapter = nullptr; + ReportDxErrors(CreateDXGIFactory2(0, IID_PPV_ARGS(&pFactory))); + ReportDxErrors(pFactory->EnumAdapters(gpu_id_, &pAdapter)); + pFactory->Release(); + + if (!pAdapter) throw Exception("Invalid GPU Id: " + std::to_string(gpu_id_)); + + ReportDxErrors(D3D12CreateDevice(pAdapter, D3D_FEATURE_LEVEL_11_0, + IID_PPV_ARGS(&device_))); + pAdapter->Release(); + + D3D12_COMMAND_QUEUE_DESC commandqueueDesc; + commandqueueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + commandqueueDesc.NodeMask = 0; + commandqueueDesc.Priority = 0; + commandqueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + ReportDxErrors(device_->CreateCommandQueue(&commandqueueDesc, + IID_PPV_ARGS(&command_queue_))); + + ReportDxErrors(device_->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator_))); + + ReportDxErrors(device_->CreateCommandList(1, D3D12_COMMAND_LIST_TYPE_DIRECT, + command_allocator_, NULL, + IID_PPV_ARGS(&command_list_))); + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.NumDescriptors = kNumDescHeapSlots; + ReportDxErrors( + device_->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&desc_heap_))); + + command_list_->SetDescriptorHeaps(1, &desc_heap_); + + next_slot_in_desc_heap_ = 0; + + fence_val_ = 0ull; + ReportDxErrors(device_->CreateFence(fence_val_, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&fence_))); + + shader_wrapper_.Init(device_); + + // Allocate scratch space for uploads and read-back. 
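+  // (ScheduleUpload sub-allocates from the upload buffer by bumping
+  //  upload_scratch_mem_.offset; WaitForGpu resets the offset to zero once
+  //  the queued copies have drained.)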
+ CreateAlloc(kUploadDownloadScratchSize, D3D12_HEAP_TYPE_UPLOAD, + upload_scratch_mem_, false); + CreateAlloc(kUploadDownloadScratchSize, D3D12_HEAP_TYPE_READBACK, + readback_scratch_mem_, false); +} + +DxContext::~DxContext() { + // Make sure nothing is in flight + FlushAndWait(); + + upload_scratch_mem_.resource->Release(); + readback_scratch_mem_.resource->Release(); + + shader_wrapper_.Destroy(); + command_list_->Release(); + command_allocator_->Release(); + command_queue_->Release(); + fence_->Release(); + desc_heap_->Release(); + device_->Release(); +} + +void DxContext::CreateAlloc(size_t size, D3D12_HEAP_TYPE type, DXAlloc& alloc, + bool fp16) { + // some alignment + int factor = DivUp((int)size, 4); + size = factor * 4; + + D3D12_HEAP_PROPERTIES heapDesc = {}; + heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapDesc.CreationNodeMask = 1; + heapDesc.VisibleNodeMask = 1; + + if (type == D3D12_HEAP_TYPE_CUSTOM) { + // Use custom heap type to allow GPU writing to system memory directly + heapDesc.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + heapDesc.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + } + + heapDesc.Type = type; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.Height = 1; + if (type == D3D12_HEAP_TYPE_DEFAULT || type == D3D12_HEAP_TYPE_CUSTOM) + bufferDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_RESOURCE_STATES resourceState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + if (type == D3D12_HEAP_TYPE_UPLOAD) + resourceState = D3D12_RESOURCE_STATE_GENERIC_READ; + else if (type == D3D12_HEAP_TYPE_READBACK) + resourceState = D3D12_RESOURCE_STATE_COPY_DEST; + + bufferDesc.Width = size; + ReportDxErrors(device_->CreateCommittedResource( + &heapDesc, D3D12_HEAP_FLAG_NONE, &bufferDesc, resourceState, nullptr, + IID_PPV_ARGS(&alloc.resource))); + + alloc.offset = 0; + alloc.gpu_va = alloc.resource->GetGPUVirtualAddress(); + + // Create desc heap entries for UAV resources. + if (resourceState == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) { + int handleIncrementSize = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + size_t element_size = fp16 ? sizeof(dx_half) : sizeof(float); + + // Scalar UAV. + { + int slot = next_slot_in_desc_heap_++; + + CD3DX12_CPU_DESCRIPTOR_HANDLE cpuDescHandle( + desc_heap_->GetCPUDescriptorHandleForHeapStart(), slot, + handleIncrementSize); + + CD3DX12_GPU_DESCRIPTOR_HANDLE gpuDescHandle( + desc_heap_->GetGPUDescriptorHandleForHeapStart(), slot, + handleIncrementSize); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = fp16 ? DXGI_FORMAT_R16_FLOAT : DXGI_FORMAT_R32_FLOAT; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = (UINT)(size / element_size); + + device_->CreateUnorderedAccessView(alloc.resource, nullptr, &uavDesc, + cpuDescHandle); + + alloc.desc_handle_scalar = gpuDescHandle; + } + + // 4-component vector UAV. 
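+    // (Each allocation gets two UAVs over the same buffer: the scalar view
+    //  above and the 4-component view below, letting shaders choose between
+    //  per-element and float4 loads/stores.)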
+ { + int slot = next_slot_in_desc_heap_++; + + CD3DX12_CPU_DESCRIPTOR_HANDLE cpuDescHandle( + desc_heap_->GetCPUDescriptorHandleForHeapStart(), slot, + handleIncrementSize); + + CD3DX12_GPU_DESCRIPTOR_HANDLE gpuDescHandle( + desc_heap_->GetGPUDescriptorHandleForHeapStart(), slot, + handleIncrementSize); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = fp16 ? DXGI_FORMAT_R16G16B16A16_FLOAT + : DXGI_FORMAT_R32G32B32A32_FLOAT; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = (UINT)(size / (4 * element_size)); + + device_->CreateUnorderedAccessView(alloc.resource, nullptr, &uavDesc, + cpuDescHandle); + + alloc.desc_handle_vector = gpuDescHandle; + } + } +} + +void DxContext::ScheduleUpload(DXAlloc alloc, const void* data, size_t size) { + // Make sure enough space is available in the upload scratch buffer + assert(size <= kUploadDownloadScratchSize); + if (upload_scratch_mem_.offset + size > kUploadDownloadScratchSize) + FlushAndWait(); + + uint8_t* temp; + upload_scratch_mem_.resource->Map(0, nullptr, (void**)&temp); + + dx_half* cpuPtr = (dx_half*)(temp + upload_scratch_mem_.offset); + memcpy(cpuPtr, data, size); + + CD3DX12_RESOURCE_BARRIER barrier; + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + alloc.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + command_list_->ResourceBarrier(1, &barrier); + + command_list_->CopyBufferRegion(alloc.resource, alloc.offset, + upload_scratch_mem_.resource, + upload_scratch_mem_.offset, size); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + alloc.resource, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + command_list_->ResourceBarrier(1, &barrier); + + upload_scratch_mem_.resource->Unmap(0, nullptr); + + // reset at flush and wait + upload_scratch_mem_.offset += (uint32_t)size; +} + +DxNetwork::DxNetwork(const WeightsFile& file, const OptionsDict& options) + : dx_context_(options), + capabilities_{file.format().network_format().input()} { + LegacyWeights weights(file.weights()); + + has_conv_policy_ = file.format().network_format().policy() == + pblczero::NetworkFormat::POLICY_CONVOLUTION; + max_batch_size_ = options.GetOrDefault("max_batch", 1024); + + // Default is fp16, to use fp32: --backend-opts=fp16=false. + fp16_ = options.GetOrDefault("fp16", DEFAULT_FP16); + + // Default is to attempt using Winograd algorithm for Convolutions using GEMM + // Metacommand first, if not available - attempt using Convolution Metacommand + // directly (whatever algorithm HW vendor is providing), and if neither is + // available use winograd algorithm with our own GEMM compute shader. + // The below backend options can be used to override this for testing. + bool enable_gemm_metacommand = + options.GetOrDefault("enable-gemm-metacommand", true); + bool enable_conv_metacommand = + options.GetOrDefault("enable-conv-metacommand", true); + + const int kNumFilters = (int)weights.input.biases.size(); + + num_blocks_ = (int)weights.residual.size(); + has_se_ = weights.residual[0].has_se; + int pol_channels = (int)weights.policy.biases.size(); + + // Build the network, and copy the weights to GPU memory. + + // Unique GEMMs for winograd required by the network. 
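+  // With F(4x4, 3x3) Winograd, an 8x8 board is covered by 2x2 = 4 output
+  // tiles, each transformed to 6x6, so the convolution becomes a batch of
+  // 36 GEMMs of shape [4*N x Cin] x [Cin x Cout]. 'rows' is passed as 0
+  // below because 4*N depends on the eval batch size.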
+ if (enable_gemm_metacommand) { + input_conv_gemm_metacommand_ = std::make_unique( + &dx_context_, 0, kNumFilters, kInputPlanes, 36, fp16_, false, false); + + residual_block_gemm_metacommand_ = std::make_unique( + &dx_context_, 0, kNumFilters, kNumFilters, 36, fp16_, false, false); + + if (has_conv_policy_) { + policy_conv_gemm_metacommand_ = std::make_unique( + &dx_context_, 0, pol_channels, kNumFilters, 36, fp16_, false, false); + } + } + + // Unique Conv metacommands required by the network. + if (enable_conv_metacommand) { + // Create only if we were not able to create GEMM metacommands for some + // reason 3x3, 112 channels -> kNumFilters channels, relu, bias. + if (!input_conv_gemm_metacommand_ || + !input_conv_gemm_metacommand_->IsAvailable()) + input_conv_metacommand_ = std::make_unique( + &dx_context_, kInputPlanes, kNumFilters, 8, 8, 3, true, true, fp16_); + + if (!residual_block_gemm_metacommand_ || + !residual_block_gemm_metacommand_->IsAvailable()) { + // 3x3, kNumFilters channels -> kNumFilters channels, relu, bias. + resi_block_conv_1_metacommand_ = std::make_unique( + &dx_context_, kNumFilters, kNumFilters, 8, 8, 3, true, true, fp16_); + + // 3x3, kNumFilters channels -> kNumFilters channels, no relu + // relu needs to be done after SE and/or skip connection add. + resi_block_conv_2_metacommand_ = std::make_unique( + &dx_context_, kNumFilters, kNumFilters, 8, 8, 3, false, true, fp16_); + } + + if (has_conv_policy_ && (!policy_conv_gemm_metacommand_ || + !policy_conv_gemm_metacommand_->IsAvailable())) + policy_conv_metacommand_ = std::make_unique( + &dx_context_, kNumFilters, pol_channels, 8, 8, 3, false, true, fp16_); + } + + // input + { + auto inputConv = std::make_unique( + fp16_, input_conv_gemm_metacommand_.get(), + input_conv_metacommand_.get(), &dx_context_, nullptr, kNumFilters, 8, 8, + 3, kInputPlanes, true, true); + + inputConv->LoadWeights(&weights.input.weights[0], &weights.input.biases[0], + &dx_context_); + + network_.emplace_back(std::move(inputConv)); + } + + // residual block + for (size_t block = 0; block < weights.residual.size(); block++) { + auto conv1 = std::make_unique( + fp16_, residual_block_gemm_metacommand_.get(), + resi_block_conv_1_metacommand_.get(), &dx_context_, getLastLayer(), + kNumFilters, 8, 8, 3, kNumFilters, true, true); + + conv1->LoadWeights(&weights.residual[block].conv1.weights[0], + &weights.residual[block].conv1.biases[0], &dx_context_); + + network_.emplace_back(std::move(conv1)); + + int se_k = 0; + if (has_se_) se_k = (int)weights.residual[block].se.b1.size(); + + auto conv2 = std::make_unique( + fp16_, residual_block_gemm_metacommand_.get(), + resi_block_conv_2_metacommand_.get(), &dx_context_, getLastLayer(), + kNumFilters, 8, 8, 3, kNumFilters, true, true, true, has_se_, se_k); + + conv2->LoadWeights(&weights.residual[block].conv2.weights[0], + &weights.residual[block].conv2.biases[0], &dx_context_); + + if (has_se_) { + conv2->LoadSEWeights( + &weights.residual[block].se.w1[0], &weights.residual[block].se.b1[0], + &weights.residual[block].se.w2[0], &weights.residual[block].se.b2[0]); + } + network_.emplace_back(std::move(conv2)); + } + + BaseLayer* resi_last = getLastLayer(); + + // policy head + if (has_conv_policy_) { + // conv1 is same as residual block convolution. 
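+    // (Convolutional policy head: a 3x3 conv with relu that reuses the
+    //  residual-block metacommands, a 3x3 conv to pol_channels without relu,
+    //  and finally the policy-map gather that produces the
+    //  kNumOutputPolicy-sized policy output.)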
+ auto conv1 = std::make_unique( + fp16_, residual_block_gemm_metacommand_.get(), + resi_block_conv_1_metacommand_.get(), &dx_context_, getLastLayer(), + kNumFilters, 8, 8, 3, kNumFilters, true, true); + conv1->LoadWeights(&weights.policy1.weights[0], &weights.policy1.biases[0], + &dx_context_); + network_.emplace_back(std::move(conv1)); + + // conv2 has different no. of output filters (pol_channels). No relu. + auto conv2 = std::make_unique( + fp16_, policy_conv_gemm_metacommand_.get(), + policy_conv_metacommand_.get(), &dx_context_, getLastLayer(), + pol_channels, 8, 8, 3, kNumFilters, true, false); + + conv2->LoadWeights(&weights.policy.weights[0], &weights.policy.biases[0], + &dx_context_); + network_.emplace_back(std::move(conv2)); + + // Policy map layer + auto policyMap = + std::make_unique(fp16_, &dx_context_, getLastLayer(), + kNumOutputPolicy, 1, 1, 73 * 8 * 8); + policyMap->LoadWeights(kConvPolicyMap); + network_.emplace_back(std::move(policyMap)); + + } else { + // 1x1 convolution, pol_channels output filters + auto convPol = std::make_unique( + fp16_, nullptr, nullptr, &dx_context_, getLastLayer(), pol_channels, 8, + 8, 1, kNumFilters, true, true); + convPol->LoadWeights(&weights.policy.weights[0], &weights.policy.biases[0], + &dx_context_); + network_.emplace_back(std::move(convPol)); + + // FC with bias, no activation + // pad up kNumOutputPolicy to be a multiple of 8 + assert(weights.ip_pol_b.size() == kNumOutputPolicy); + auto FCPol = std::make_unique(fp16_, &dx_context_, getLastLayer(), + kNumOutputPolicyPadded8, 1, 1, true, + false, false); + // Copy weights to temp space which is padded in size. + std::vector tempBias(kNumOutputPolicyPadded8); + std::vector tempWeight(kNumOutputPolicyPadded8 * + weights.ip_pol_w.size() / kNumOutputPolicy); + memcpy(tempBias.data(), weights.ip_pol_b.data(), + weights.ip_pol_b.size() * sizeof(float)); + memcpy(tempWeight.data(), weights.ip_pol_w.data(), + weights.ip_pol_w.size() * sizeof(float)); + + FCPol->LoadWeights(tempWeight.data(), tempBias.data(), &dx_context_); + network_.emplace_back(std::move(FCPol)); + } + + // value head + { + int val_channels = (int)weights.value.biases.size(); + + // 1x1 convolution, val_channels output filters + auto convVal = std::make_unique( + fp16_, nullptr, nullptr, &dx_context_, getLastLayer(), val_channels, 8, + 8, 1, kNumFilters, true, true); + convVal->LoadWeights(&weights.value.weights[0], &weights.value.biases[0], + &dx_context_); + network_.emplace_back(std::move(convVal)); + + // Bias and relu activation. + auto FCVal1 = std::make_unique(fp16_, &dx_context_, getLastLayer(), + (int)weights.ip1_val_b.size(), 1, 1, + true, true, false); + FCVal1->LoadWeights(&weights.ip1_val_w[0], &weights.ip1_val_b[0], + &dx_context_); + network_.emplace_back(std::move(FCVal1)); + + has_wdl_ = file.format().network_format().value() == + pblczero::NetworkFormat::VALUE_WDL; + + // Fully connected layer with Bias. + // tanh activation for non wdl nets, no activation for wdl. 
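+    // (For WDL nets the three raw outputs are turned into win/draw/loss
+    //  probabilities by a softmax on the CPU at the end of DxNetwork::Eval;
+    //  non-WDL nets emit a single tanh-squashed value.)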
+ auto fc2_tanh = !has_wdl_; + auto FCVal2 = std::make_unique(fp16_, &dx_context_, getLastLayer(), + kNumOutputValuePadded8, 1, 1, true, + false, fc2_tanh); + // Pad up the weights + std::vector tempBias(kNumOutputValuePadded8); + std::vector tempWeight(kNumOutputValuePadded8 * + weights.ip2_val_w.size() / + weights.ip2_val_b.size()); + memcpy(tempBias.data(), weights.ip2_val_b.data(), + weights.ip2_val_b.size() * sizeof(float)); + memcpy(tempWeight.data(), weights.ip2_val_w.data(), + weights.ip2_val_w.size() * sizeof(float)); + FCVal2->LoadWeights(tempWeight.data(), tempBias.data(), &dx_context_); + network_.emplace_back(std::move(FCVal2)); + } + + dx_context_.FlushAndWait(); + + // Allocate GPU memory for running the network + // 4 buffers of max size are enough: + // * one to hold input, + // * second to hold output + // * third to hold skip connection's input + // * and fourth to act as scratch space needed by some layers. + size_t max_size = resi_last->GetOutputSize(max_batch_size_); + + // Winograd transformed inputs/outputs need more space. + // Every 4x4 block of input/output is transfored to 6x6 block. + max_size *= (size_t)ceil(36.0 / 16.0); + + for (auto& mem : tensor_mem_) { + dx_context_.CreateAlloc(max_size, D3D12_HEAP_TYPE_DEFAULT, mem, fp16_); + } +} + +void DxNetwork::Eval(InputsOutputsDx* io, int batch_size) { + if (batch_size > kMaxSupportedBatchSize) + throw Exception("Unsupported batch size: " + std::to_string(batch_size)); + +#ifdef DEBUG_DUMP_PER_LAYER_DATA + lock_.lock(); + ID3D12GraphicsCommandList4* cl = dx_context_.getCommandList(); +#else + ID3D12GraphicsCommandList4* cl = io->command_list_; + dx_context_.ResetCL(cl, io->command_allocator_, io->needs_reset_); +#endif + + // Expand packed board representation into full planes. 
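+  // Each input plane arrives as a 64-bit occupancy mask plus one float; the
+  // ExpandPlanes shader writes that float wherever a mask bit is set and 0
+  // elsewhere. A CPU-side sketch of the same transform (illustrative names
+  // only):
+  //   for (int p = 0; p < batch_size * kInputPlanes; p++)
+  //     for (int sq = 0; sq < 64; sq++)
+  //       expanded[p * 64 + sq] = ((masks[p] >> sq) & 1) ? values[p] : 0.0f;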
+ +#ifdef COPY_BEFORE_SHADER_READ + // First copy from upload heap to scratch mem + CD3DX12_RESOURCE_BARRIER barrier; + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + tensor_mem_[1].resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + cl->ResourceBarrier(1, &barrier); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + tensor_mem_[2].resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_DEST); + cl->ResourceBarrier(1, &barrier); + + cl->CopyBufferRegion(tensor_mem_[1].resource, 0, + io->input_masks_mem_gpu_.resource, 0, + sizeof(uint64_t) * batch_size * kInputPlanes); + cl->CopyBufferRegion(tensor_mem_[2].resource, 0, + io->input_val_mem_gpu_.resource, 0, + sizeof(float) * batch_size * kInputPlanes); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + tensor_mem_[1].resource, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + cl->ResourceBarrier(1, &barrier); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition( + tensor_mem_[2].resource, D3D12_RESOURCE_STATE_COPY_DEST, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + cl->ResourceBarrier(1, &barrier); + + dx_context_.UavBarrier(cl); + + dx_context_.getShaderWrapper()->ExpandPlanes( + cl, tensor_mem_[0], tensor_mem_[1], tensor_mem_[2], batch_size, fp16_); + +#else + dx_context_.getShaderWrapper()->ExpandPlanes( + cl, tensor_mem_[0], io->input_masks_mem_gpu_, io->input_val_mem_gpu_, + batch_size, fp16_); +#endif + + dx_context_.UavBarrier(cl); + + // Debug logging (not compiled by default) + dx_context_.DumpTensor("After expand planes", tensor_mem_[0], 1024, fp16_); + + int l = 0; + + //-----------------------------------///--------------------------------------- + // Input Conv + network_[l++]->Eval(batch_size, tensor_mem_[2], tensor_mem_[0], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After input conv", tensor_mem_[2], 1024, fp16_); + + //-----------------------------------///--------------------------------------- + + // Residual tower. + for (int block = 0; block < num_blocks_; block++) { + // conv1 + network_[l++]->Eval(batch_size, tensor_mem_[0], tensor_mem_[2], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + // conv2 + network_[l++]->Eval(batch_size, tensor_mem_[2], tensor_mem_[0], + tensor_mem_[2], tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + } + + dx_context_.DumpTensor("After Residual tower", tensor_mem_[2], 1024, fp16_); + + //-----------------------------------///--------------------------------------- + + // Policy head. + if (has_conv_policy_) { + // Policy conv1. + network_[l++]->Eval(batch_size, tensor_mem_[0], tensor_mem_[2], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After policy conv1", tensor_mem_[0], 1024, fp16_); + + // Policy conv2 + network_[l++]->Eval(batch_size, tensor_mem_[1], tensor_mem_[0], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After policy conv2", tensor_mem_[1], 1024, fp16_); + + // Policy Map layer (writes directly to system memory). + network_[l++]->Eval(batch_size, io->op_policy_mem_gpu_, tensor_mem_[1], + DXAlloc(), DXAlloc(), DXAlloc(), cl); + + // Output of policy map layer is always FP32. + dx_context_.DumpTensor("After policy map", io->op_policy_mem_gpu_, 1024, + false); + + } else { + // Policy conv. 
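+    // (Classic policy head: a 1x1 convolution to pol_channels with relu,
+    //  then the FC layer below writes its padded output directly into the
+    //  CPU-visible op_policy_mem_gpu_ allocation.)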
+ network_[l++]->Eval(batch_size, tensor_mem_[0], tensor_mem_[2], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + // Policy FC (writes directly to system memory). + network_[l++]->Eval(batch_size, io->op_policy_mem_gpu_, tensor_mem_[0], + DXAlloc(), tensor_mem_[1], tensor_mem_[3], cl); + } + + //-----------------------------------///--------------------------------------- + + // Value head. + + // Value conv. + network_[l++]->Eval(batch_size, tensor_mem_[0], tensor_mem_[2], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After value conv", tensor_mem_[0], 1024, fp16_); + + // value FC1. + network_[l++]->Eval(batch_size, tensor_mem_[1], tensor_mem_[0], DXAlloc(), + tensor_mem_[2], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After value fc1", tensor_mem_[1], 128, fp16_); + + // value FC2. + network_[l++]->Eval(batch_size, io->op_value_mem_gpu_, tensor_mem_[1], + DXAlloc(), tensor_mem_[2], tensor_mem_[3], cl); + + dx_context_.DumpTensor("After value fc2", io->op_value_mem_gpu_, 8, fp16_); + + //-----------------------------------///--------------------------------------- +#ifdef DEBUG_DUMP_PER_LAYER_DATA + dx_context_.FlushAndWait(); + lock_.unlock(); +#else + // TODO: Get rid of this lock once we move the Command Queue also to + // InputsOutputs structure This isn't a bottleneck anyway (for CPU side perf). + // The hope is that we will get some GPU side parallelism with multiple async + // compute queues. + lock_.lock(); + uint64_t fence = dx_context_.FlushCL(cl); + lock_.unlock(); + + dx_context_.WaitForGpu(fence); + io->needs_reset_ = true; +#endif + + // Do some simple post-processing operations on CPU: + // - un-padding of policy and value heads. + // - value head softmax (for wdl enabled nets) + // We do them outside the lock to get some more parallelism. + int val_vector_size = has_wdl_ ? 3 : 1; + if (fp16_) { + // Policy: + // Un-pad policy output, and convert to fp32. + if (!has_conv_policy_) { + dx_half* padded_pol_fp16 = (dx_half*)io->op_policy_mem_; + for (int n = 0; n < batch_size; n++) + for (int i = 0; i < kNumOutputPolicy; i++) + io->op_policy_mem_final_[n * kNumOutputPolicy + i] = + FP16toFP32(padded_pol_fp16[n * kNumOutputPolicyPadded8 + i]); + } + // Value: + // Un-pad value output, converting it to fp32. + dx_half* padded_val_fp16 = (dx_half*)io->op_value_mem_; + for (int n = 0; n < batch_size; n++) + for (int i = 0; i < val_vector_size; i++) + io->op_value_mem_final_[n * val_vector_size + i] = + FP16toFP32(padded_val_fp16[n * kNumOutputValuePadded8 + i]); + + } else { + // Policy: + // Un-pad policy output. + if (!has_conv_policy_) { + for (int i = 0; i < batch_size; i++) + memcpy(io->op_policy_mem_final_ + kNumOutputPolicy * i, + io->op_policy_mem_ + kNumOutputPolicyPadded8 * i, + kNumOutputPolicy * sizeof(float)); + } + + // Value: + // Un-pad value output. + for (int i = 0; i < batch_size; i++) + memcpy(io->op_value_mem_final_ + val_vector_size * i, + io->op_value_mem_ + kNumOutputValuePadded8 * i, + val_vector_size * sizeof(float)); + } + + // Softmax on value head for wdl enabled networks. 
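The per-sample softmax applied in the next hunk exponentiates the raw W/D/L outputs directly; a numerically safer formulation subtracts the per-sample maximum before exponentiating. A minimal sketch of that variant (hypothetical helper, not used by the patch):

    #include <algorithm>
    #include <cmath>

    // Numerically stable 3-way softmax over one W/D/L triple, in place.
    inline void SoftmaxWDL(float* v) {  // v points to {w, d, l}.
      const float m = std::max({v[0], v[1], v[2]});
      float sum = 0.0f;
      for (int i = 0; i < 3; ++i) {
        v[i] = std::exp(v[i] - m);
        sum += v[i];
      }
      for (int i = 0; i < 3; ++i) v[i] /= sum;
    }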
+ if (has_wdl_) { + for (int i = 0; i < batch_size; i++) { + float w_val = io->op_value_mem_final_[i * 3 + 0]; + float d_val = io->op_value_mem_final_[i * 3 + 1]; + float l_val = io->op_value_mem_final_[i * 3 + 2]; + + w_val = exp(w_val); + d_val = exp(d_val); + l_val = exp(l_val); + float S = w_val + d_val + l_val; + w_val /= S; + d_val /= S; + l_val /= S; + + io->op_value_mem_final_[i * 3 + 0] = w_val; + io->op_value_mem_final_[i * 3 + 1] = d_val; + io->op_value_mem_final_[i * 3 + 2] = l_val; + } + } +} + +DxNetwork::~DxNetwork() { + dx_context_.FlushAndWait(); + // Free memory and destroy all dx objects. + for (auto mem : tensor_mem_) { + mem.resource->Release(); + } +} + +std::unique_ptr DxNetwork::NewComputation() { + return std::make_unique(this, has_wdl_); +} + +std::unique_ptr DxNetwork::GetInputsOutputs() { + std::lock_guard lock(inputs_outputs_lock_); + if (free_inputs_outputs_.empty()) { + return std::make_unique(max_batch_size_, &dx_context_, + has_wdl_, has_conv_policy_, fp16_); + } else { + std::unique_ptr resource = + std::move(free_inputs_outputs_.front()); + free_inputs_outputs_.pop_front(); + return resource; + } +} + +void DxNetwork::ReleaseInputsOutputs( + std::unique_ptr resource) { + std::lock_guard lock(inputs_outputs_lock_); + free_inputs_outputs_.push_back(std::move(resource)); +} + +DxNetworkComputation::DxNetworkComputation(DxNetwork* network, bool wdl) + : network_(network), wdl_(wdl) { + batch_size_ = 0; + inputs_outputs_ = network_->GetInputsOutputs(); +} + +DxNetworkComputation::~DxNetworkComputation() { + network_->ReleaseInputsOutputs(std::move(inputs_outputs_)); +} + +void DxNetworkComputation::AddInput(InputPlanes&& input) { + auto iter_mask = + &inputs_outputs_->input_masks_mem_[batch_size_ * kInputPlanes]; + auto iter_val = &inputs_outputs_->input_val_mem_[batch_size_ * kInputPlanes]; + + int i = 0; + for (const auto& plane : input) { + iter_mask[i] = plane.mask; + iter_val[i] = plane.value; + i++; + } + + batch_size_++; +} + +void DxNetworkComputation::ComputeBlocking() { + network_->Eval(inputs_outputs_.get(), GetBatchSize()); +} + +InputsOutputsDx::InputsOutputsDx(int maxBatchSize, DxContext* dx_context, + bool wdl, bool policy_map, bool fp16) + : uses_policy_map_(policy_map), needs_reset_(false) { + // CPU accesses on Default heap doesn't work. + // GPU accesses on Upload heap works. + dx_context->CreateAlloc(maxBatchSize * kInputPlanes * sizeof(uint64_t), + D3D12_HEAP_TYPE_UPLOAD /*D3D12_HEAP_TYPE_DEFAULT*/, + input_masks_mem_gpu_, fp16); + + dx_context->CreateAlloc(maxBatchSize * kInputPlanes * sizeof(float), + D3D12_HEAP_TYPE_UPLOAD /*D3D12_HEAP_TYPE_DEFAULT*/, + input_val_mem_gpu_, fp16); + + // CUSTOM heap created to have GPU directly write to system memory + dx_context->CreateAlloc( + maxBatchSize * kNumOutputPolicyPadded8 * sizeof(float), + D3D12_HEAP_TYPE_CUSTOM, op_policy_mem_gpu_, fp16); + + dx_context->CreateAlloc(maxBatchSize * kNumOutputValuePadded8 * sizeof(float), + D3D12_HEAP_TYPE_CUSTOM, op_value_mem_gpu_, fp16); + + ReportDxErrors(input_masks_mem_gpu_.resource->Map(0, nullptr, + (void**)&input_masks_mem_)); + + ReportDxErrors( + input_val_mem_gpu_.resource->Map(0, nullptr, (void**)&input_val_mem_)); + + ReportDxErrors( + op_policy_mem_gpu_.resource->Map(0, nullptr, (void**)&op_policy_mem_)); + + ReportDxErrors( + op_value_mem_gpu_.resource->Map(0, nullptr, (void**)&op_value_mem_)); + + // When policy map is enabled, GPU writes directly to the final policy output. 
+ if (uses_policy_map_) + op_policy_mem_final_ = op_policy_mem_; + else + op_policy_mem_final_ = new float[maxBatchSize * kNumOutputPolicy]; + op_value_mem_final_ = new float[maxBatchSize * (wdl ? 3 : 1)]; + + ReportDxErrors(dx_context->getDevice()->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator_))); + + ReportDxErrors(dx_context->getDevice()->CreateCommandList( + 1, D3D12_COMMAND_LIST_TYPE_DIRECT, command_allocator_, NULL, + IID_PPV_ARGS(&command_list_))); +} + +InputsOutputsDx::~InputsOutputsDx() { + input_masks_mem_gpu_.resource->Unmap(0, nullptr); + input_val_mem_gpu_.resource->Unmap(0, nullptr); + op_policy_mem_gpu_.resource->Unmap(0, nullptr); + op_value_mem_gpu_.resource->Unmap(0, nullptr); + + input_masks_mem_gpu_.resource->Release(); + input_val_mem_gpu_.resource->Release(); + op_policy_mem_gpu_.resource->Release(); + op_value_mem_gpu_.resource->Release(); + + command_allocator_->Release(); + command_list_->Release(); + + if (!uses_policy_map_) delete[] op_policy_mem_final_; + delete[] op_value_mem_final_; +} + +std::unique_ptr MakeDxNetwork(const WeightsFile& weights, + const OptionsDict& options) { + return std::make_unique(weights, options); +} + +REGISTER_NETWORK("dx", MakeDxNetwork, 120) + +} // namespace lczero diff --git a/src/neural/dx/network_dx.h b/src/neural/dx/network_dx.h new file mode 100644 index 0000000000..e2c47091c8 --- /dev/null +++ b/src/neural/dx/network_dx.h @@ -0,0 +1,238 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ +#pragma once + +#include "dx_common.h" +#include "neural/factory.h" +#include "neural/network_legacy.h" + +// TODO: Consider refactoring common part of this backend's code and cudnn +// backend into some base class(es). + +namespace lczero { + +using namespace dx_backend; +class DxNetwork; + +static constexpr int kNumOutputPolicy = 1858; + +// Padding needed because on some HW (e.g: NV) fp16 requires gemm matrix dimensions +// to be multiples of 8 +static constexpr int kNumOutputPolicyPadded8 = + ((kNumOutputPolicy - 1) / 8 + 1) * 8; + +// Normally 3 when using wdl, and 1 without +static constexpr int kNumOutputValuePadded8 = 8; + +struct InputsOutputsDx { + InputsOutputsDx(int maxBatchSize, DxContext* dx_context, bool wdl, + bool conv_policy, bool fp16); + ~InputsOutputsDx(); + + // Wanted to put these in default heap (video memory, mapped to support CPU writes too). 
+ // - but this isn't supported by DX12 API! + // So right now we have it in upload ueap (system memory mapped for both CPU and GPU). + DXAlloc input_masks_mem_gpu_; + DXAlloc input_val_mem_gpu_; + + // In readback heap (system memory mapped for both CPU and GPU). + DXAlloc op_policy_mem_gpu_; + DXAlloc op_value_mem_gpu_; + + // CPU pointers of the above allocations. + uint64_t* input_masks_mem_; + float* input_val_mem_; + float* op_policy_mem_; + float* op_value_mem_; + + // Separate copy, un-padded and always in fp32 + float* op_policy_mem_final_; + float* op_value_mem_final_; + + // For recording GPU commands. + ID3D12GraphicsCommandList4* command_list_; + ID3D12CommandAllocator* command_allocator_; + + // Always need to reset command list / allocator after first time. + bool needs_reset_; + + const bool uses_policy_map_; +}; + +class DxNetworkComputation : public NetworkComputation { + public: + DxNetworkComputation(DxNetwork* network, bool wdl); + ~DxNetworkComputation(); + + void AddInput(InputPlanes&& input) override; + + void ComputeBlocking() override; + + int GetBatchSize() const override { return batch_size_; } + + float GetQVal(int sample) const override { + if (wdl_) { + auto w = inputs_outputs_->op_value_mem_final_[3 * sample + 0]; + auto l = inputs_outputs_->op_value_mem_final_[3 * sample + 2]; + return w - l; + } else { + return inputs_outputs_->op_value_mem_final_[sample]; + } + } + + float GetDVal(int sample) const override { + if (wdl_) { + auto d = inputs_outputs_->op_value_mem_final_[3 * sample + 1]; + return d; + } else { + return 0.0f; + } + } + + float GetPVal(int sample, int move_id) const override { + return inputs_outputs_ + ->op_policy_mem_final_[sample * kNumOutputPolicy + move_id]; + } + + private: + // Memory holding inputs, outputs. + std::unique_ptr inputs_outputs_; + int batch_size_; + bool wdl_; + + DxNetwork* network_; +}; + +// D3D12 related stuff. +class DxContext { + private: + // Should be enough to hold descriptors for all resources. + static constexpr int kNumDescHeapSlots = 65536; + + ID3D12Device5* device_; + ID3D12CommandQueue* command_queue_; + ID3D12GraphicsCommandList4* command_list_; + ID3D12CommandAllocator* command_allocator_; + ID3D12DescriptorHeap* desc_heap_; + ID3D12Fence* fence_; + uint64_t fence_val_; + ShaderWrapper shader_wrapper_; + + std::atomic next_slot_in_desc_heap_; + + // Scratch space in system memory (used to copy to/from CPU data). + // 256 MB should be enough for uploading weights, etc. 
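The scratch allocations declared just below follow the usual D3D12 staging pattern: the CPU writes into a persistently mapped upload-heap buffer, and the GPU then copies the bytes into their default-heap destination (the reverse direction goes through the readback buffer). A hedged sketch of what a ScheduleUpload-style helper typically records, assuming the destination is already in the COPY_DEST state; this is illustrative only, not the actual implementation:

    #include <cstring>
    #include <d3d12.h>

    // Hypothetical staging helper: copy `size` bytes through a mapped
    // upload-heap buffer and record a GPU-side copy into `dst`.
    void StageUpload(ID3D12GraphicsCommandList* cl, ID3D12Resource* dst,
                     ID3D12Resource* upload, void* upload_cpu_ptr,
                     const void* data, size_t size) {
      std::memcpy(upload_cpu_ptr, data, size);        // CPU -> upload heap.
      cl->CopyBufferRegion(dst, 0, upload, 0, size);  // upload heap -> GPU.
    }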
+ constexpr static size_t kUploadDownloadScratchSize = 256 * 1024 * 1024; + DXAlloc upload_scratch_mem_; + DXAlloc readback_scratch_mem_; + + int gpu_id_; + + public: + DxContext(const OptionsDict& options); + ~DxContext(); + + ID3D12Device5* getDevice() { return device_; } + ID3D12GraphicsCommandList4* getCommandList() { return command_list_; } + ShaderWrapper* getShaderWrapper() { return &shader_wrapper_; } + + // util functions + void CreateAlloc(size_t size, D3D12_HEAP_TYPE type, DXAlloc& alloc, + bool fp16); + void UavBarrier(ID3D12GraphicsCommandList4* cl = nullptr); + uint64_t FlushCL(ID3D12GraphicsCommandList4 *cl = nullptr); + void WaitForGpu(uint64_t fence_val = 0); + void ResetCL(ID3D12GraphicsCommandList4* cl = nullptr, + ID3D12CommandAllocator* ca = nullptr, bool reset = true); + + void FlushAndWait(); + void ScheduleUpload(DXAlloc alloc, const void* data, size_t size); + void DumpFp32(float* buf, int elements); + void CopyTensor(DXAlloc dst, DXAlloc src, int bytes); + void DumpTensor(const char* message, DXAlloc alloc, int size, + bool fp16 = true, bool allnewline = false); + void DumpCpuTensor(void* data, int size, bool fp16 = true, + bool allnewline = false); +}; + +class DxNetwork : public Network { + friend struct InputsOutputsDx; + + public: + DxNetwork(const WeightsFile& file, const OptionsDict& options); + ~DxNetwork(); + + void Eval(InputsOutputsDx* io, int batchSize); + std::unique_ptr NewComputation() override; + const NetworkCapabilities& GetCapabilities() const override { + return capabilities_; + } + + std::unique_ptr GetInputsOutputs(); + void ReleaseInputsOutputs(std::unique_ptr resource); + + private: + const NetworkCapabilities capabilities_; + DxContext dx_context_; + int max_batch_size_; + + // Currently only one NN Eval can happen a time (we can fix this if needed + // by allocating more memory). + mutable std::mutex lock_; + + // Network Properties. + int num_blocks_; + bool has_se_; + bool has_wdl_; + bool has_conv_policy_; + bool fp16_; + + std::vector> network_; + BaseLayer* getLastLayer() { return network_.back().get(); } + + // Unique Metacommands used multiple times in the network. + + // GEMM metacommands needed by winograd algorithm. + std::unique_ptr input_conv_gemm_metacommand_; + std::unique_ptr residual_block_gemm_metacommand_; + std::unique_ptr policy_conv_gemm_metacommand_; + + // Convolution metacommands to directly perform convolution. + // - used only when GEMM metacommand isn't supported. + std::unique_ptr input_conv_metacommand_; + std::unique_ptr resi_block_conv_1_metacommand_; + std::unique_ptr resi_block_conv_2_metacommand_; + std::unique_ptr policy_conv_metacommand_; + + // In device memory. + DXAlloc tensor_mem_[4]; + + mutable std::mutex inputs_outputs_lock_; + std::list> free_inputs_outputs_; +}; + +}; // namespace lczero diff --git a/src/neural/dx/shader_wrapper.cc b/src/neural/dx/shader_wrapper.cc new file mode 100644 index 0000000000..fccdd849b7 --- /dev/null +++ b/src/neural/dx/shader_wrapper.cc @@ -0,0 +1,485 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ +#include "shader_wrapper.h" +#include +#include +#include "comdef.h" +#include "neural/network.h" +#include "shaders/shader_shared.h" +#include "shaders/shaders.h" + +#define ARR_ELEMENT_COUNT(x) (sizeof(x) / sizeof(x[0])) +namespace lczero { +namespace dx_backend { + +// Helper macros to reduce copy-paste. +#define CREATE_WINOGRAD_SE_PSO(channels) \ + state_desc.CS = {g_output_transform_shader_fp32_se_##channels, \ + sizeof(g_output_transform_shader_fp32_se_##channels)}; \ + ReportDxErrors(device->CreateComputePipelineState( \ + &state_desc, \ + IID_PPV_ARGS(&winograd_output_transform_se_##channels##_))); + +#define CREATE_SE_PSO(channels) \ + state_desc.CS = {g_se_##channels, sizeof(g_se_##channels)}; \ + ReportDxErrors(device->CreateComputePipelineState( \ + &state_desc, IID_PPV_ARGS(&se_##channels##_))); + + +#define SET_WINOGRAD_SE_PSO(channels) \ + command_list->SetPipelineState( \ + winograd_output_transform_se_##channels##_); + +void ShaderWrapper::Init(ID3D12Device* device) { + // Create root signature - common for all shaders. + + // 8+1+8 slots + // slot 0 to 7 -> root UAV slots 0 to 7 (all in space 0) + // slot 8 -> root constants (16 constants - should be enough) + // slot 9 to 16 -> descriptor UAVs of same allocations as slots 0-7, bound + // at shader slots 8-15 + + D3D12_ROOT_PARAMETER root_parameter[kUavSlots + 1 + kUavSlots]; + for (int i = 0; i < kUavSlots; i++) { + root_parameter[i].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + root_parameter[i].Descriptor.RegisterSpace = 0; + root_parameter[i].Descriptor.ShaderRegister = i; + root_parameter[i].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + } + + root_parameter[kUavSlots].ParameterType = + D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + root_parameter[kUavSlots].Constants.RegisterSpace = 0; + root_parameter[kUavSlots].Constants.ShaderRegister = 0; + root_parameter[kUavSlots].Constants.Num32BitValues = 16; + root_parameter[kUavSlots].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_DESCRIPTOR_RANGE descRange[kUavSlots] = {}; + for (int i = 0; i < kUavSlots; i++) { + descRange[i].BaseShaderRegister = i + kUavSlots; + descRange[i].NumDescriptors = 1; + descRange[i].OffsetInDescriptorsFromTableStart = 0; + descRange[i].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + descRange[i].RegisterSpace = 0; + + root_parameter[i + kUavSlots + 1].ParameterType = + D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + root_parameter[i + kUavSlots + 1].DescriptorTable.NumDescriptorRanges = 1; + root_parameter[i + kUavSlots + 1].DescriptorTable.pDescriptorRanges = + &descRange[i]; + root_parameter[i + kUavSlots + 1].ShaderVisibility = + D3D12_SHADER_VISIBILITY_ALL; + } + + D3D12_ROOT_SIGNATURE_DESC root_sig_desc = {kUavSlots + 1 + kUavSlots, + root_parameter, 0, NULL, + D3D12_ROOT_SIGNATURE_FLAG_NONE}; + + ID3DBlob* serialized_layout = NULL; + D3D12SerializeRootSignature(&root_sig_desc, 
D3D_ROOT_SIGNATURE_VERSION_1_0, + &serialized_layout, NULL); + + ReportDxErrors(device->CreateRootSignature( + 1, serialized_layout->GetBufferPointer(), + serialized_layout->GetBufferSize(), IID_PPV_ARGS(&root_sign_))); + + serialized_layout->Release(); + + // Create PSO objects for each shader. + // PSO basically holds the compiled shader object. + + // Expand planes shaders. + D3D12_COMPUTE_PIPELINE_STATE_DESC state_desc = {}; + state_desc.pRootSignature = root_sign_; + + state_desc.CS = {g_ExpandPlanes_shader_fp16, + sizeof(g_ExpandPlanes_shader_fp16)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&expand_planes_fp16_))); + + state_desc.CS = {g_ExpandPlanes_shader_fp32, + sizeof(g_ExpandPlanes_shader_fp32)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&expand_planes_fp32_))); + + // Winograd Input Transform shader. + state_desc.CS = {g_input_transform_shader_fp32, + sizeof(g_input_transform_shader_fp32)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&winograd_input_transform_))); + + // Winograd Output Transform shader. + state_desc.CS = {g_output_transform_shader_fp32, + sizeof(g_output_transform_shader_fp32)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&winograd_output_transform_))); + + // 1x1 convolution shader. + state_desc.CS = {g_conv_1x1_shader_fp32, sizeof(g_conv_1x1_shader_fp32)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&conv_1x1_))); + + // policy map shader. + state_desc.CS = {g_policy_map_shader_fp32, sizeof(g_policy_map_shader_fp32)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&policy_map_))); + + state_desc.CS = {g_MatrixMul_Fp32, sizeof(g_MatrixMul_Fp32)}; + ReportDxErrors(device->CreateComputePipelineState(&state_desc, + IID_PPV_ARGS(&gemm_))); + + // Add vectors shader. + state_desc.CS = {g_add_vectors_shader, sizeof(g_add_vectors_shader)}; + ReportDxErrors(device->CreateComputePipelineState( + &state_desc, IID_PPV_ARGS(&add_vectors_))); + + // Various SE shaders. + CREATE_SE_PSO(128); + CREATE_SE_PSO(256); + CREATE_SE_PSO(320); + CREATE_SE_PSO(384); + CREATE_SE_PSO(512); + CREATE_SE_PSO(640); + CREATE_SE_PSO(768); + CREATE_SE_PSO(1024); + + // Various output-transform fused with SE shaders. 
+ CREATE_WINOGRAD_SE_PSO(128) + CREATE_WINOGRAD_SE_PSO(256) + CREATE_WINOGRAD_SE_PSO(320) + CREATE_WINOGRAD_SE_PSO(384) + CREATE_WINOGRAD_SE_PSO(512) + CREATE_WINOGRAD_SE_PSO(640) + CREATE_WINOGRAD_SE_PSO(768) + CREATE_WINOGRAD_SE_PSO(1024) +} + +void ShaderWrapper::Destroy() { + expand_planes_fp16_->Release(); + expand_planes_fp32_->Release(); + + winograd_input_transform_->Release(); + winograd_output_transform_->Release(); + conv_1x1_->Release(); + policy_map_->Release(); + gemm_->Release(); + add_vectors_->Release(); + + se_128_->Release(); + se_256_->Release(); + se_320_->Release(); + se_384_->Release(); + se_512_->Release(); + se_640_->Release(); + se_768_->Release(); + se_1024_->Release(); + + winograd_output_transform_se_128_->Release(); + winograd_output_transform_se_256_->Release(); + winograd_output_transform_se_320_->Release(); + winograd_output_transform_se_384_->Release(); + winograd_output_transform_se_512_->Release(); + winograd_output_transform_se_640_->Release(); + winograd_output_transform_se_768_->Release(); + winograd_output_transform_se_1024_->Release(); + + root_sign_->Release(); +} + +void ShaderWrapper::ExpandPlanes(ID3D12GraphicsCommandList4* command_list, + DXAlloc output_tensor, DXAlloc masks, + DXAlloc values, int batchSize, bool fp16) { + const int N = batchSize * kInputPlanes; + int consts[] = {N, kInputPlanes}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetPipelineState(fp16 ? expand_planes_fp16_ + : expand_planes_fp32_); + command_list->SetComputeRootUnorderedAccessView(0, output_tensor.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, masks.gpu_va); + command_list->SetComputeRootUnorderedAccessView(2, values.gpu_va); + command_list->SetComputeRoot32BitConstants(kUavSlots, 2, &consts, 0); + + int elements = batchSize * kInputPlanes * 8 * 8; + int blocks = DivUp(elements, kExpandPlanesElementsPerBlock); + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::InputTransform(ID3D12GraphicsCommandList4* command_list, + DXAlloc transformed_input, DXAlloc input, + int N, int C, bool /*fp16*/) { + int consts[] = {N, C}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetPipelineState(winograd_input_transform_); + command_list->SetComputeRootUnorderedAccessView(0, input.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, transformed_input.gpu_va); + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 0, + input.desc_handle_vector); + command_list->SetComputeRootDescriptorTable( + kUavSlots + 1 + 1, transformed_input.desc_handle_scalar); + + int blocks = DivUp(N * C, kWinogradTransformShaderBlockSize); + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::Se(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc input, DXAlloc skip_connection, DXAlloc bias, + DXAlloc se_w1, DXAlloc se_b1, DXAlloc se_w2, + DXAlloc se_b2, int N, int K, bool relu, bool bias_add, + bool skip_add, int se_k, bool /*fp16*/) { + int consts[] = {N, K, relu, bias_add, skip_add, se_k}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetComputeRootUnorderedAccessView(0, input.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, output.gpu_va); + if (bias_add) command_list->SetComputeRootUnorderedAccessView(2, bias.gpu_va); + if (skip_add) + command_list->SetComputeRootUnorderedAccessView(3, skip_connection.gpu_va); + 
command_list->SetComputeRootUnorderedAccessView(4, se_w1.gpu_va); + command_list->SetComputeRootUnorderedAccessView(5, se_b1.gpu_va); + command_list->SetComputeRootUnorderedAccessView(6, se_w2.gpu_va); + command_list->SetComputeRootUnorderedAccessView(7, se_b2.gpu_va); + + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + + command_list->SetComputeRootDescriptorTable( + kUavSlots + 1 + 0, input.desc_handle_vector); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 1, + output.desc_handle_vector); + if (bias_add) + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 2, + bias.desc_handle_scalar); + if (skip_add) + command_list->SetComputeRootDescriptorTable( + kUavSlots + 1 + 3, skip_connection.desc_handle_vector); + + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 4, + se_w1.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 5, + se_b1.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 6, + se_w2.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 7, + se_b2.desc_handle_scalar); + + int blocks = N; + if (K <= 128) + command_list->SetPipelineState(se_128_); + else if (K <= 256) + command_list->SetPipelineState(se_256_); + else if (K <= 320) + command_list->SetPipelineState(se_320_); + else if (K <= 384) + command_list->SetPipelineState(se_384_); + else if (K <= 512) + command_list->SetPipelineState(se_512_); + else if (K <= 640) + command_list->SetPipelineState(se_640_); + else if (K <= 768) + command_list->SetPipelineState(se_768_); + else if (K <= 1024) + command_list->SetPipelineState(se_1024_); + else + throw Exception("Unsupported channel count for SE"); + + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::OutputTransform(ID3D12GraphicsCommandList4* command_list, + DXAlloc output, DXAlloc transformed_output, + DXAlloc skip_connection, DXAlloc bias, + DXAlloc se_w1, DXAlloc se_b1, DXAlloc se_w2, + DXAlloc se_b2, int N, int K, bool relu, + bool bias_add, bool skip_add, bool fused_se, + int se_k, bool /*fp16*/) { + int consts[] = {N, K, relu, bias_add, skip_add, fused_se, se_k}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetComputeRootUnorderedAccessView(0, transformed_output.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, output.gpu_va); + if (bias_add) command_list->SetComputeRootUnorderedAccessView(2, bias.gpu_va); + if (skip_add) + command_list->SetComputeRootUnorderedAccessView(3, skip_connection.gpu_va); + if (fused_se) { + command_list->SetComputeRootUnorderedAccessView(4, se_w1.gpu_va); + command_list->SetComputeRootUnorderedAccessView(5, se_b1.gpu_va); + command_list->SetComputeRootUnorderedAccessView(6, se_w2.gpu_va); + command_list->SetComputeRootUnorderedAccessView(7, se_b2.gpu_va); + } + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + + command_list->SetComputeRootDescriptorTable( + kUavSlots + 1 + 0, transformed_output.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 1, + output.desc_handle_vector); + if (bias_add) + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 2, + bias.desc_handle_scalar); + if (skip_add) + command_list->SetComputeRootDescriptorTable( + kUavSlots + 1 + 3, skip_connection.desc_handle_vector); + if (fused_se) { + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 4, + se_w1.desc_handle_scalar); + 
command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 5, + se_b1.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 6, + se_w2.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 7, + se_b2.desc_handle_scalar); + } + + int blocks = 0; + if (fused_se) { + blocks = N; + if (K <= 128) + SET_WINOGRAD_SE_PSO(128) + else if (K <= 256) + SET_WINOGRAD_SE_PSO(256) + else if (K <= 320) + SET_WINOGRAD_SE_PSO(320) + else if (K <= 384) + SET_WINOGRAD_SE_PSO(384) + else if (K <= 512) + SET_WINOGRAD_SE_PSO(512) + else if (K <= 640) + SET_WINOGRAD_SE_PSO(640) + else if (K <= 768) + SET_WINOGRAD_SE_PSO(768) + else if (K <= 1024) + SET_WINOGRAD_SE_PSO(1024) + else + throw Exception("Unsupported channel count for SE"); + + } else { + blocks = DivUp(N * K, kWinogradTransformShaderBlockSize); + command_list->SetPipelineState(winograd_output_transform_); + } + + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::Conv1x1(ID3D12GraphicsCommandList4* command_list, + DXAlloc output, DXAlloc input, DXAlloc weight, + DXAlloc bias, int N, int C, int K, bool relu, + bool useBias, bool /*fp16*/) { + int consts[] = {N, K, C, useBias, relu}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetPipelineState(conv_1x1_); + command_list->SetComputeRootUnorderedAccessView(0, output.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, input.gpu_va); + command_list->SetComputeRootUnorderedAccessView(2, weight.gpu_va); + command_list->SetComputeRootUnorderedAccessView(3, bias.gpu_va); + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 0, + output.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 1, + input.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 2, + weight.desc_handle_scalar); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 3, + bias.desc_handle_scalar); + + command_list->Dispatch(K, N, 1); +} + +void ShaderWrapper::AddVectors(ID3D12GraphicsCommandList4* command_list, + DXAlloc C, DXAlloc A, DXAlloc B, int c_size, + int a_size, int b_size, bool relu, bool tanh, + bool fp16) { + if (fp16) { + // Shader handles 2 elements per thread in fp16 mode. 
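The fp16 path packs two half-precision values into each 32-bit word, which is why the sizes are halved (and asserted even) just below. The packing convention, mirroring the shader's f32tof16/f16tof32 usage, in a small illustrative C++ form (hypothetical helpers, not part of the patch):

    #include <cstdint>

    // Two 16-bit halves share one 32-bit word: low half first, high half in
    // the upper 16 bits (matching the AddVectors HLSL shader).
    inline uint32_t PackHalves(uint16_t lo, uint16_t hi) {
      return static_cast<uint32_t>(lo) | (static_cast<uint32_t>(hi) << 16);
    }

    inline void UnpackHalves(uint32_t packed, uint16_t* lo, uint16_t* hi) {
      *lo = static_cast<uint16_t>(packed & 0xFFFF);
      *hi = static_cast<uint16_t>(packed >> 16);
    }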
+ assert(a_size % 2 == 0); + assert(b_size % 2 == 0); + assert(c_size % 2 == 0); + a_size /= 2; + b_size /= 2; + c_size /= 2; + } + int consts[] = {a_size, b_size, c_size, relu, tanh, fp16}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetPipelineState(add_vectors_); + command_list->SetComputeRootUnorderedAccessView(0, A.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, B.gpu_va); + command_list->SetComputeRootUnorderedAccessView(2, C.gpu_va); + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + + int blocks = DivUp(c_size, kAddVectorsBlockSize); + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::PolicyMap(ID3D12GraphicsCommandList4* command_list, + DXAlloc output, DXAlloc input, DXAlloc weights, + int N, int input_size, int output_size, + int used_size, bool /*fp16*/) { + int consts[] = {N, input_size, used_size, output_size}; + command_list->SetComputeRootSignature(root_sign_); + command_list->SetPipelineState(policy_map_); + command_list->SetComputeRootUnorderedAccessView(0, input.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, output.gpu_va); + command_list->SetComputeRootUnorderedAccessView(2, weights.gpu_va); + command_list->SetComputeRoot32BitConstants(kUavSlots, ARR_ELEMENT_COUNT(consts), &consts, 0); + command_list->SetComputeRootDescriptorTable(kUavSlots+1, input.desc_handle_scalar); + + int blocks = DivUp(N * used_size, kPolicyMapBlockSize); + command_list->Dispatch(blocks, 1, 1); +} + +void ShaderWrapper::MatrixMultiply(ID3D12GraphicsCommandList4* command_list, + DXAlloc output, DXAlloc A, DXAlloc B, int M, + int N, int K, int batch, bool /*fp16*/) { + int Consts[] = {M, N, K, batch}; + command_list->SetComputeRootSignature(root_sign_); + + command_list->SetPipelineState(gemm_); + + command_list->SetComputeRootUnorderedAccessView(0, A.gpu_va); + command_list->SetComputeRootUnorderedAccessView(1, B.gpu_va); + command_list->SetComputeRootUnorderedAccessView(2, output.gpu_va); + command_list->SetComputeRoot32BitConstants( + kUavSlots, ARR_ELEMENT_COUNT(Consts), &Consts, 0); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 0, + A.desc_handle_vector); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 1, + B.desc_handle_vector); + command_list->SetComputeRootDescriptorTable(kUavSlots + 1 + 2, + output.desc_handle_vector); + + + int blocksX = DivUp(N, kGemmElPerBlockX); + int blocksY = DivUp(M, kGemmElPerBlockY); + int blocksZ = batch; + + command_list->Dispatch(blocksX, blocksY, blocksZ); +} + +} // namespace dx_backend +} // namespace lczero diff --git a/src/neural/dx/shader_wrapper.h b/src/neural/dx/shader_wrapper.h new file mode 100644 index 0000000000..ac30a6d68a --- /dev/null +++ b/src/neural/dx/shader_wrapper.h @@ -0,0 +1,140 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . 
+
+  Additional permission under GNU GPL version 3 section 7
+
+  If you modify this Program, or any covered work, by linking or
+  combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
+  Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
+  modified version of those libraries), containing parts covered by the
+  terms of the respective license agreement, the licensors of this
+  Program grant you additional permission to convey the resulting work.
+*/
+
+#pragma once
+#include "dx_common.h"
+
+namespace lczero {
+namespace dx_backend {
+
+class ShaderWrapper {
+ private:
+  ID3D12RootSignature* root_sign_;
+  static constexpr int kUavSlots = 8;
+
+  // Various shaders used by the backend:
+  //
+  // 1. Expand planes: used to convert the packed bit-board representation
+  //    into the 'planes' that are the input to the NN.
+  // 2. Winograd input transform.
+  // 3. Winograd output transform.
+  //     - Fused with bias add, skip connection add, and relu.
+  //     - Fused with SE, bias add, skip connection add, and relu.
+  // 4. Policy map layer (can also be done on the CPU side).
+  // 5. 1x1 convolution custom kernel (used by the policy and value heads).
+  //     - TODO: Try replacing this with the conv metacommand when available.
+  //
+  // For best performance it would seem that we need two copies of all the
+  // shaders - fp16 and fp32 versions. However, two copies are not needed for
+  // now, because:
+  //  i) We should use typed UAVs for resource access as they seem to be
+  //     faster. With typed UAVs, the shader automatically receives
+  //     datatype-converted values (e.g. in fp32 even when the allocation was
+  //     in fp16).
+  //  ii) Most of these operations are memory bound - except for the GEMM,
+  //     but see (iii).
+  //  iii) Due to driver/compiler bugs or a lack of optimizations, the fp16
+  //     path seems slower on both Nvidia and AMD for most of the shaders -
+  //     even GEMM on AMD is way slower with fp16 math than with fp32.
+
+  // Only expand planes has different shaders for different datatypes.
+  //  - Mostly a meaningless 'early' optimization as this shouldn't be the
+  //    bottleneck.
+  ID3D12PipelineState* expand_planes_fp16_;
+  ID3D12PipelineState* expand_planes_fp32_;
+
+  ID3D12PipelineState* winograd_input_transform_;
+  ID3D12PipelineState* winograd_output_transform_;
+  ID3D12PipelineState* conv_1x1_;
+  ID3D12PipelineState* policy_map_;
+
+  // Gemm shader (used when the gemm metacommand isn't supported by the HW
+  // vendor).
+  ID3D12PipelineState* gemm_;
+
+  // Another simple shader to add bias, apply relu/tanh, etc.
+  ID3D12PipelineState* add_vectors_;
+
+  // Fused SE shaders for various standard channel counts.
+  ID3D12PipelineState* se_128_;
+  ID3D12PipelineState* se_256_;
+  ID3D12PipelineState* se_320_;
+  ID3D12PipelineState* se_384_;
+  ID3D12PipelineState* se_512_;
+  ID3D12PipelineState* se_640_;
+  ID3D12PipelineState* se_768_;
+  ID3D12PipelineState* se_1024_;
+
+  // Winograd output transform fused with SE, for various standard channel
+  // counts.
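Because DX compute block sizes must be fixed at compile time, there is one precompiled variant per standard channel count, and the dispatch code picks the next variant that is at least as large as the actual channel count. A small illustrative helper mirroring the if/else chains in shader_wrapper.cc (hypothetical, not part of the patch):

    #include <initializer_list>
    #include <stdexcept>

    // Pick the smallest precompiled SE channel-count bucket that fits K.
    inline int SeShaderBucket(int K) {
      for (int bucket : {128, 256, 320, 384, 512, 640, 768, 1024}) {
        if (K <= bucket) return bucket;
      }
      throw std::runtime_error("Unsupported channel count for SE");
    }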
+ ID3D12PipelineState* winograd_output_transform_se_128_; + ID3D12PipelineState* winograd_output_transform_se_256_; + ID3D12PipelineState* winograd_output_transform_se_320_; + ID3D12PipelineState* winograd_output_transform_se_384_; + ID3D12PipelineState* winograd_output_transform_se_512_; + ID3D12PipelineState* winograd_output_transform_se_640_; + ID3D12PipelineState* winograd_output_transform_se_768_; + ID3D12PipelineState* winograd_output_transform_se_1024_; + + public: + void Init(ID3D12Device* pDevice); + void Destroy(); + + void ExpandPlanes(ID3D12GraphicsCommandList4* command_list, + DXAlloc output_tensor, DXAlloc masks, DXAlloc values, + int batchSize, bool fp16); + + void InputTransform(ID3D12GraphicsCommandList4* command_list, + DXAlloc transformed_input, DXAlloc input, int N, int C, + bool fp16); + + void OutputTransform(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc transformed_output, DXAlloc skip_connection, + DXAlloc bias, DXAlloc se_w1, DXAlloc se_b1, + DXAlloc se_w2, DXAlloc se_b2, int N, int K, bool relu, + bool bias_add, bool skip_add, bool fused_se, int se_k, + bool fp16); + + void Se(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc input, DXAlloc skip_connection, DXAlloc bias, DXAlloc se_w1, + DXAlloc se_b1, DXAlloc se_w2, DXAlloc se_b2, int N, int K, bool relu, + bool bias_add, bool skip_add, int se_k, bool fp16); + + void Conv1x1(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc input, DXAlloc weight, DXAlloc bias, int N, int C, int K, + bool relu, bool useBias, bool fp16); + + void AddVectors(ID3D12GraphicsCommandList4* command_list, DXAlloc C, + DXAlloc A, DXAlloc B, int c_size, int b_size, int a_size, + bool relu, bool tanh, bool fp16); + + void PolicyMap(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc input, DXAlloc weights, int N, int input_size, + int output_size, int used_size, bool fp16); + + void MatrixMultiply(ID3D12GraphicsCommandList4* command_list, DXAlloc output, + DXAlloc A, DXAlloc B, int M, int N, int K, int batch, + bool fp16); +}; + +} // namespace dx_backend +} // namespace lczero diff --git a/src/neural/dx/shaders/AddVectors.hlsl b/src/neural/dx/shaders/AddVectors.hlsl new file mode 100644 index 0000000000..9f92c35378 --- /dev/null +++ b/src/neural/dx/shaders/AddVectors.hlsl @@ -0,0 +1,87 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +#include "shader_shared.h" + +// ------------------- Add Vectors kernel -----------------------------// +// +// C = act(A + B) +// A and B can have different lengths, mod size is used to pick the required +// element. +// fp16 version processes 2 elements at a time. + +RWStructuredBuffer A : register(u0); +RWStructuredBuffer B : register(u1); +RWStructuredBuffer C : register(u2); + +cbuffer AddVectorConsts : register(b0) { + // sizes are /2 for fp16 + uint a_size; + uint b_size; + uint c_size; + uint relu; + uint act_tanh; + uint fp16; +}; + +float2 extractElements(uint packedVal) { + return float2(f16tof32(packedVal & 0xFFFF), + f16tof32((packedVal >> 16) & 0xFFFF)); +} + +[numthreads(kAddVectorsBlockSize, 1, 1)] +void add_vectors_shader +( + uint3 globalThreadIdx : SV_DispatchThreadID +) +{ + int index = globalThreadIdx.x; + if (index >= c_size) return; + uint a = A[index % a_size]; + uint b = B[index % b_size]; + uint opVal; + if (fp16) { + float2 f2a = extractElements(a); + float2 f2b = extractElements(b); + float2 f2c = f2a + f2b; + if (relu) { + if (f2c.x < 0) f2c.x = 0; + if (f2c.y < 0) f2c.y = 0; + } + if (act_tanh) { + f2c = tanh(f2c); + } + uint2 opu = f32tof16(f2c); + opVal = opu.x | (opu.y << 16); + } else { + float c = asfloat(a) + asfloat(b); + if (relu && c < 0) c = 0; + if (act_tanh) c = tanh(c); + opVal = asuint(c); + } + C[index] = opVal; +} diff --git a/src/neural/dx/shaders/Conv1x1.hlsl b/src/neural/dx/shaders/Conv1x1.hlsl new file mode 100644 index 0000000000..039fe1a90a --- /dev/null +++ b/src/neural/dx/shaders/Conv1x1.hlsl @@ -0,0 +1,93 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "shader_shared.h" + +// ------------------- Simple 1x1 convolution shader -----------------------------// +RWBuffer output : register(u8); +RWBuffer input : register(u9); +RWBuffer filter : register(u10); +RWBuffer bias : register(u11); + +cbuffer ConvConsts : register(b0) { + uint N, K, C; + uint useBias; + uint relu; +}; + + +#define MAX_FILTERS 1024 +groupshared float sh_filter[MAX_FILTERS]; +groupshared float sh_bias; + + +// N*K thread blocks launched (groupIdx.y, and groupIdx.x resp.) +// Each block has 64 (8x8) thread. +// Each thread writes single output element. 
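The comment above describes the launch geometry; the math itself is an ordinary 1x1 convolution over NCHW tensors with an 8x8 board. A CPU reference sketch of what the shader computes (illustrative names, not part of the patch):

    #include <algorithm>
    #include <vector>

    // Hypothetical CPU reference of the 1x1 convolution: NCHW tensors with
    // H = W = 8, filter laid out as [K][C].
    void Conv1x1Ref(std::vector<float>& out, const std::vector<float>& in,
                    const std::vector<float>& filter,
                    const std::vector<float>& bias, int N, int C, int K,
                    bool use_bias, bool relu) {
      for (int n = 0; n < N; ++n)
        for (int k = 0; k < K; ++k)
          for (int sq = 0; sq < 64; ++sq) {
            float op = use_bias ? bias[k] : 0.0f;
            for (int c = 0; c < C; ++c)
              op += in[n * C * 64 + c * 64 + sq] * filter[k * C + c];
            if (relu) op = std::max(op, 0.0f);
            out[n * K * 64 + k * 64 + sq] = op;
          }
    }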
+[numthreads(kConv1x1BlockSize, 1, 1)] +#if FP16_IO == 1 +void conv_1x1_shader_fp16 +#else +void conv_1x1_shader_fp32 +#endif +( + uint3 gtid : SV_DispatchThreadID, + uint3 tid : SV_GroupThreadID, + uint3 gid : SV_GroupID +) +{ + int k = gid.x; + int n = gid.y; + + // load bias into shared memory + if (tid.x == 0) + sh_bias = useBias ? bias[k] : 0; + + // load filter into shared memory + const int iterations = (C - 1) / kConv1x1BlockSize + 1; + for (int i = 0; i < iterations; i++) + { + int c = i * kConv1x1BlockSize + tid.x; + if (c < C) + sh_filter[c] = filter[k * C + c]; + } + + GroupMemoryBarrierWithGroupSync(); + + float op = sh_bias; + for (int c = 0; c < C; c++) + { + float ip = input[n * C * 64 + c * 64 + tid.x]; + float filter = sh_filter[c]; + op += ip * filter; + } + + if (relu && op < 0) op = 0; + + output[n * K * 64 + k * 64 + tid.x] = op; +} + diff --git a/src/neural/dx/shaders/ExpandPlanes.hlsl b/src/neural/dx/shaders/ExpandPlanes.hlsl new file mode 100644 index 0000000000..ba65b5ab49 --- /dev/null +++ b/src/neural/dx/shaders/ExpandPlanes.hlsl @@ -0,0 +1,130 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "shader_shared.h" + +// ------------------- Expand Planes Shader -----------------------------// + + +RWStructuredBuffer output_fp32 : register(u0); +RWStructuredBuffer output_fp16 : register(u0); +RWStructuredBuffer masks : register(u1); +RWStructuredBuffer values : register(u2); + +cbuffer ExpandPlanesConsts : register(b0) { + uint N; // total no of planes to process + uint kInputPlanes; // no of planes per position +}; + + +// Block size of 256, same mask/val for 64 consecutive threads. +#define kNumShmemElements (kExpandPlanesElementsPerBlock / 64) +groupshared uint64_t sh_masks[kNumShmemElements]; +groupshared float sh_vals[kNumShmemElements]; + +[numthreads(kExpandPlanesFp32BlockSize, 1, 1)] +void ExpandPlanes_shader_fp32 +( + uint3 globalThreadIdx : SV_DispatchThreadID, + uint3 threadIdxInGroup : SV_GroupThreadID +) +{ + + int global_index = globalThreadIdx.x; + int local_index = threadIdxInGroup.x; + + int plane_index = global_index >> 6; + + if (plane_index >= N) return; + + // Load inputs to shared memory. 
+ if (local_index < kNumShmemElements) { + sh_masks[local_index] = masks[plane_index + local_index]; + sh_vals[local_index] = values[plane_index + local_index]; + } + + GroupMemoryBarrierWithGroupSync(); + + uint64_t mask = sh_masks[local_index >> 6]; + + int sq_index = global_index & 0x3F; + float op = 0; + + bool set = !!(mask & (1ull << sq_index)); + if (set) { + op = sh_vals[local_index >> 6]; + } + output_fp32[global_index] = op; +} + + +// every thread writes two consecutive elements +// NCHW means that the consecutive elements are in W dimension +[numthreads(kExpandPlanesFp16BlockSize, 1, 1)] +void ExpandPlanes_shader_fp16 +( + uint3 globalThreadIdx : SV_DispatchThreadID, + uint3 threadIdxInGroup : SV_GroupThreadID +) +{ + int global_index = globalThreadIdx.x * 2; + int local_index = threadIdxInGroup.x * 2; + + int plane_index = global_index >> 6; + + if (plane_index >= N) return; + + // Load inputs to shared memory. + if (threadIdxInGroup.x < kNumShmemElements) { + sh_masks[threadIdxInGroup.x] = masks[plane_index + threadIdxInGroup.x]; + sh_vals[threadIdxInGroup.x] = values[plane_index + threadIdxInGroup.x]; + } + + GroupMemoryBarrierWithGroupSync(); + + uint64_t mask = sh_masks[local_index >> 6]; + + int sq_index0 = global_index & 0x3F; + int sq_index1 = sq_index0 + 1; + + bool set0 = !!(mask & (1ull << sq_index0)); + bool set1 = !!(mask & (1ull << sq_index1)); + + float2 opf = 0; + + if (set0) { + opf.x = sh_vals[local_index >> 6]; + } + + if (set1) { + opf.y = sh_vals[local_index >> 6]; + } + + uint2 opu = f32tof16(opf); + uint opVal = opu.x | (opu.y << 16); + output_fp16[globalThreadIdx.x] = opVal; +} diff --git a/src/neural/dx/shaders/Gemm.hlsl b/src/neural/dx/shaders/Gemm.hlsl new file mode 100644 index 0000000000..cd080e1e1f --- /dev/null +++ b/src/neural/dx/shaders/Gemm.hlsl @@ -0,0 +1,206 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +#include "shader_shared.h" + +// ------------------- Matrix Multiply Shader -------------------------// + +#define MAT_A_INDEX(b, y, x) ((b)*M*K + (y)*K + (x)) +#define MAT_B_INDEX(b, y, x) ((b)*K*N + (y)*N + (x)) +#define MAT_C_INDEX(b, y, x) ((b)*M*N + (y)*N + (x)) + +#if USE_FP16_MATH == 1 +RWBuffer matrixA : register(u8); +RWBuffer matrixB : register(u9); +RWBuffer matrixC : register(u10); +#else +RWBuffer matrixA : register(u8); +RWBuffer matrixB : register(u9); +RWBuffer matrixC : register(u10); +#endif + +cbuffer consts : register(b0) { + uint M, N, K; + uint batch; +}; + +// All matrices are row-major. + + +// Use shared memory to load inputs. +// Also multiple elements per thread. + +// Double-buffered shared memory buffers +// (so that the GPU can overlap loads into shared memory with the math) +#if USE_FP16_MATH==1 +groupshared float16_t sharedA[2][kGemmElPerBlockY][kGemmShMemKChunk]; +groupshared float16_t sharedB[2][kGemmShMemKChunk][kGemmElPerBlockX]; +#else +groupshared float sharedA[2][kGemmElPerBlockY][kGemmShMemKChunk]; +groupshared float sharedB[2][kGemmShMemKChunk][kGemmElPerBlockX]; +#endif + +#define divUp(a, b) (((a)-1)/(b) + 1) + +void loadShmemBuffers(int batch, int hs, int ws, int ks, int tidy, int tidx, int bufIndex) +{ +#if USE_FP16_MATH==1 + float16_t4 temp; +#else + float4 temp; +#endif + + const int iterationsA = divUp((kGemmElPerBlockY * kGemmShMemKChunk), + (kGemmBlockWidth * kGemmBlockHeight * 4)); + int i; + [unroll] + for (i = 0; i < iterationsA; i++) + { + int index = (kGemmBlockWidth * kGemmBlockHeight) * i + + kGemmBlockHeight * tidy + tidx; + index *= 4; + int lx = index % kGemmShMemKChunk; + int ly = index / kGemmShMemKChunk; + if ((hs + ly < M) && (ks + lx < K)) + { + temp = matrixA[MAT_A_INDEX(batch, hs + ly, ks + lx) / 4]; + } + else + { + temp = 0; + } + sharedA[bufIndex][ly][lx + 0] = temp.x; + sharedA[bufIndex][ly][lx + 1] = temp.y; + sharedA[bufIndex][ly][lx + 2] = temp.z; + sharedA[bufIndex][ly][lx + 3] = temp.w; + + } + + const int iterationsB = divUp((kGemmShMemKChunk * kGemmElPerBlockX), + (kGemmBlockWidth * kGemmBlockHeight * 4)); + [unroll] + for (i = 0; i < iterationsB; i++) + { + int index = (kGemmBlockWidth * kGemmBlockHeight) * i + + kGemmBlockHeight * tidy + tidx; + index *= 4; + int lx = index % kGemmElPerBlockX; + int ly = index / kGemmElPerBlockX; + if ((ks + ly < K) && (ws + lx < N)) + { + temp = matrixB[MAT_B_INDEX(batch, ks + ly, ws + lx) / 4]; + } + else + { + temp = 0; + } + sharedB[bufIndex][ly][lx + 0] = temp.x; + sharedB[bufIndex][ly][lx + 1] = temp.y; + sharedB[bufIndex][ly][lx + 2] = temp.z; + sharedB[bufIndex][ly][lx + 3] = temp.w; + } +} + + +[numthreads(kGemmBlockWidth, kGemmBlockHeight, 1)] +void MatrixMul( + uint3 g_tid : SV_DispatchThreadID, + uint3 gid : SV_GroupID, + uint3 l_tid : SV_GroupThreadID +) +{ + int x, y; + +#if USE_FP16_MATH==1 + float16_t S[kGemmElPerThreadY][kGemmElPerThreadX]; +#else + float S[kGemmElPerThreadY][kGemmElPerThreadX]; +#endif + + [unroll] for (y = 0; y < kGemmElPerThreadY; y++) + [unroll] for (x = 0; x < kGemmElPerThreadX; x++) + S[y][x] = 0; + + int wStartThread = g_tid.x * kGemmElPerThreadX; + int hStartThread = g_tid.y * kGemmElPerThreadY; + + int wStartBlock = gid.x * kGemmBlockWidth * kGemmElPerThreadX; + int hStartBlock = gid.y * kGemmBlockHeight * kGemmElPerThreadY; + + for (int ks = 0, index = 0; ks < K; ks += kGemmShMemKChunk, index++) + { + int shIndex = index & 1; + // Load chunks of matrices A and B into shared memory. 
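The double-buffered shared-memory tiling that follows is an optimization detail; per batch entry the end result is a plain row-major GEMM, sketched here for reference before the tiled loop continues (illustrative helper, not part of the patch):

    #include <vector>

    // Hypothetical reference: C[b] = A[b] * B[b] for row-major A (MxK),
    // B (KxN), C (MxN), matching the MAT_*_INDEX macros above.
    void BatchedGemmRef(const std::vector<float>& A,
                        const std::vector<float>& B, std::vector<float>& C,
                        int M, int N, int K, int batch) {
      for (int b = 0; b < batch; ++b)
        for (int y = 0; y < M; ++y)
          for (int x = 0; x < N; ++x) {
            float s = 0.0f;
            for (int k = 0; k < K; ++k)
              s += A[b * M * K + y * K + k] * B[b * K * N + k * N + x];
            C[b * M * N + y * N + x] = s;
          }
    }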
+ loadShmemBuffers(gid.z, hStartBlock, wStartBlock, ks, l_tid.y, l_tid.x, shIndex); + + GroupMemoryBarrierWithGroupSync(); + + // Do the Multiplication for the Tile. + // Removing this unroll improves performance on Nvidia Turing but makes it slightly slower on AMD Vega 7. + [unroll] + for (int k = 0; k < kGemmShMemKChunk; k++) + [unroll] + for (y = 0; y < kGemmElPerThreadY; y++) + [unroll] + for (x = 0; x < kGemmElPerThreadX; x++) + { + int shy = y + l_tid.y * kGemmElPerThreadY; + int shx = x + l_tid.x * kGemmElPerThreadX; + S[y][x] += sharedA[shIndex][shy][k] * sharedB[shIndex][k][shx]; + } + } + + // Write results to output. +#if USE_FP16_MATH==1 + float16_t4 temp; +#else + float4 temp; +#endif + + [unroll] + for (y = 0; y < kGemmElPerThreadY; y++) + { + int w = wStartThread; + int h = hStartThread + y; + + if (h < M && w < N) + { + temp.x = S[y][0]; + temp.y = S[y][1]; + temp.z = S[y][2]; + temp.w = S[y][3]; + matrixC[MAT_C_INDEX(gid.z, h, w + 0) / 4] = temp; + + temp.x = S[y][4]; + temp.y = S[y][5]; + temp.z = S[y][6]; + temp.w = S[y][7]; + matrixC[MAT_C_INDEX(gid.z, h, w + 4) / 4] = temp; + } + } +} + diff --git a/src/neural/dx/shaders/PolicyMap.hlsl b/src/neural/dx/shaders/PolicyMap.hlsl new file mode 100644 index 0000000000..f77d8476b8 --- /dev/null +++ b/src/neural/dx/shaders/PolicyMap.hlsl @@ -0,0 +1,63 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "shader_shared.h" + +// ------------------- Policy Map Shader -----------------------------// +RWBuffer input : register(u8); + +// Output is always fp32. +RWStructuredBuffer output : register(u1); + +// Weights are always int32. +RWStructuredBuffer indices : register(u2); + +cbuffer PolicyMapConsts : register(b0) { + uint N; + uint inputSize; + uint usedSize; + uint outputSize; +}; + +[numthreads(kPolicyMapBlockSize, 1, 1)] +void PolicyMapShader +( + uint3 globalThreadIdx : SV_DispatchThreadID +) +{ + int tid = globalThreadIdx.x; + int n = tid / usedSize; + int i = tid % usedSize; + + if (n >= N) return; + + int j = indices[i]; + + if (j >= 0) { + output[n * outputSize + j] = input[n * inputSize + i]; + } +} diff --git a/src/neural/dx/shaders/SE.hlsl b/src/neural/dx/shaders/SE.hlsl new file mode 100644 index 0000000000..f5129863fb --- /dev/null +++ b/src/neural/dx/shaders/SE.hlsl @@ -0,0 +1,193 @@ +/* + This file is part of Leela Chess Zero. 
+ Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +cbuffer consts : register(b0) { + uint N, C; + + // Additional fused ops. + // Used only by output transform shader. + uint relu; + uint useBias; + uint skipAdd; + + // The channel count after SE (C / se-ratio) + uint se_K; +}; + + +RWBuffer input : register(u8); +RWBuffer output : register(u9); +RWBuffer bias : register(u10); +RWBuffer skipConnection : register(u11); +RWBuffer se_w1 : register(u12); +RWBuffer se_b1 : register(u13); +RWBuffer se_w2 : register(u14); +RWBuffer se_b2 : register(u15); + +// index in input/output tensors +#define INDEX_NCHW(n, c, h, w) ((n)*C * H * W + (c)*H * W + (h)*W + w) + +// Fused SE layer implementation +// Each thread block processes entire 'C' dimension worth of data. +// N thread blocks launched. +// As DX Compute block size needs to be known in advance, we compile multiple +// versions of this same HLSL shader with various standard channel counts (128, +// 256, 320, 384, 512, 640, 768 and 1024) and use the next bigger channel count +// version to handle channel counts not in the list of standard counts. + +// Note that the weight matrices are transposed. +#define readw1(row, col) (se_w1[(row)*se_K + (col)]) +#define readw2(row, col) (se_w2[(row)*2 * C + (col)]) + +groupshared float sharedData[BLOCK_SIZE]; + +[numthreads(BLOCK_SIZE, 1, 1)] +void SE +( + uint3 gid : SV_GroupID, + uint3 tid : SV_GroupThreadID +) +{ + const int H = 8, W = 8; + + int n = gid.x; + int k = tid.x; + + // C is set to K in the constant buffer + if (k >= C) return; + + // TODO: Try float4 board_r1[8], board_r2[8]; + // Also try reading skip connection tensor early to get more + // math-memory access overlap. + float board[8][8]; + float b = useBias ? bias[k] : 0; + + // Read input tensor. + int h; + [unroll] + for (h = 0; h < 8; h++) { + int index = INDEX_NCHW(n, k, h, 0) / 4; + float4 r1 = input[index]; + float4 r2 = input[index + 1]; + board[h][0] = r1.x; + board[h][1] = r1.y; + board[h][2] = r1.z; + board[h][3] = r1.w; + board[h][4] = r2.x; + board[h][5] = r2.y; + board[h][6] = r2.z; + board[h][7] = r2.w; + } + + // Add bias, and compute the average for SE. 
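+  // Overview of the fused SE sequence that starts here (a sketch; one thread
+  // group per batch entry n, one thread per channel k):
+  //   avg[k]  = mean over the 8x8 board of (x[k] + bias[k])
+  //   h       = relu(avg * w1 + b1)               // C -> se_K
+  //   (S, B)  = h * w2 + b2                       // se_K -> 2*C: scale | bias halves
+  //   S       = sigmoid(S)
+  //   out[k]  = relu(x[k] * S + B [+ skip[k]])    // skip/relu only when enabled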
+ float S = 0; + [unroll] + for (int y = 0; y < 8; y++) + [unroll] + for (int x = 0; x < 8; x++) + { + board[y][x] += b; + S += board[y][x]; + } + float avg = S / 64; + sharedData[k] = avg; + + GroupMemoryBarrierWithGroupSync(); + + // First fully-connected layer for SE + if (k < se_K) { + S = 0; + + for (int i = 0; i < C; i++) { + S += sharedData[i] * readw1(i, k); + } + + S += se_b1[k]; + + // relu + if (S < 0) S = 0; + + sharedData[k] = S; + } + + GroupMemoryBarrierWithGroupSync(); + + // Second fully-connected layer for SE + S = 0; + float B = 0; + for (int i = 0; i < se_K; i++) { + float val = sharedData[i]; + S += val * readw2(i, k); + B += val * readw2(i, k + C); + } + S += se_b2[k]; + B += se_b2[k + C]; + + // Sigmoid (only on the scale part). + S = 1.0 / (1.0 + exp(-S)); + + + // Scale, add skip connection, perform relu, and write to output. + [unroll] + for (h = 0; h < 8; h++) + { + int index = INDEX_NCHW(n, k, h, 0) / 4; + // can possibly use uint4 to write entire row at a time? + // couldn't find half2 to uint re-interpret functions :( + // same issue for reads. + float4 r1; + float4 r2; + r1.x = board[h][0]; + r1.y = board[h][1]; + r1.z = board[h][2]; + r1.w = board[h][3]; + r2.x = board[h][4]; + r2.y = board[h][5]; + r2.z = board[h][6]; + r2.w = board[h][7]; + + // SE scale and bias + r1 = r1*S + B; + r2 = r2*S + B; + + // residual add + if (skipAdd) { + r1 += skipConnection[index]; + r2 += skipConnection[index+1]; + } + + // relu + if (relu) { + float4 zeros = float4(0, 0, 0, 0); + r1 = max(r1, zeros); + r2 = max(r2, zeros); + } + + output[index] = r1; + output[index + 1] = r2; + } +} diff --git a/src/neural/dx/shaders/WinogradCommon.h b/src/neural/dx/shaders/WinogradCommon.h new file mode 100644 index 0000000000..08ccfd3561 --- /dev/null +++ b/src/neural/dx/shaders/WinogradCommon.h @@ -0,0 +1,290 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +#include "shader_shared.h" + +#if USE_FP16_MATH == 1 + +RWBuffer input : register(u8); +RWBuffer transformedInput : register(u9); + +RWBuffer transformedOutput : register(u8); +RWBuffer output : register(u9); +RWBuffer bias : register(u10); +RWBuffer skipConnection : register(u11); +RWBuffer se_w1 : register(u12); +RWBuffer se_b1 : register(u13); +RWBuffer se_w2 : register(u14); +RWBuffer se_b2 : register(u15); + +#else + +RWBuffer input : register(u8); +RWBuffer transformedInput : register(u9); + +RWBuffer transformedOutput : register(u8); +RWBuffer output : register(u9); +RWBuffer bias : register(u10); +RWBuffer skipConnection : register(u11); +RWBuffer se_w1 : register(u12); +RWBuffer se_b1 : register(u13); +RWBuffer se_w2 : register(u14); +RWBuffer se_b2 : register(u15); +#endif + + +cbuffer consts : register(b0) { + uint N, C; + + // Additional fused ops. + // Used only by output transform shader. + uint relu; + uint useBias; + uint skipAdd; + uint fusedSe; + + // The channel count after SE (C / se-ratio) + uint se_K; +}; + + +// index in input/output tensors +#define INDEX_NCHW(n,c,h,w) ((n)*C*H*W + (c)*H*W + (h)*W + w) + +// index in intermediate/temp tensor +// W, H == 6 here! (6x6 transformed blocks) +// N also includes part of dimension (2x2) +#define GemmN (N * 4) +#define TEMP_INDEX_HWNC(h,w,n,c) ((h)*6*GemmN*C + (w)*GemmN*C + (n)*C + c) + +//----------------------------- Utility functions for Winograd transform ------------------------------// + +// fp16/half math seems a bit slow! - on both Nvidia Turing and AMD Vega 7 (Bugs? Lack of optimizations?) +// These are memory bandwidth bound shaders anyway. +#if USE_FP16_MATH == 1 + +void matrixMul_gpu_serial_6x6x6(out float16_t c[6][6], in float16_t a[6][6], in float16_t b[6][6]) +{ + [unroll] + for (int i = 0; i < 6; ++i) + [unroll] + for (int j = 0; j < 6; ++j) + { + float16_t S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + +void matrixMul_gpu_serial_4x6x6(out float16_t c[4][6], in float16_t a[4][6], in float16_t b[6][6]) +{ + [unroll] + for (int i = 0; i < 4; ++i) + [unroll] + for (int j = 0; j < 6; ++j) + { + float16_t S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + +void matrixMul_gpu_serial_4x4x6(out float16_t c[4][4], in float16_t a[4][6], in float16_t b[6][4]) +{ + [unroll] + for (int i = 0; i < 4; ++i) + [unroll] + for (int j = 0; j < 4; ++j) + { + float16_t S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + +void inputTransform4x4_gpu(out float16_t op[6][6], in const float16_t ip[6][6]) +{ + // transform applied to input tile (of size 4x4 - padded up to 6x6) + const float16_t Bt[6][6] = + { + 4, 0, -5, 0, 1, 0, + 0, -4, -4, 1, 1, 0, + 0, 4, -4, -1, 1, 0, + 0, -2, -1, 2, 1, 0, + 0, 2, -1, -2, 1, 0, + 0, 4, 0, -5, 0, 1 + }; + + const float16_t B[6][6] = + { + 4, 0, 0, 0, 0, 0, + 0, -4, 4, -2, 2, 4, + -5, -4, -4, -1, -1, 0, + 0, 1, -1, 2, -2, -5, + 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 1 + }; + + float16_t tempIp1[6][6]; + matrixMul_gpu_serial_6x6x6(tempIp1, Bt, ip); + matrixMul_gpu_serial_6x6x6(op, tempIp1, B); +} + +void outputTransform4x4_gpu(out float16_t output[4][4], in const float16_t transformedOutput[6][6]) +{ + // transform applied to result + const float16_t At[4][6] = { + 1, 1, 1, 1, 1, 0, + 0, 1,-1, 2,-2, 0, + 0, 1, 1, 4, 4, 0, + 0, 1,-1, 8,-8, 1 + }; + + const float16_t A[6][4] = { + 1, 0, 0, 0, + 1, 1, 1, 1, + 1,-1, 1,-1, + 1, 2, 4, 8, + 1,-2, 4,-8, + 0, 0, 0, 
1 + }; + + float16_t tempOp[4][6]; + matrixMul_gpu_serial_4x6x6(tempOp, At, transformedOutput); + matrixMul_gpu_serial_4x4x6(output, tempOp, A); +} + +#else + + +//----------------------------- FP32 versions of the same code above ------------------------------// + +void matrixMul_gpu_serial_6x6x6(out float c[6][6], in float a[6][6], in float b[6][6]) +{ + [unroll] + for (int i = 0; i < 6; ++i) + [unroll] + for (int j = 0; j < 6; ++j) + { + float S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + +void matrixMul_gpu_serial_4x6x6(out float c[4][6], in float a[4][6], in float b[6][6]) +{ + [unroll] + for (int i = 0; i < 4; ++i) + [unroll] + for (int j = 0; j < 6; ++j) + { + float S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + +void matrixMul_gpu_serial_4x4x6(out float c[4][4], in float a[4][6], in float b[6][4]) +{ + [unroll] + for (int i = 0; i < 4; ++i) + [unroll] + for (int j = 0; j < 4; ++j) + { + float S = 0; + [unroll] + for (int k = 0; k < 6; ++k) + S += a[i][k] * b[k][j]; + c[i][j] = S; + } +} + + +void inputTransform4x4_gpu(out float op[6][6], in const float ip[6][6]) +{ + // transform applied to input tile (of size 4x4 - padded up to 6x6) + const float Bt[6][6] = + { + 4, 0, -5, 0, 1, 0, + 0, -4, -4, 1, 1, 0, + 0, 4, -4, -1, 1, 0, + 0, -2, -1, 2, 1, 0, + 0, 2, -1, -2, 1, 0, + 0, 4, 0, -5, 0, 1 + }; + + const float B[6][6] = + { + 4, 0, 0, 0, 0, 0, + 0, -4, 4, -2, 2, 4, + -5, -4, -4, -1, -1, 0, + 0, 1, -1, 2, -2, -5, + 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 1 + }; + + float tempIp1[6][6]; + matrixMul_gpu_serial_6x6x6(tempIp1, Bt, ip); + matrixMul_gpu_serial_6x6x6(op, tempIp1, B); +} + +void outputTransform4x4_gpu(out float output[4][4], in const float transformedOutput[6][6]) +{ + // transform applied to result + const float At[4][6] = { + 1, 1, 1, 1, 1, 0, + 0, 1,-1, 2,-2, 0, + 0, 1, 1, 4, 4, 0, + 0, 1,-1, 8,-8, 1 + }; + + const float A[6][4] = { + 1, 0, 0, 0, + 1, 1, 1, 1, + 1,-1, 1,-1, + 1, 2, 4, 8, + 1,-2, 4,-8, + 0, 0, 0, 1 + }; + + float tempOp[4][6]; + matrixMul_gpu_serial_4x6x6(tempOp, At, transformedOutput); + matrixMul_gpu_serial_4x4x6(output, tempOp, A); +} + +#endif \ No newline at end of file diff --git a/src/neural/dx/shaders/WinogradTransform.hlsl b/src/neural/dx/shaders/WinogradTransform.hlsl new file mode 100644 index 0000000000..eee95ab999 --- /dev/null +++ b/src/neural/dx/shaders/WinogradTransform.hlsl @@ -0,0 +1,451 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "WinogradCommon.h" + +// ------------------- Winograd Transform shaders -------------------------// + +// fp16/half math seems a bit slow! - on both nvidia Turing and AMD Vega 7 +#if USE_FP16_MATH == 1 + +[numthreads(kWinogradTransformShaderBlockSize, 1, 1)] +void input_transform_shader_fp16 +( + uint3 tid : SV_DispatchThreadID +) +{ + const int H = 8, W = 8; + int c = tid.x % C; + int n = tid.x / C; + if (n > N) return; + + float16_t board[8][8]; + + // read the board (a row at a time) + [unroll] + for (int y = 0; y < 8; y++) + { + int index = INDEX_NCHW(n, c, y, 0) / 4; + float16_t4 r1 = input[index]; + float16_t4 r2 = input[index + 1]; + board[y][0] = r1.x; + board[y][1] = r1.y; + board[y][2] = r1.z; + board[y][3] = r1.w; + board[y][4] = r2.x; + board[y][5] = r2.y; + board[y][6] = r2.z; + board[y][7] = r2.w; + } + + // top-left + { + float16_t inEl[6][6] = {0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0}; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i + 1][j + 1] = board[i][j]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 0, c)] = inEl[y][x]; + } + + // top-right + { + float16_t inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i + 1][j] = board[i][j+3]; + + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 1, c)] = inEl[y][x]; + } + + + // bottom-left + { + float16_t inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i][j + 1] = board[i+3][j]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 2, c)] = inEl[y][x]; + } + + // bottom-right + { + float16_t inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i][j] = board[i+3][j+3]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 3, c)] = inEl[y][x]; + } +} + +[numthreads(kWinogradTransformShaderBlockSize, 1, 1)] +void output_transform_shader_fp16 +( + uint3 tid : SV_DispatchThreadID +) +{ + const int H = 8, W = 8; + + int k = tid.x % C; // C is set to K in the constant buffer + int n = tid.x / C; + if (n > N) 
return; + + float16_t board[8][8]; + float16_t b = useBias ? bias[k] : 0; + + [unroll] + for (int hStart = 0; hStart < 8; hStart += 4) + [unroll] + for (int wStart = 0; wStart < 8; wStart += 4) + { + // i) read to per thread registers (for doing output transform) + int shln = n * 4 + (hStart / 4) * 2 + (wStart / 4); + float16_t outElTransformed[6][6]; + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + outElTransformed[y][x] = transformedOutput[TEMP_INDEX_HWNC(y, x, shln, k)]; + + // ii) transform it + float16_t outEl[4][4]; + outputTransform4x4_gpu(outEl, outElTransformed); + + { + [unroll] + for (int y = 0; y < 4; y++) + [unroll] + for (int x = 0; x < 4; x++) + board[hStart + y][wStart + x] = outEl[y][x]; + } + } + + // iii) write to output + { + [unroll] + for (int y = 0; y < 8; y++) + { + int index = INDEX_NCHW(n, k, y, 0) / 4; + // can possibly use uint4 to write entire row at a time? + // couldn't find half2 to uint re-interpret functions :( + // same issue for reads. + float16_t4 r1; + float16_t4 r2; + r1.x = board[y][0]; + r1.y = board[y][1]; + r1.z = board[y][2]; + r1.w = board[y][3]; + r2.x = board[y][4]; + r2.y = board[y][5]; + r2.z = board[y][6]; + r2.w = board[y][7]; + + // bias + r1 += b; + r2 += b; + + // residual add + if (skipAdd) { + r1 += skipConnection[index]; + r2 += skipConnection[index + 1]; + } + + // relu + if (relu) { + float16_t4 zeros = float16_t4(0, 0, 0, 0); + r1 = max(r1, zeros); + r2 = max(r2, zeros); + } + output[index] = r1; + output[index + 1] = r2; + } + } +} + +#else + +//----------------------------- FP32 versions of the same shaders above ------------------------------// + +[numthreads(kWinogradTransformShaderBlockSize, 1, 1)] +#if USE_FP16_MATH == 1 +void input_transform_shader_fp16 +#else +void input_transform_shader_fp32 +#endif +( + uint3 tid : SV_DispatchThreadID +) +{ + const int H = 8, W = 8; + int c = tid.x % C; + int n = tid.x / C; + if (n > N) return; + + float board[8][8]; + + // read the board (a row at a time) + [unroll] + for (int y = 0; y < 8; y++) + { + int index = INDEX_NCHW(n, c, y, 0) / 4; + float4 r1 = input[index]; + float4 r2 = input[index + 1]; + board[y][0] = r1.x; + board[y][1] = r1.y; + board[y][2] = r1.z; + board[y][3] = r1.w; + board[y][4] = r2.x; + board[y][5] = r2.y; + board[y][6] = r2.z; + board[y][7] = r2.w; + } + + // top-left + { + float inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i + 1][j + 1] = board[i][j]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 0, c)] = inEl[y][x]; + } + + // top-right + { + float inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i + 1][j] = board[i][j + 3]; + + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 1, c)] = inEl[y][x]; + } + + + // bottom-left + { + float inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + 
inEl[i][j + 1] = board[i + 3][j]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 2, c)] = inEl[y][x]; + } + + // bottom-right + { + float inEl[6][6] = { 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0, 0,0,0,0,0,0 }; + + [unroll] + for (int i = 0; i < 5; i++) + [unroll] + for (int j = 0; j < 5; j++) + inEl[i][j] = board[i + 3][j + 3]; + + // ii) transform it + inputTransform4x4_gpu(inEl, inEl); + + // iii) write to output + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + transformedInput[TEMP_INDEX_HWNC(y, x, n * 4 + 3, c)] = inEl[y][x]; + } +} + + +[numthreads(kWinogradTransformShaderBlockSize, 1, 1)] +#if USE_FP16_MATH == 1 +void output_transform_shader_fp16 +#else +void output_transform_shader_fp32 +#endif +( + uint3 tid : SV_DispatchThreadID +) +{ + const int H = 8, W = 8; + + int k = tid.x % C; // C is set to K in the constant buffer + int n = tid.x / C; + if (n > N) return; + + float board[8][8]; + float b = useBias ? bias[k] : 0; + + [unroll] + for (int hStart = 0; hStart < 8; hStart += 4) + [unroll] + for (int wStart = 0; wStart < 8; wStart += 4) + { + // i) read to per thread registers (for doing output transform) + int shln = n * 4 + (hStart / 4) * 2 + (wStart / 4); + float outElTransformed[6][6]; + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + outElTransformed[y][x] = transformedOutput[TEMP_INDEX_HWNC(y, x, shln, k)]; + + // ii) transform it + float outEl[4][4]; + outputTransform4x4_gpu(outEl, outElTransformed); + + { + [unroll] + for (int y = 0; y < 4; y++) + [unroll] + for (int x = 0; x < 4; x++) + board[hStart + y][wStart + x] = outEl[y][x]; + } + } + + // iii) write to output + { + [unroll] + for (int y = 0; y < 8; y++) + { + int index = INDEX_NCHW(n, k, y, 0) / 4; + // can possibly use uint4 to write entire row at a time? + // couldn't find half2 to uint re-interpret functions :( + // same issue for reads. + float4 r1; + float4 r2; + r1.x = board[y][0]; + r1.y = board[y][1]; + r1.z = board[y][2]; + r1.w = board[y][3]; + r2.x = board[y][4]; + r2.y = board[y][5]; + r2.z = board[y][6]; + r2.w = board[y][7]; + + // bias + r1 += b; + r2 += b; + + // residual add + if (skipAdd) { + r1 += skipConnection[index]; + r2 += skipConnection[index+1]; + } + + // relu + if (relu) { + float4 zeros = float4(0, 0, 0, 0); + r1 = max(r1, zeros); + r2 = max(r2, zeros); + } + + output[index] = r1; + output[index + 1] = r2; + } + } +} + +#endif \ No newline at end of file diff --git a/src/neural/dx/shaders/WinogradTransformSE.hlsl b/src/neural/dx/shaders/WinogradTransformSE.hlsl new file mode 100644 index 0000000000..0bc67c9284 --- /dev/null +++ b/src/neural/dx/shaders/WinogradTransformSE.hlsl @@ -0,0 +1,320 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "WinogradCommon.h" + +// Output transform shader - fused with SE. +// Each thread block processes entire 'C' dimension worth of data. +// N thread blocks launched. +// As DX Compute block size needs to be known in advance, we compile multiple +// versions of this same HLSL shader with various standard channel counts (128, +// 256, 320, 384, 512, 640, 768 and 1024) and use the next bigger channel count +// version to handle channel counts not in the list of standard counts. + +// Note that the weight matrices are transposed. +#define readw1(row, col) (se_w1[(row)*se_K + (col)]) +#define readw2(row, col) (se_w2[(row)*2 * C + (col)]) + +#if USE_FP16_MATH == 1 +groupshared half sharedData[BLOCK_SIZE]; + +[numthreads(BLOCK_SIZE, 1, 1)] +void OutputTransformSE +( + uint3 gid : SV_GroupID, + uint3 tid : SV_GroupThreadID +) +{ + const int H = 8, W = 8; + + int n = gid.x; + int k = tid.x; + + // C is set to K in the constant buffer + if (k >= C) return; + + + half board[8][8]; + half b = useBias ? bias[k] : 0; + + // Winograd output-transform + [unroll] + for (int hStart = 0; hStart < 8; hStart += 4) + [unroll] + for (int wStart = 0; wStart < 8; wStart += 4) + { + // i) read to per thread registers (for doing output transform) + int shln = n * 4 + (hStart / 4) * 2 + (wStart / 4); + half outElTransformed[6][6]; + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + outElTransformed[y][x] = transformedOutput[TEMP_INDEX_HWNC(y, x, shln, k)]; + + // ii) transform it + half outEl[4][4]; + outputTransform4x4_gpu(outEl, outElTransformed); + + { + [unroll] + for (int y = 0; y < 4; y++) + [unroll] + for (int x = 0; x < 4; x++) + board[hStart + y][wStart + x] = outEl[y][x]; + } + } + + // Add bias, and compute the average for SE. + half S = 0; + [unroll] + for (int y = 0; y < 8; y++) + [unroll] + for (int x = 0; x < 8; x++) + { + board[y][x] += b; + S += board[y][x]; + } + half avg = S / 64; + sharedData[k] = avg; + + GroupMemoryBarrierWithGroupSync(); + + // First fully-connected layer for SE + if (k < se_K) { + S = 0; + + for (int i = 0; i < C; i++) { + S += sharedData[i] * readw1(i, k); + } + + S += se_b1[k]; + + // relu + if (S < 0) S = 0; + + sharedData[k] = S; + } + + GroupMemoryBarrierWithGroupSync(); + + // Second fully-connected layer for SE + S = 0; + half B = 0; + for (int i = 0; i < se_K; i++) { + half val = sharedData[i]; + S += val * readw2(i, k); + B += val * readw2(i, k + C); + } + S += se_b2[k]; + B += se_b2[k + C]; + + // Sigmoid (only on the scale part). + S = 1.0 / (1.0 + exp(-S)); + + + // Scale, add skip connection, perform relu, and write to output. + [unroll] + for (int h = 0; h < 8; h++) + { + int index = INDEX_NCHW(n, k, h, 0) / 4; + // can possibly use uint4 to write entire row at a time? + // couldn't find half2 to uint re-interpret functions :( + // same issue for reads. 
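+    // In short, each row written below is out = relu(x*S + B [+ skip]), where S
+    // and B are this channel's SE scale and bias computed above.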
+ half4 r1; + half4 r2; + r1.x = board[h][0]; + r1.y = board[h][1]; + r1.z = board[h][2]; + r1.w = board[h][3]; + r2.x = board[h][4]; + r2.y = board[h][5]; + r2.z = board[h][6]; + r2.w = board[h][7]; + + // SE scale and bias + r1 = r1*S + B; + r2 = r2*S + B; + + // residual add + if (skipAdd) { + r1 += skipConnection[index]; + r2 += skipConnection[index+1]; + } + + // relu + if (relu) { + half4 zeros = half4(0, 0, 0, 0); + r1 = max(r1, zeros); + r2 = max(r2, zeros); + } + + output[index] = r1; + output[index + 1] = r2; + } +} + +#else + +groupshared float sharedData[BLOCK_SIZE]; + +[numthreads(BLOCK_SIZE, 1, 1)] +void OutputTransformSE +( + uint3 gid : SV_GroupID, + uint3 tid : SV_GroupThreadID +) +{ + const int H = 8, W = 8; + + int n = gid.x; + int k = tid.x; + + // C is set to K in the constant buffer + if (k >= C) return; + + + float board[8][8]; + float b = useBias ? bias[k] : 0; + + // Winograd output-transform + [unroll] + for (int hStart = 0; hStart < 8; hStart += 4) + [unroll] + for (int wStart = 0; wStart < 8; wStart += 4) + { + // i) read to per thread registers (for doing output transform) + int shln = n * 4 + (hStart / 4) * 2 + (wStart / 4); + float outElTransformed[6][6]; + [unroll] + for (int y = 0; y < 6; y++) + [unroll] + for (int x = 0; x < 6; x++) + outElTransformed[y][x] = transformedOutput[TEMP_INDEX_HWNC(y, x, shln, k)]; + + // ii) transform it + float outEl[4][4]; + outputTransform4x4_gpu(outEl, outElTransformed); + + { + [unroll] + for (int y = 0; y < 4; y++) + [unroll] + for (int x = 0; x < 4; x++) + board[hStart + y][wStart + x] = outEl[y][x]; + } + } + + // Add bias, and compute the average for SE. + float S = 0; + [unroll] + for (int y = 0; y < 8; y++) + [unroll] + for (int x = 0; x < 8; x++) + { + board[y][x] += b; + S += board[y][x]; + } + float avg = S / 64; + sharedData[k] = avg; + + GroupMemoryBarrierWithGroupSync(); + + // First fully-connected layer for SE + if (k < se_K) { + S = 0; + + for (int i = 0; i < C; i++) { + S += sharedData[i] * readw1(i, k); + } + + S += se_b1[k]; + + // relu + if (S < 0) S = 0; + + sharedData[k] = S; + } + + GroupMemoryBarrierWithGroupSync(); + + // Second fully-connected layer for SE + S = 0; + float B = 0; + for (int i = 0; i < se_K; i++) { + float val = sharedData[i]; + S += val * readw2(i, k); + B += val * readw2(i, k + C); + } + S += se_b2[k]; + B += se_b2[k + C]; + + // Sigmoid (only on the scale part). + S = 1.0 / (1.0 + exp(-S)); + + + // Scale, add skip connection, perform relu, and write to output. + [unroll] + for (int h = 0; h < 8; h++) + { + int index = INDEX_NCHW(n, k, h, 0) / 4; + // can possibly use uint4 to write entire row at a time? + // couldn't find half2 to uint re-interpret functions :( + // same issue for reads. 
+ float4 r1; + float4 r2; + r1.x = board[h][0]; + r1.y = board[h][1]; + r1.z = board[h][2]; + r1.w = board[h][3]; + r2.x = board[h][4]; + r2.y = board[h][5]; + r2.z = board[h][6]; + r2.w = board[h][7]; + + // SE scale and bias + r1 = r1*S + B; + r2 = r2*S + B; + + // residual add + if (skipAdd) { + r1 += skipConnection[index]; + r2 += skipConnection[index+1]; + } + + // relu + if (relu) { + float4 zeros = float4(0, 0, 0, 0); + r1 = max(r1, zeros); + r2 = max(r2, zeros); + } + + output[index] = r1; + output[index + 1] = r2; + } +} +#endif \ No newline at end of file diff --git a/src/neural/dx/shaders/dxc_helper.py b/src/neural/dx/shaders/dxc_helper.py new file mode 100755 index 0000000000..f4fce987fa --- /dev/null +++ b/src/neural/dx/shaders/dxc_helper.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# Call dxc with absolute paths where '/' are converted to '\' (in windows). +import sys +import os +import re + +x = sys.argv +x.pop(0) +for i in range(len(x)): + if re.match(r"[a-zA-Z]:/", x[i]): + x[i] = os.path.normpath(x[i]) +# We asssume dxc is already on the path. +os.system('dxc ' + '"{}"'.format('" "'.join(x))) + diff --git a/src/neural/dx/shaders/meson.build b/src/neural/dx/shaders/meson.build new file mode 100644 index 0000000000..5a4df50797 --- /dev/null +++ b/src/neural/dx/shaders/meson.build @@ -0,0 +1,183 @@ +# This file is part of Leela Chess Zero. +# Copyright (C) 2020 The LCZero Authors +# +# Leela Chess is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Leela Chess is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Leela Chess. If not, see . + +# Check for 'dxc' as 'dxc_helper.py' assumes it is on the path. +dxc = find_program('dxc', required: true) + +# Used to work around a 'dxc' limitation with '/' in absolute paths. 
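+# (dxc_helper.py simply normalizes such paths and then invokes dxc.) Each
+# custom_target below therefore ends up running a command of the form
+#   dxc /E<entry> [/Vn <variable>] [/D...] /Tcs_6_2 /Fh <output>.h <input>.hlsl
+# For example, the first target roughly expands to (paths abbreviated):
+#   dxc /EMatrixMul /Vn g_MatrixMul_Fp32 /Tcs_6_2 /Fh MatrixMul_Fp32.h Gemm.hlsl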
+dxc_helper = find_program('dxc_helper.py', required: true) + +dxc_common = ['/Tcs_6_2', '/Fh', '@OUTPUT@', '@INPUT@'] + +files += custom_target('MatrixMul_Fp32', + output : 'MatrixMul_Fp32.h', + input : 'Gemm.hlsl', + command : [dxc_helper, '/EMatrixMul', '/Vn', 'g_MatrixMul_Fp32'] + dxc_common +) + +files += custom_target('MatrixMul_Fp16', + output : 'MatrixMul_Fp16.h', + input : 'Gemm.hlsl', + command : [dxc_helper, '/EMatrixMul', '/Vn', 'g_MatrixMul_Fp16', '/DUSE_FP16_MATH=1', + '-enable-16bit-types'] + dxc_common +) + +files += custom_target('ExpandPlanes_shader_fp32', + output : 'ExpandPlanes_shader_fp32.h', + input : 'ExpandPlanes.hlsl', + command : [dxc_helper, '/EExpandPlanes_shader_fp32'] + dxc_common +) + +files += custom_target('ExpandPlanes_shader_fp16', + output : 'ExpandPlanes_shader_fp16.h', + input : 'ExpandPlanes.hlsl', + command : [dxc_helper, '/EExpandPlanes_shader_fp16'] + dxc_common +) + +files += custom_target('input_transform_shader_fp32', + output : 'input_transform_shader_fp32.h', + input : 'WinogradTransform.hlsl', + command : [dxc_helper, '/Einput_transform_shader_fp32'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32', + output : 'output_transform_shader_fp32.h', + input : 'WinogradTransform.hlsl', + command : [dxc_helper, '/Eoutput_transform_shader_fp32'] + dxc_common +) + +files += custom_target('conv_1x1_shader_fp32', + output : 'conv_1x1_shader_fp32.h', + input : 'Conv1x1.hlsl', + command : [dxc_helper, '/Econv_1x1_shader_fp32'] + dxc_common +) + +files += custom_target('add_vectors_shader', + output : 'add_vectors_shader.h', + input : 'AddVectors.hlsl', + command : [dxc_helper, '/Eadd_vectors_shader'] + dxc_common +) + +files += custom_target('policy_map_shader_fp32', + output : 'policy_map_shader_fp32.h', + input : 'PolicyMap.hlsl', + command : [dxc_helper, '/EPolicyMapShader', '/Vn', 'g_policy_map_shader_fp32'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_128', + output : 'output_transform_shader_fp32_se_128.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_128', + '/DBLOCK_SIZE=128'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_256', + output : 'output_transform_shader_fp32_se_256.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_256', + '/DBLOCK_SIZE=256'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_320', + output : 'output_transform_shader_fp32_se_320.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_320', + '/DBLOCK_SIZE=320'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_384', + output : 'output_transform_shader_fp32_se_384.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_384', + '/DBLOCK_SIZE=384'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_512', + output : 'output_transform_shader_fp32_se_512.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_512', + '/DBLOCK_SIZE=512'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_640', + output : 'output_transform_shader_fp32_se_640.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, 
'/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_640', + '/DBLOCK_SIZE=640'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_768', + output : 'output_transform_shader_fp32_se_768.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_768', + '/DBLOCK_SIZE=768'] + dxc_common +) + +files += custom_target('output_transform_shader_fp32_se_1024', + output : 'output_transform_shader_fp32_se_1024.h', + input : 'WinogradTransformSE.hlsl', + command : [dxc_helper, '/EOutputTransformSE', '/Vn', 'g_output_transform_shader_fp32_se_1024', + '/DBLOCK_SIZE=1024'] + dxc_common +) + +files += custom_target('se_128', + output : 'se_128.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_128', '/DBLOCK_SIZE=128'] + dxc_common +) + +files += custom_target('se_256', + output : 'se_256.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_256', '/DBLOCK_SIZE=256'] + dxc_common +) + +files += custom_target('se_320', + output : 'se_320.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_320', '/DBLOCK_SIZE=320'] + dxc_common +) + +files += custom_target('se_384', + output : 'se_384.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_384', '/DBLOCK_SIZE=384'] + dxc_common +) + +files += custom_target('se_512', + output : 'se_512.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_512', '/DBLOCK_SIZE=512'] + dxc_common +) + +files += custom_target('se_640', + output : 'se_640.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_640', '/DBLOCK_SIZE=640'] + dxc_common +) + +files += custom_target('se_768', + output : 'se_768.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_768', '/DBLOCK_SIZE=768'] + dxc_common +) + +files += custom_target('se_1024', + output : 'se_1024.h', + input : 'SE.hlsl', + command : [dxc_helper, '/ESE', '/Vn', 'g_se_1024', '/DBLOCK_SIZE=1024'] + dxc_common +) + diff --git a/src/neural/dx/shaders/shader_shared.h b/src/neural/dx/shaders/shader_shared.h new file mode 100644 index 0000000000..331a271f86 --- /dev/null +++ b/src/neural/dx/shaders/shader_shared.h @@ -0,0 +1,52 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +#define kExpandPlanesElementsPerBlock 256 +#define kExpandPlanesFp32BlockSize kExpandPlanesElementsPerBlock +#define kExpandPlanesFp16BlockSize (kExpandPlanesElementsPerBlock / 2) + +// for both input transform and output transform shaders +#define kWinogradTransformShaderBlockSize 64 + +#define kConv1x1BlockSize 64 + +#define kAddVectorsBlockSize 512 + +#define kPolicyMapBlockSize 256 + + +// Constants for GEMM shader. +#define kGemmBlockWidth 16 +#define kGemmBlockHeight 16 + +#define kGemmElPerThreadX 8 +#define kGemmElPerThreadY 8 + +#define kGemmElPerBlockX (kGemmElPerThreadX * kGemmBlockWidth) +#define kGemmElPerBlockY (kGemmElPerThreadY * kGemmBlockHeight) + +#define kGemmShMemKChunk 16 \ No newline at end of file diff --git a/src/neural/dx/shaders/shaders.h b/src/neural/dx/shaders/shaders.h new file mode 100644 index 0000000000..374f8e11d9 --- /dev/null +++ b/src/neural/dx/shaders/shaders.h @@ -0,0 +1,57 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +#include "ExpandPlanes_shader_fp32.h" +#include "ExpandPlanes_shader_fp16.h" +#include "input_transform_shader_fp32.h" +#include "output_transform_shader_fp32.h" +#include "conv_1x1_shader_fp32.h" +#include "add_vectors_shader.h" +#include "policy_map_shader_fp32.h" + +#include "output_transform_shader_fp32_se_128.h" +#include "output_transform_shader_fp32_se_256.h" +#include "output_transform_shader_fp32_se_320.h" +#include "output_transform_shader_fp32_se_384.h" +#include "output_transform_shader_fp32_se_512.h" +#include "output_transform_shader_fp32_se_640.h" +#include "output_transform_shader_fp32_se_768.h" +#include "output_transform_shader_fp32_se_1024.h" + +#include "se_128.h" +#include "se_256.h" +#include "se_320.h" +#include "se_384.h" +#include "se_512.h" +#include "se_640.h" +#include "se_768.h" +#include "se_1024.h" + +#include "MatrixMul_Fp32.h" +#include "MatrixMul_Fp16.h" diff --git a/third_party/d3dx12.h b/third_party/d3dx12.h new file mode 100644 index 0000000000..d001167920 --- /dev/null +++ b/third_party/d3dx12.h @@ -0,0 +1,1534 @@ +//********************************************************* +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). 
+// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +//********************************************************* + +#ifndef __D3DX12_H__ +#define __D3DX12_H__ + +#include "d3d12.h" + +#if defined( __cplusplus ) + +struct CD3DX12_DEFAULT {}; +extern const DECLSPEC_SELECTANY CD3DX12_DEFAULT D3D12_DEFAULT; + +//------------------------------------------------------------------------------------------------ +inline bool operator==( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) +{ + return l.TopLeftX == r.TopLeftX && l.TopLeftY == r.TopLeftY && l.Width == r.Width && + l.Height == r.Height && l.MinDepth == r.MinDepth && l.MaxDepth == r.MaxDepth; +} + +//------------------------------------------------------------------------------------------------ +inline bool operator!=( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RECT : public D3D12_RECT +{ + CD3DX12_RECT() + {} + explicit CD3DX12_RECT( const D3D12_RECT& o ) : + D3D12_RECT( o ) + {} + explicit CD3DX12_RECT( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) + { + left = Left; + top = Top; + right = Right; + bottom = Bottom; + } + ~CD3DX12_RECT() {} + operator const D3D12_RECT&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BOX : public D3D12_BOX +{ + CD3DX12_BOX() + {} + explicit CD3DX12_BOX( const D3D12_BOX& o ) : + D3D12_BOX( o ) + {} + explicit CD3DX12_BOX( + LONG Left, + LONG Right ) + { + left = Left; + top = 0; + front = 0; + right = Right; + bottom = 1; + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Right, + LONG Bottom ) + { + left = Left; + top = Top; + front = 0; + right = Right; + bottom = Bottom; + back = 1; + } + explicit CD3DX12_BOX( + LONG Left, + LONG Top, + LONG Front, + LONG Right, + LONG Bottom, + LONG Back ) + { + left = Left; + top = Top; + front = Front; + right = Right; + bottom = Bottom; + back = Back; + } + ~CD3DX12_BOX() {} + operator const D3D12_BOX&() const { return *this; } +}; +inline bool operator==( const D3D12_BOX& l, const D3D12_BOX& r ) +{ + return l.left == r.left && l.top == r.top && l.front == r.front && + l.right == r.right && l.bottom == r.bottom && l.back == r.back; +} +inline bool operator!=( const D3D12_BOX& l, const D3D12_BOX& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DEPTH_STENCIL_DESC : public D3D12_DEPTH_STENCIL_DESC +{ + CD3DX12_DEPTH_STENCIL_DESC() + {} + explicit CD3DX12_DEPTH_STENCIL_DESC( const D3D12_DEPTH_STENCIL_DESC& o ) : + D3D12_DEPTH_STENCIL_DESC( o ) + {} + explicit CD3DX12_DEPTH_STENCIL_DESC( CD3DX12_DEFAULT ) + { + DepthEnable = TRUE; + DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + DepthFunc = D3D12_COMPARISON_FUNC_LESS; + StencilEnable = FALSE; + StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = + { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; + FrontFace = defaultStencilOp; + BackFace = defaultStencilOp; + } + explicit CD3DX12_DEPTH_STENCIL_DESC( + BOOL depthEnable, + 
D3D12_DEPTH_WRITE_MASK depthWriteMask, + D3D12_COMPARISON_FUNC depthFunc, + BOOL stencilEnable, + UINT8 stencilReadMask, + UINT8 stencilWriteMask, + D3D12_STENCIL_OP frontStencilFailOp, + D3D12_STENCIL_OP frontStencilDepthFailOp, + D3D12_STENCIL_OP frontStencilPassOp, + D3D12_COMPARISON_FUNC frontStencilFunc, + D3D12_STENCIL_OP backStencilFailOp, + D3D12_STENCIL_OP backStencilDepthFailOp, + D3D12_STENCIL_OP backStencilPassOp, + D3D12_COMPARISON_FUNC backStencilFunc ) + { + DepthEnable = depthEnable; + DepthWriteMask = depthWriteMask; + DepthFunc = depthFunc; + StencilEnable = stencilEnable; + StencilReadMask = stencilReadMask; + StencilWriteMask = stencilWriteMask; + FrontFace.StencilFailOp = frontStencilFailOp; + FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; + FrontFace.StencilPassOp = frontStencilPassOp; + FrontFace.StencilFunc = frontStencilFunc; + BackFace.StencilFailOp = backStencilFailOp; + BackFace.StencilDepthFailOp = backStencilDepthFailOp; + BackFace.StencilPassOp = backStencilPassOp; + BackFace.StencilFunc = backStencilFunc; + } + ~CD3DX12_DEPTH_STENCIL_DESC() {} + operator const D3D12_DEPTH_STENCIL_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_BLEND_DESC : public D3D12_BLEND_DESC +{ + CD3DX12_BLEND_DESC() + {} + explicit CD3DX12_BLEND_DESC( const D3D12_BLEND_DESC& o ) : + D3D12_BLEND_DESC( o ) + {} + explicit CD3DX12_BLEND_DESC( CD3DX12_DEFAULT ) + { + AlphaToCoverageEnable = FALSE; + IndependentBlendEnable = FALSE; + const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = + { + FALSE,FALSE, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, + D3D12_LOGIC_OP_NOOP, + D3D12_COLOR_WRITE_ENABLE_ALL, + }; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) + RenderTarget[ i ] = defaultRenderTargetBlendDesc; + } + ~CD3DX12_BLEND_DESC() {} + operator const D3D12_BLEND_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RASTERIZER_DESC : public D3D12_RASTERIZER_DESC +{ + CD3DX12_RASTERIZER_DESC() + {} + explicit CD3DX12_RASTERIZER_DESC( const D3D12_RASTERIZER_DESC& o ) : + D3D12_RASTERIZER_DESC( o ) + {} + explicit CD3DX12_RASTERIZER_DESC( CD3DX12_DEFAULT ) + { + FillMode = D3D12_FILL_MODE_SOLID; + CullMode = D3D12_CULL_MODE_BACK; + FrontCounterClockwise = FALSE; + DepthBias = D3D12_DEFAULT_DEPTH_BIAS; + DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; + SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; + DepthClipEnable = TRUE; + MultisampleEnable = FALSE; + AntialiasedLineEnable = FALSE; + ForcedSampleCount = 0; + ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + } + explicit CD3DX12_RASTERIZER_DESC( + D3D12_FILL_MODE fillMode, + D3D12_CULL_MODE cullMode, + BOOL frontCounterClockwise, + INT depthBias, + FLOAT depthBiasClamp, + FLOAT slopeScaledDepthBias, + BOOL depthClipEnable, + BOOL multisampleEnable, + BOOL antialiasedLineEnable, + UINT forcedSampleCount, + D3D12_CONSERVATIVE_RASTERIZATION_MODE conservativeRaster) + { + FillMode = fillMode; + CullMode = cullMode; + FrontCounterClockwise = frontCounterClockwise; + DepthBias = depthBias; + DepthBiasClamp = depthBiasClamp; + SlopeScaledDepthBias = slopeScaledDepthBias; + DepthClipEnable = depthClipEnable; + MultisampleEnable = multisampleEnable; + AntialiasedLineEnable = antialiasedLineEnable; + 
ForcedSampleCount = forcedSampleCount; + ConservativeRaster = conservativeRaster; + } + ~CD3DX12_RASTERIZER_DESC() {} + operator const D3D12_RASTERIZER_DESC&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_ALLOCATION_INFO : public D3D12_RESOURCE_ALLOCATION_INFO +{ + CD3DX12_RESOURCE_ALLOCATION_INFO() + {} + explicit CD3DX12_RESOURCE_ALLOCATION_INFO( const D3D12_RESOURCE_ALLOCATION_INFO& o ) : + D3D12_RESOURCE_ALLOCATION_INFO( o ) + {} + CD3DX12_RESOURCE_ALLOCATION_INFO( + UINT64 size, + UINT64 alignment ) + { + SizeInBytes = size; + Alignment = alignment; + } + operator const D3D12_RESOURCE_ALLOCATION_INFO&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES +{ + CD3DX12_HEAP_PROPERTIES() + {} + explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) : + D3D12_HEAP_PROPERTIES(o) + {} + CD3DX12_HEAP_PROPERTIES( + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) + { + Type = D3D12_HEAP_TYPE_CUSTOM; + CPUPageProperty = cpuPageProperty; + MemoryPoolPreference = memoryPoolPreference; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + explicit CD3DX12_HEAP_PROPERTIES( + D3D12_HEAP_TYPE type, + UINT creationNodeMask = 1, + UINT nodeMask = 1 ) + { + Type = type; + CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + operator const D3D12_HEAP_PROPERTIES&() const { return *this; } + bool IsCPUAccessible() const + { + return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM && + (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); + } +}; +inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) +{ + return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && + l.MemoryPoolPreference == r.MemoryPoolPreference && + l.CreationNodeMask == r.CreationNodeMask && + l.VisibleNodeMask == r.VisibleNodeMask; +} +inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_HEAP_DESC : public D3D12_HEAP_DESC +{ + CD3DX12_HEAP_DESC() + {} + explicit CD3DX12_HEAP_DESC(const D3D12_HEAP_DESC &o) : + D3D12_HEAP_DESC(o) + {} + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_PROPERTIES properties, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = properties; + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_HEAP_TYPE type, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + UINT64 size, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + UINT64 alignment = 0, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = size; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); 
+ Alignment = alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_PROPERTIES properties, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = properties; + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_HEAP_TYPE type, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( type ); + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + CD3DX12_HEAP_DESC( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_CPU_PAGE_PROPERTY cpuPageProperty, + D3D12_MEMORY_POOL memoryPoolPreference, + D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) + { + SizeInBytes = resAllocInfo.SizeInBytes; + Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); + Alignment = resAllocInfo.Alignment; + Flags = flags; + } + operator const D3D12_HEAP_DESC&() const { return *this; } + bool IsCPUAccessible() const + { return static_cast< const CD3DX12_HEAP_PROPERTIES* >( &Properties )->IsCPUAccessible(); } +}; +inline bool operator==( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) +{ + return l.SizeInBytes == r.SizeInBytes && + l.Properties == r.Properties && + l.Alignment == r.Alignment && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_CLEAR_VALUE : public D3D12_CLEAR_VALUE +{ + CD3DX12_CLEAR_VALUE() + {} + explicit CD3DX12_CLEAR_VALUE(const D3D12_CLEAR_VALUE &o) : + D3D12_CLEAR_VALUE(o) + {} + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + const FLOAT color[4] ) + { + Format = format; + memcpy( Color, color, sizeof( Color ) ); + } + CD3DX12_CLEAR_VALUE( + DXGI_FORMAT format, + FLOAT depth, + UINT8 stencil ) + { + Format = format; + /* Use memcpy to preserve NAN values */ + memcpy( &DepthStencil.Depth, &depth, sizeof( depth ) ); + DepthStencil.Stencil = stencil; + } + operator const D3D12_CLEAR_VALUE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RANGE : public D3D12_RANGE +{ + CD3DX12_RANGE() + {} + explicit CD3DX12_RANGE(const D3D12_RANGE &o) : + D3D12_RANGE(o) + {} + CD3DX12_RANGE( + SIZE_T begin, + SIZE_T end ) + { + Begin = begin; + End = end; + } + operator const D3D12_RANGE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SHADER_BYTECODE : public D3D12_SHADER_BYTECODE +{ + CD3DX12_SHADER_BYTECODE() + {} + explicit CD3DX12_SHADER_BYTECODE(const D3D12_SHADER_BYTECODE &o) : + D3D12_SHADER_BYTECODE(o) + {} + CD3DX12_SHADER_BYTECODE( + ID3DBlob* pShaderBlob ) + { + pShaderBytecode = pShaderBlob->GetBufferPointer(); + BytecodeLength = pShaderBlob->GetBufferSize(); + } + CD3DX12_SHADER_BYTECODE( + void* _pShaderBytecode, + SIZE_T bytecodeLength ) + { + pShaderBytecode = _pShaderBytecode; + BytecodeLength = bytecodeLength; + } + operator const D3D12_SHADER_BYTECODE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILED_RESOURCE_COORDINATE : public D3D12_TILED_RESOURCE_COORDINATE +{ + 
CD3DX12_TILED_RESOURCE_COORDINATE() + {} + explicit CD3DX12_TILED_RESOURCE_COORDINATE(const D3D12_TILED_RESOURCE_COORDINATE &o) : + D3D12_TILED_RESOURCE_COORDINATE(o) + {} + CD3DX12_TILED_RESOURCE_COORDINATE( + UINT x, + UINT y, + UINT z, + UINT subresource ) + { + X = x; + Y = y; + Z = z; + Subresource = subresource; + } + operator const D3D12_TILED_RESOURCE_COORDINATE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_REGION_SIZE : public D3D12_TILE_REGION_SIZE +{ + CD3DX12_TILE_REGION_SIZE() + {} + explicit CD3DX12_TILE_REGION_SIZE(const D3D12_TILE_REGION_SIZE &o) : + D3D12_TILE_REGION_SIZE(o) + {} + CD3DX12_TILE_REGION_SIZE( + UINT numTiles, + BOOL useBox, + UINT width, + UINT16 height, + UINT16 depth ) + { + NumTiles = numTiles; + UseBox = useBox; + Width = width; + Height = height; + Depth = depth; + } + operator const D3D12_TILE_REGION_SIZE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_TILING : public D3D12_SUBRESOURCE_TILING +{ + CD3DX12_SUBRESOURCE_TILING() + {} + explicit CD3DX12_SUBRESOURCE_TILING(const D3D12_SUBRESOURCE_TILING &o) : + D3D12_SUBRESOURCE_TILING(o) + {} + CD3DX12_SUBRESOURCE_TILING( + UINT widthInTiles, + UINT16 heightInTiles, + UINT16 depthInTiles, + UINT startTileIndexInOverallResource ) + { + WidthInTiles = widthInTiles; + HeightInTiles = heightInTiles; + DepthInTiles = depthInTiles; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } + operator const D3D12_SUBRESOURCE_TILING&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TILE_SHAPE : public D3D12_TILE_SHAPE +{ + CD3DX12_TILE_SHAPE() + {} + explicit CD3DX12_TILE_SHAPE(const D3D12_TILE_SHAPE &o) : + D3D12_TILE_SHAPE(o) + {} + CD3DX12_TILE_SHAPE( + UINT widthInTexels, + UINT heightInTexels, + UINT depthInTexels ) + { + WidthInTexels = widthInTexels; + HeightInTexels = heightInTexels; + DepthInTexels = depthInTexels; + } + operator const D3D12_TILE_SHAPE&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER +{ + CD3DX12_RESOURCE_BARRIER() + {} + explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) : + D3D12_RESOURCE_BARRIER(o) + {} + static inline CD3DX12_RESOURCE_BARRIER Transition( + _In_ ID3D12Resource* pResource, + D3D12_RESOURCE_STATES stateBefore, + D3D12_RESOURCE_STATES stateAfter, + UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + result.Flags = flags; + barrier.Transition.pResource = pResource; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.StateAfter = stateAfter; + barrier.Transition.Subresource = subresource; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER Aliasing( + _In_ ID3D12Resource* pResourceBefore, + _In_ ID3D12Resource* pResourceAfter) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; + 
barrier.Aliasing.pResourceBefore = pResourceBefore; + barrier.Aliasing.pResourceAfter = pResourceAfter; + return result; + } + static inline CD3DX12_RESOURCE_BARRIER UAV( + _In_ ID3D12Resource* pResource) + { + CD3DX12_RESOURCE_BARRIER result; + ZeroMemory(&result, sizeof(result)); + D3D12_RESOURCE_BARRIER &barrier = result; + result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = pResource; + return result; + } + operator const D3D12_RESOURCE_BARRIER&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_PACKED_MIP_INFO : public D3D12_PACKED_MIP_INFO +{ + CD3DX12_PACKED_MIP_INFO() + {} + explicit CD3DX12_PACKED_MIP_INFO(const D3D12_PACKED_MIP_INFO &o) : + D3D12_PACKED_MIP_INFO(o) + {} + CD3DX12_PACKED_MIP_INFO( + UINT8 numStandardMips, + UINT8 numPackedMips, + UINT numTilesForPackedMips, + UINT startTileIndexInOverallResource ) + { + NumStandardMips = numStandardMips; + NumPackedMips = numPackedMips; + NumTilesForPackedMips = numTilesForPackedMips; + StartTileIndexInOverallResource = startTileIndexInOverallResource; + } + operator const D3D12_PACKED_MIP_INFO&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_SUBRESOURCE_FOOTPRINT : public D3D12_SUBRESOURCE_FOOTPRINT +{ + CD3DX12_SUBRESOURCE_FOOTPRINT() + {} + explicit CD3DX12_SUBRESOURCE_FOOTPRINT(const D3D12_SUBRESOURCE_FOOTPRINT &o) : + D3D12_SUBRESOURCE_FOOTPRINT(o) + {} + CD3DX12_SUBRESOURCE_FOOTPRINT( + DXGI_FORMAT format, + UINT width, + UINT height, + UINT depth, + UINT rowPitch ) + { + Format = format; + Width = width; + Height = height; + Depth = depth; + RowPitch = rowPitch; + } + explicit CD3DX12_SUBRESOURCE_FOOTPRINT( + const D3D12_RESOURCE_DESC& resDesc, + UINT rowPitch ) + { + Format = resDesc.Format; + Width = UINT( resDesc.Width ); + Height = resDesc.Height; + Depth = (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
resDesc.DepthOrArraySize : 1); + RowPitch = rowPitch; + } + operator const D3D12_SUBRESOURCE_FOOTPRINT&() const { return *this; } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_TEXTURE_COPY_LOCATION : public D3D12_TEXTURE_COPY_LOCATION +{ + CD3DX12_TEXTURE_COPY_LOCATION() + {} + explicit CD3DX12_TEXTURE_COPY_LOCATION(const D3D12_TEXTURE_COPY_LOCATION &o) : + D3D12_TEXTURE_COPY_LOCATION(o) + {} + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes) { pResource = pRes; } + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes, D3D12_PLACED_SUBRESOURCE_FOOTPRINT const& Footprint) + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + PlacedFootprint = Footprint; + } + CD3DX12_TEXTURE_COPY_LOCATION(ID3D12Resource* pRes, UINT Sub) + { + pResource = pRes; + Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + SubresourceIndex = Sub; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_DESCRIPTOR_RANGE : public D3D12_DESCRIPTOR_RANGE +{ + CD3DX12_DESCRIPTOR_RANGE() { } + explicit CD3DX12_DESCRIPTOR_RANGE(const D3D12_DESCRIPTOR_RANGE &o) : + D3D12_DESCRIPTOR_RANGE(o) + {} + CD3DX12_DESCRIPTOR_RANGE( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + inline void Init( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); + } + + static inline void Init( + _Out_ D3D12_DESCRIPTOR_RANGE &range, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0, + UINT offsetInDescriptorsFromTableStart = + D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) + { + range.RangeType = rangeType; + range.NumDescriptors = numDescriptors; + range.BaseShaderRegister = baseShaderRegister; + range.RegisterSpace = registerSpace; + range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR_TABLE : public D3D12_ROOT_DESCRIPTOR_TABLE +{ + CD3DX12_ROOT_DESCRIPTOR_TABLE() {} + explicit CD3DX12_ROOT_DESCRIPTOR_TABLE(const D3D12_ROOT_DESCRIPTOR_TABLE &o) : + D3D12_ROOT_DESCRIPTOR_TABLE(o) + {} + CD3DX12_ROOT_DESCRIPTOR_TABLE( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + Init(numDescriptorRanges, _pDescriptorRanges); + } + + inline void Init( + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + Init(*this, numDescriptorRanges, _pDescriptorRanges); + } + + static inline void Init( + _Out_ D3D12_ROOT_DESCRIPTOR_TABLE &rootDescriptorTable, + UINT numDescriptorRanges, + _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) + { + rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges; + rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges; + } +}; + 
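For orientation, a minimal usage sketch of the copy-location and barrier helpers defined above (not part of d3dx12.h itself; function and variable names are illustrative, the command list, resources and footprint are assumed to exist, and error handling is omitted):

void CopyUploadToTexture(ID3D12GraphicsCommandList* cmd_list,
                         ID3D12Resource* texture, ID3D12Resource* upload,
                         const D3D12_PLACED_SUBRESOURCE_FOOTPRINT& footprint) {
  // Describe both ends of the copy with the helper constructors above.
  CD3DX12_TEXTURE_COPY_LOCATION dst(texture, 0u);        // subresource index 0
  CD3DX12_TEXTURE_COPY_LOCATION src(upload, footprint);  // placed footprint in the upload buffer
  cmd_list->CopyTextureRegion(&dst, 0, 0, 0, &src, nullptr);

  // Transition the destination so compute/graphics shaders can read it afterwards.
  CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(
      texture, D3D12_RESOURCE_STATE_COPY_DEST,
      D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
  cmd_list->ResourceBarrier(1, &barrier);
}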
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_CONSTANTS : public D3D12_ROOT_CONSTANTS +{ + CD3DX12_ROOT_CONSTANTS() {} + explicit CD3DX12_ROOT_CONSTANTS(const D3D12_ROOT_CONSTANTS &o) : + D3D12_ROOT_CONSTANTS(o) + {} + CD3DX12_ROOT_CONSTANTS( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(num32BitValues, shaderRegister, registerSpace); + } + + inline void Init( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(*this, num32BitValues, shaderRegister, registerSpace); + } + + static inline void Init( + _Out_ D3D12_ROOT_CONSTANTS &rootConstants, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0) + { + rootConstants.Num32BitValues = num32BitValues; + rootConstants.ShaderRegister = shaderRegister; + rootConstants.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_DESCRIPTOR : public D3D12_ROOT_DESCRIPTOR +{ + CD3DX12_ROOT_DESCRIPTOR() {} + explicit CD3DX12_ROOT_DESCRIPTOR(const D3D12_ROOT_DESCRIPTOR &o) : + D3D12_ROOT_DESCRIPTOR(o) + {} + CD3DX12_ROOT_DESCRIPTOR( + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(shaderRegister, registerSpace); + } + + inline void Init( + UINT shaderRegister, + UINT registerSpace = 0) + { + Init(*this, shaderRegister, registerSpace); + } + + static inline void Init(_Out_ D3D12_ROOT_DESCRIPTOR &table, UINT shaderRegister, UINT registerSpace = 0) + { + table.ShaderRegister = shaderRegister; + table.RegisterSpace = registerSpace; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_PARAMETER : public D3D12_ROOT_PARAMETER +{ + CD3DX12_ROOT_PARAMETER() {} + explicit CD3DX12_ROOT_PARAMETER(const D3D12_ROOT_PARAMETER &o) : + D3D12_ROOT_PARAMETER(o) + {} + + static inline void InitAsDescriptorTable( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR_TABLE::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges); + } + + static inline void InitAsConstants( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace); + } + + static inline void InitAsConstantBufferView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsShaderResourceView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_SRV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + static inline void InitAsUnorderedAccessView( + _Out_ D3D12_ROOT_PARAMETER &rootParam, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + rootParam.ShaderVisibility = visibility; + CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace); + } + + inline void InitAsDescriptorTable( + UINT numDescriptorRanges, + _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility); + } + + inline void InitAsConstants( + UINT num32BitValues, + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility); + } + + inline void InitAsConstantBufferView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsConstantBufferView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsShaderResourceView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsShaderResourceView(*this, shaderRegister, registerSpace, visibility); + } + + inline void InitAsUnorderedAccessView( + UINT shaderRegister, + UINT registerSpace = 0, + D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) + { + InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, visibility); + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_STATIC_SAMPLER_DESC : public D3D12_STATIC_SAMPLER_DESC +{ + CD3DX12_STATIC_SAMPLER_DESC() {} + explicit CD3DX12_STATIC_SAMPLER_DESC(const D3D12_STATIC_SAMPLER_DESC &o) : + D3D12_STATIC_SAMPLER_DESC(o) + {} + CD3DX12_STATIC_SAMPLER_DESC( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + Init( + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + + static inline void Init( + _Out_ D3D12_STATIC_SAMPLER_DESC &samplerDesc, + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC 
comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + samplerDesc.ShaderRegister = shaderRegister; + samplerDesc.Filter = filter; + samplerDesc.AddressU = addressU; + samplerDesc.AddressV = addressV; + samplerDesc.AddressW = addressW; + samplerDesc.MipLODBias = mipLODBias; + samplerDesc.MaxAnisotropy = maxAnisotropy; + samplerDesc.ComparisonFunc = comparisonFunc; + samplerDesc.BorderColor = borderColor; + samplerDesc.MinLOD = minLOD; + samplerDesc.MaxLOD = maxLOD; + samplerDesc.ShaderVisibility = shaderVisibility; + samplerDesc.RegisterSpace = registerSpace; + } + inline void Init( + UINT shaderRegister, + D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC, + D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP, + FLOAT mipLODBias = 0, + UINT maxAnisotropy = 16, + D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + FLOAT minLOD = 0.f, + FLOAT maxLOD = D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL, + UINT registerSpace = 0) + { + Init( + *this, + shaderRegister, + filter, + addressU, + addressV, + addressW, + mipLODBias, + maxAnisotropy, + comparisonFunc, + borderColor, + minLOD, + maxLOD, + shaderVisibility, + registerSpace); + } + +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_ROOT_SIGNATURE_DESC : public D3D12_ROOT_SIGNATURE_DESC +{ + CD3DX12_ROOT_SIGNATURE_DESC() {} + explicit CD3DX12_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o) : + D3D12_ROOT_SIGNATURE_DESC(o) + {} + CD3DX12_ROOT_SIGNATURE_DESC( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + Init(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + CD3DX12_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT) + { + Init(0, NULL, 0, NULL, D3D12_ROOT_SIGNATURE_FLAG_NONE); + } + + inline void Init( + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + Init(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags); + } + + static inline void Init( + _Out_ D3D12_ROOT_SIGNATURE_DESC &desc, + UINT numParameters, + _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters, + UINT numStaticSamplers = 0, + _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = NULL, + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE) + { + desc.NumParameters = numParameters; + desc.pParameters = _pParameters; + desc.NumStaticSamplers = numStaticSamplers; + desc.pStaticSamplers = _pStaticSamplers; + desc.Flags = flags; + } +}; + 
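Similarly, a brief sketch of how the root-signature helpers above are typically combined (illustrative only, not part of the header; assumes the usual d3d12.h and wrl/client.h includes, with error handling omitted):

void BuildExampleRootSignature(ID3D12Device* device,
                               Microsoft::WRL::ComPtr<ID3D12RootSignature>& out) {
  // One SRV descriptor table plus one root CBV, using the Init helpers above.
  CD3DX12_DESCRIPTOR_RANGE range;
  range.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0);

  CD3DX12_ROOT_PARAMETER params[2];
  params[0].InitAsDescriptorTable(1, &range);
  params[1].InitAsConstantBufferView(0);

  CD3DX12_ROOT_SIGNATURE_DESC desc;
  desc.Init(2, params);

  // Serialize and create the signature; 'range' must stay alive until here.
  Microsoft::WRL::ComPtr<ID3DBlob> blob, error;
  D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, &error);
  device->CreateRootSignature(0, blob->GetBufferPointer(), blob->GetBufferSize(),
                              IID_PPV_ARGS(&out));
}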
+//------------------------------------------------------------------------------------------------ +struct CD3DX12_CPU_DESCRIPTOR_HANDLE : public D3D12_CPU_DESCRIPTOR_HANDLE +{ + CD3DX12_CPU_DESCRIPTOR_HANDLE() {} + explicit CD3DX12_CPU_DESCRIPTOR_HANDLE(const D3D12_CPU_DESCRIPTOR_HANDLE &o) : + D3D12_CPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_CPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) + { + ptr += offsetInDescriptors * descriptorIncrementSize; + return *this; + } + CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) + { + ptr += offsetScaledByIncrementSize; + return *this; + } + bool operator==(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const + { + return (ptr == other.ptr); + } + bool operator!=(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const + { + return (ptr != other.ptr); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE &operator=(const D3D12_CPU_DESCRIPTOR_HANDLE &other) + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + handle.ptr = base.ptr + offsetScaledByIncrementSize; + } + + static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + handle.ptr = base.ptr + offsetInDescriptors * descriptorIncrementSize; + } +}; + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_GPU_DESCRIPTOR_HANDLE : public D3D12_GPU_DESCRIPTOR_HANDLE +{ + CD3DX12_GPU_DESCRIPTOR_HANDLE() {} + explicit CD3DX12_GPU_DESCRIPTOR_HANDLE(const D3D12_GPU_DESCRIPTOR_HANDLE &o) : + D3D12_GPU_DESCRIPTOR_HANDLE(o) + {} + CD3DX12_GPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize) + { + InitOffsetted(other, offsetScaledByIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize) + { + ptr += offsetInDescriptors * descriptorIncrementSize; + return *this; + } + CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize) + { + ptr += offsetScaledByIncrementSize; + return *this; + } + inline bool operator==(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) const + { + return (ptr == other.ptr); + } + inline bool operator!=(_In_ 
const D3D12_GPU_DESCRIPTOR_HANDLE& other) const + { + return (ptr != other.ptr); + } + CD3DX12_GPU_DESCRIPTOR_HANDLE &operator=(const D3D12_GPU_DESCRIPTOR_HANDLE &other) + { + ptr = other.ptr; + return *this; + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + InitOffsetted(*this, base, offsetScaledByIncrementSize); + } + + inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize); + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize) + { + handle.ptr = base.ptr + offsetScaledByIncrementSize; + } + + static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize) + { + handle.ptr = base.ptr + offsetInDescriptors * descriptorIncrementSize; + } +}; + +//------------------------------------------------------------------------------------------------ +inline UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize ) +{ + return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize; +} + +//------------------------------------------------------------------------------------------------ +template +inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) +{ + MipSlice = static_cast(Subresource % MipLevels); + ArraySlice = static_cast((Subresource / MipLevels) % ArraySize); + PlaneSlice = static_cast(Subresource / (MipLevels * ArraySize)); +} + +//------------------------------------------------------------------------------------------------ +inline UINT8 D3D12GetFormatPlaneCount( + _In_ ID3D12Device* pDevice, + DXGI_FORMAT Format + ) +{ + D3D12_FEATURE_DATA_FORMAT_INFO formatInfo = {Format}; + if (FAILED(pDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &formatInfo, sizeof(formatInfo)))) + { + return 0; + } + return formatInfo.PlaneCount; +} + +//------------------------------------------------------------------------------------------------ +struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC +{ + CD3DX12_RESOURCE_DESC() + {} + explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) : + D3D12_RESOURCE_DESC( o ) + {} + CD3DX12_RESOURCE_DESC( + D3D12_RESOURCE_DIMENSION dimension, + UINT64 alignment, + UINT64 width, + UINT height, + UINT16 depthOrArraySize, + UINT16 mipLevels, + DXGI_FORMAT format, + UINT sampleCount, + UINT sampleQuality, + D3D12_TEXTURE_LAYOUT layout, + D3D12_RESOURCE_FLAGS flags ) + { + Dimension = dimension; + Alignment = alignment; + Width = width; + Height = height; + DepthOrArraySize = depthOrArraySize; + MipLevels = mipLevels; + Format = format; + SampleDesc.Count = sampleCount; + SampleDesc.Quality = sampleQuality; + Layout = layout; + Flags = flags; + } + static inline CD3DX12_RESOURCE_DESC Buffer( + const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, + 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Buffer( + UINT64 
width, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, + DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex1D( + DXGI_FORMAT format, + UINT64 width, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, + mipLevels, format, 1, 0, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex2D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 arraySize = 1, + UINT16 mipLevels = 0, + UINT sampleCount = 1, + UINT sampleQuality = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, + mipLevels, format, sampleCount, sampleQuality, layout, flags ); + } + static inline CD3DX12_RESOURCE_DESC Tex3D( + DXGI_FORMAT format, + UINT64 width, + UINT height, + UINT16 depth, + UINT16 mipLevels = 0, + D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, + D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, + UINT64 alignment = 0 ) + { + return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, + mipLevels, format, 1, 0, layout, flags ); + } + inline UINT16 Depth() const + { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } + inline UINT16 ArraySize() const + { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
DepthOrArraySize : 1); } + inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const + { return D3D12GetFormatPlaneCount(pDevice, Format); } + inline UINT Subresources(_In_ ID3D12Device* pDevice) const + { return MipLevels * ArraySize() * PlaneCount(pDevice); } + inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) + { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } + operator const D3D12_RESOURCE_DESC&() const { return *this; } +}; +inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) +{ + return l.Dimension == r.Dimension && + l.Alignment == r.Alignment && + l.Width == r.Width && + l.Height == r.Height && + l.DepthOrArraySize == r.DepthOrArraySize && + l.MipLevels == r.MipLevels && + l.Format == r.Format && + l.SampleDesc.Count == r.SampleDesc.Count && + l.SampleDesc.Quality == r.SampleDesc.Quality && + l.Layout == r.Layout && + l.Flags == r.Flags; +} +inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) +{ return !( l == r ); } + +//------------------------------------------------------------------------------------------------ +// Row-by-row memcpy +inline void MemcpySubresource( + _In_ const D3D12_MEMCPY_DEST* pDest, + _In_ const D3D12_SUBRESOURCE_DATA* pSrc, + SIZE_T RowSizeInBytes, + UINT NumRows, + UINT NumSlices) +{ + for (UINT z = 0; z < NumSlices; ++z) + { + BYTE* pDestSlice = reinterpret_cast(pDest->pData) + pDest->SlicePitch * z; + const BYTE* pSrcSlice = reinterpret_cast(pSrc->pData) + pSrc->SlicePitch * z; + for (UINT y = 0; y < NumRows; ++y) + { + memcpy(pDestSlice + pDest->RowPitch * y, + pSrcSlice + pSrc->RowPitch * y, + RowSizeInBytes); + } + } +} + +//------------------------------------------------------------------------------------------------ +// Returns required size of a buffer to be used for data upload +inline UINT64 GetRequiredIntermediateSize( + _In_ ID3D12Resource* pDestinationResource, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources) +{ + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + UINT64 RequiredSize = 0; + + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, 0, nullptr, nullptr, nullptr, &RequiredSize); + pDevice->Release(); + + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// All arrays must be populated (e.g. 
by calling GetCopyableFootprints) +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + UINT64 RequiredSize, + _In_reads_(NumSubresources) const D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts, + _In_reads_(NumSubresources) const UINT* pNumRows, + _In_reads_(NumSubresources) const UINT64* pRowSizesInBytes, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) +{ + // Minor validation + D3D12_RESOURCE_DESC IntermediateDesc = pIntermediate->GetDesc(); + D3D12_RESOURCE_DESC DestinationDesc = pDestinationResource->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || + IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset || + RequiredSize > (SIZE_T)-1 || + (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || NumSubresources != 1))) + { + return 0; + } + + BYTE* pData; + HRESULT hr = pIntermediate->Map(0, NULL, reinterpret_cast(&pData)); + if (FAILED(hr)) + { + return 0; + } + + for (UINT i = 0; i < NumSubresources; ++i) + { + if (pRowSizesInBytes[i] > (SIZE_T)-1) return 0; + D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, pLayouts[i].Footprint.RowPitch * pNumRows[i] }; + MemcpySubresource(&DestData, &pSrcData[i], (SIZE_T)pRowSizesInBytes[i], pNumRows[i], pLayouts[i].Footprint.Depth); + } + pIntermediate->Unmap(0, NULL); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + CD3DX12_BOX SrcBox( UINT( pLayouts[0].Offset ), UINT( pLayouts[0].Offset + pLayouts[0].Footprint.Width ) ); + pCmdList->CopyBufferRegion( + pDestinationResource, 0, pIntermediate, pLayouts[0].Offset, pLayouts[0].Footprint.Width); + } + else + { + for (UINT i = 0; i < NumSubresources; ++i) + { + CD3DX12_TEXTURE_COPY_LOCATION Dst(pDestinationResource, i + FirstSubresource); + CD3DX12_TEXTURE_COPY_LOCATION Src(pIntermediate, pLayouts[i]); + pCmdList->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); + } + } + return RequiredSize; +} + +//------------------------------------------------------------------------------------------------ +// Heap-allocating UpdateSubresources implementation +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) +{ + UINT64 RequiredSize = 0; + UINT64 MemToAlloc = static_cast(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(UINT) + sizeof(UINT64)) * NumSubresources; + if (MemToAlloc > SIZE_MAX) + { + return 0; + } + void* pMem = HeapAlloc(GetProcessHeap(), 0, static_cast(MemToAlloc)); + if (pMem == NULL) + { + return 0; + } + D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts = reinterpret_cast(pMem); + UINT64* pRowSizesInBytes = reinterpret_cast(pLayouts + NumSubresources); + UINT* pNumRows = reinterpret_cast(pRowSizesInBytes + NumSubresources); + + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, 
NumSubresources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + UINT64 Result = UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, pLayouts, pNumRows, pRowSizesInBytes, pSrcData); + HeapFree(GetProcessHeap(), 0, pMem); + return Result; +} + +//------------------------------------------------------------------------------------------------ +// Stack-allocating UpdateSubresources implementation +template <UINT MaxSubresources> +inline UINT64 UpdateSubresources( + _In_ ID3D12GraphicsCommandList* pCmdList, + _In_ ID3D12Resource* pDestinationResource, + _In_ ID3D12Resource* pIntermediate, + UINT64 IntermediateOffset, + _In_range_(0, MaxSubresources) UINT FirstSubresource, + _In_range_(1, MaxSubresources - FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) +{ + UINT64 RequiredSize = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT Layouts[MaxSubresources]; + UINT NumRows[MaxSubresources]; + UINT64 RowSizesInBytes[MaxSubresources]; + + D3D12_RESOURCE_DESC Desc = pDestinationResource->GetDesc(); + ID3D12Device* pDevice; + pDestinationResource->GetDevice(__uuidof(*pDevice), reinterpret_cast<void**>(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, Layouts, NumRows, RowSizesInBytes, &RequiredSize); + pDevice->Release(); + + return UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, Layouts, NumRows, RowSizesInBytes, pSrcData); +} + +//------------------------------------------------------------------------------------------------ +inline bool D3D12IsLayoutOpaque( D3D12_TEXTURE_LAYOUT Layout ) +{ return Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN || Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; } + +//------------------------------------------------------------------------------------------------ +inline ID3D12CommandList * const * CommandListCast(ID3D12GraphicsCommandList * const * pp) +{ + // This cast is useful for passing strongly typed command list pointers into + // ExecuteCommandLists. + // This cast is valid as long as the const-ness is respected. D3D12 APIs do + // respect the const-ness of their arguments.
+ return reinterpret_cast(pp); +} + + +#endif // defined( __cplusplus ) + +#endif //__D3DX12_H__ + + + From 75c7ea203dfab9e33d3292b2e0357a5a14c59fe1 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 29 Jan 2020 00:20:02 +0200 Subject: [PATCH 013/151] add check_dx batch file (#1062) * add check_dx batch file * fix network_check printing bug * disable pgo build for dx backend --- appveyor.yml | 19 ++++++++++++------- scripts/check_dx.bat | 5 +++++ src/neural/network_check.cc | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) create mode 100644 scripts/check_dx.bat diff --git a/appveyor.yml b/appveyor.yml index 9409083910..25206f9d99 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,19 +63,22 @@ before_build: - cmd: git submodule update --init --recursive - cmd: SET BUILD_BLAS=%BLAS% - cmd: IF %OPENCL%==true SET BUILD_BLAS=true +- cmd: IF %DX%==true SET BUILD_BLAS=true - cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static build_script: -- cmd: IF %APPVEYOR_REPO_TAG%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" -- cmd: IF %APPVEYOR_REPO_TAG%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmd: SET PGO=false +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==false SET PGO=true +- cmd: IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmd: IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" - cmd: cd build - cmd: IF %NAME%==cpu-openblas copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %NAME%==cpu-dnnl copy C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true lc0 benchmark --weights=c:\cache\testnet --backend=random --movetime=10000 +- cmd: IF %PGO%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll +- cmd: IF %PGO%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll +- cmd: IF %PGO%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll +- cmd: IF %PGO%==true lc0 benchmark --weights=c:\cache\testnet --backend=random --movetime=10000 - 
cmd: cd .. -- cmd: IF %APPVEYOR_REPO_TAG%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmd: IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe - cmd: IF %APPVEYOR_REPO_TAG%==true appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows" @@ -92,6 +95,8 @@ after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\DNNL-LICENSE - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true type scripts\check_opencl.bat |more /P > dist\check_opencl.bat - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_opencl.bat +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==true type scripts\check_dx.bat |more /P > dist\check_dx.bat +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_dx.bat - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%CUDA_PATH%\EULA.txt" dist\CUDA.txt - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%PKG_FOLDER%\cuda\NVIDIA_SLA_cuDNN_Support.txt" dist\CUDNN.txt - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true type dist\README-cuda.txt |more /P > dist\README.txt diff --git a/scripts/check_dx.bat b/scripts/check_dx.bat new file mode 100644 index 0000000000..c594361bb9 --- /dev/null +++ b/scripts/check_dx.bat @@ -0,0 +1,5 @@ +@ECHO OFF +ECHO Sanity checking the dx12 driver. +lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx,blas %* +PAUSE + diff --git a/src/neural/network_check.cc b/src/neural/network_check.cc index cf5c3c138c..3834cdf6b2 100644 --- a/src/neural/network_check.cc +++ b/src/neural/network_check.cc @@ -297,8 +297,8 @@ class CheckNetwork : public Network { case kCheckOnly: CERR << std::scientific << std::setprecision(1) << "Check mode: check only with relative tolerance " - << params_.absolute_tolerance << ", absolute tolerance " - << params_.relative_tolerance << "."; + << params_.relative_tolerance << ", absolute tolerance " + << params_.absolute_tolerance << "."; break; case kErrorDisplay: CERR << "Check mode: error display."; From 26378aff42c0f51ea77f03e82d918f83c097cbad Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Wed, 29 Jan 2020 20:05:26 +0530 Subject: [PATCH 014/151] Fix for hang in dx backend (#1063) * fix for fence wait issue - Need to wait until fence value is smaller than what we need. In some cases, when multiple threads are active, the fence can increment by 2 before the check on CPU happens which was causing a hang due to the incorrect == check. --- src/neural/dx/network_dx.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc index 45d61dcf4e..24aa3128fa 100644 --- a/src/neural/dx/network_dx.cc +++ b/src/neural/dx/network_dx.cc @@ -56,7 +56,7 @@ void DxContext::WaitForGpu(uint64_t fence_val) { // Wait for commands to finish on GPU. 
// (spinloop has lowest latency, we can try event based signal if CPU // overhead becomes a bottleneck). - while (fence_->GetCompletedValue() != fence_val) + while (fence_->GetCompletedValue() < fence_val) ; upload_scratch_mem_.offset = 0; } From 97637488856a2cfd248045d994b23a93e5b93b4d Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sat, 1 Feb 2020 23:54:04 +0200 Subject: [PATCH 015/151] Some cuda installations use /usr/lib/cuda (#1040) --- meson_options.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meson_options.txt b/meson_options.txt index 70991af912..ccd95d7939 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -40,7 +40,7 @@ option('opencl_libdirs', option('cudnn_libdirs', type: 'array', - value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/'], + value: ['/opt/cuda/lib64/', '/usr/local/cuda/lib64/', '/usr/lib/cuda/lib64/'], description: 'Paths to Cuda/cudnn libraries') option('mkl_libdirs', @@ -60,7 +60,7 @@ option('dnnl_dir', option('cudnn_include', type: 'array', - value: ['/opt/cuda/include/', '/usr/local/cuda/include/'], + value: ['/opt/cuda/include/', '/usr/local/cuda/include/', '/usr/lib/cuda/include/'], description: 'Paths to cudnn include directory') option('build_backends', From b69b90af5a67048767085378d243a3453205900f Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Fri, 7 Feb 2020 23:24:47 +0200 Subject: [PATCH 016/151] add comment with the plan for NetworkCapabilities (#1058) --- src/neural/network.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/neural/network.h b/src/neural/network.h index 979319e801..3911592bb8 100644 --- a/src/neural/network.h +++ b/src/neural/network.h @@ -69,6 +69,18 @@ class NetworkComputation { virtual ~NetworkComputation() {} }; +// The plan: +// 1. Search must not look directly into any fields of NetworkFormat anymore. +// 2. Backends populate NetworkCapabilities that show search how to use NN, both +// for input and output. +// 3. Input part of NetworkCapabilities is just copy of InputFormat for now, and +// is likely to stay so (because search not knowing how to use NN is not very +// useful), but it's fine if it will change. +// 4. On the other hand, output part of NetworkCapabilities is set of +// independent parameters (like WDL, moves left head etc), because search can +// look what's set and act accordingly. Backends may derive it from +// OutputFormat field or other places. + struct NetworkCapabilities { pblczero::NetworkFormat::InputFormat input_format; // TODO expose information of whether GetDVal() is usable or always zero. From afbaa718b0cf372bd338237b0d6584834cbec433 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Wed, 12 Feb 2020 07:56:22 +0100 Subject: [PATCH 017/151] =?UTF-8?q?Fix=20g++=20and=20clang=20warnings.=20S?= =?UTF-8?q?witch=20to=20C++17.=20Remove=20utils/optio=E2=80=A6=20(#1059)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix g++ warnings. * More cleanup. 
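As a reference for readers of the diff below, the std::optional idioms the code moves to look like this (a minimal sketch with illustrative names and types, not taken from the patch):

#include <cstdint>
#include <optional>

// operator bool and operator* behave like the old helper class, so most call
// sites only need the type spelled differently.
int64_t TimeBudgetMs(const std::optional<int64_t>& remaining_ms) {
  if (!remaining_ms) return 0;  // empty: no time limit was given
  return *remaining_ms / 20;    // engaged: dereference to read the value
}

// value_or() is also available where a default is more convenient.
int64_t RemainingOrZero(const std::optional<int64_t>& remaining_ms) {
  return remaining_ms.value_or(0);
}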
--- meson.build | 2 +- src/chess/callbacks.h | 14 +++++----- src/chess/pgn.h | 15 +++++----- src/chess/uciloop.h | 18 ++++++------ src/engine.h | 7 +++-- src/mcts/search.cc | 2 +- src/mcts/search.h | 4 +-- src/mcts/stoppers/factory.cc | 8 ++++-- src/mcts/stoppers/stoppers.h | 3 +- src/neural/encoder.cc | 2 -- src/neural/loader.cc | 4 ++- src/selfplay/loop.cc | 14 ++++++---- src/selfplay/tournament.cc | 8 +++--- src/utils/optional.h | 53 ------------------------------------ 14 files changed, 56 insertions(+), 98 deletions(-) delete mode 100644 src/utils/optional.h diff --git a/meson.build b/meson.build index 1f49503744..3a258c6259 100644 --- a/meson.build +++ b/meson.build @@ -15,7 +15,7 @@ # along with Leela Chess. If not, see . project('lc0', 'cpp', - default_options : ['cpp_std=c++14', 'b_ndebug=if-release', 'warning_level=3'], + default_options : ['cpp_std=c++17', 'b_ndebug=if-release', 'warning_level=3'], meson_version: '>=0.46') cc = meson.get_compiler('cpp') diff --git a/src/chess/callbacks.h b/src/chess/callbacks.h index f442d83c4a..8d4a098de4 100644 --- a/src/chess/callbacks.h +++ b/src/chess/callbacks.h @@ -29,12 +29,12 @@ #include #include +#include #include #include #include "chess/bitboard.h" #include "chess/position.h" -#include "utils/optional.h" namespace lczero { @@ -50,7 +50,7 @@ struct BestMoveInfo { // Index of the game in the tournament (0-based). int game_id = -1; // The color of the player, if known. - optional is_black; + std::optional is_black; }; // Is sent during the search. @@ -68,14 +68,14 @@ struct ThinkingInfo { // Hash fullness * 1000 int hashfull = -1; // Win in centipawns. - optional score; + std::optional score; // Win/Draw/Lose probability * 1000. struct WDL { int w; int d; int l; }; - optional wdl; + std::optional wdl; // Number of successful TB probes (not the same as playouts ending in TB hit). int tb_hits = -1; // Best line found. Moves are from perspective of white player. @@ -91,7 +91,7 @@ struct ThinkingInfo { // Index of the game in the tournament (0-based). int game_id = -1; // The color of the player, if known. - optional is_black; + std::optional is_black; }; // Is sent when a single game is finished. @@ -107,11 +107,11 @@ struct GameInfo { // Index of the game in the tournament (0-based). int game_id = -1; // The color of the player1, if known. - optional is_black; + std::optional is_black; // Minimum resign threshold which would have resulted in a false positive // if resign had of been enabled. // Only provided if the game wasn't played with resign enabled. - optional min_false_positive_threshold; + std::optional min_false_positive_threshold; using Callback = std::function; }; diff --git a/src/chess/pgn.h b/src/chess/pgn.h index 61f6cb556f..b25f37d98a 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -31,6 +31,7 @@ #include "chess/bitboard.h" #include "chess/board.h" +#include "utils/logging.h" namespace lczero { @@ -48,8 +49,8 @@ class PgnReader { } // Handle braced comments. 
int cur_offset = 0; - while (in_comment && line.find('}', cur_offset) != std::string::npos || - !in_comment && line.find('{', cur_offset) != std::string::npos) { + while ((in_comment && line.find('}', cur_offset) != std::string::npos) || + (!in_comment && line.find('{', cur_offset) != std::string::npos)) { if (in_comment && line.find('}', cur_offset) != std::string::npos) { line = line.substr(0, cur_offset) + line.substr(line.find('}', cur_offset) + 1); @@ -77,7 +78,7 @@ class PgnReader { const auto idx = word.find('.'); if (idx != std::string::npos) { bool all_nums = true; - for (int i = 0; i < idx; i++) { + for (size_t i = 0; i < idx; i++) { if (word[i] < '0' || word[i] > '9') { all_nums = false; break; @@ -131,14 +132,14 @@ class PgnReader { default: // 0 and 1 are pawn and king, which are not legal promotions, other // numbers don't correspond to a known piece type. - std::cerr << "Unexpected promotion!!" << std::endl; + CERR << "Unexpected promotion!!"; throw Exception("Trying to create a move with illegal promotion."); } } Move SanToMove(const std::string& san, const ChessBoard& board) { int p = 0; - int idx = 0; + size_t idx = 0; if (san[0] == 'K') { p = 1; } else if (san[0] == 'Q') { @@ -233,14 +234,14 @@ class PgnReader { continue; } if (pc1 != -1) { - std::cerr << "Ambiguous!!" << std::endl; + CERR << "Ambiguous!!"; throw Exception("Opening book move seems ambiguous."); } pr1 = sq.row(); pc1 = sq.col(); } if (pc1 == -1) { - std::cerr << "No Match!!" << std::endl; + CERR << "No Match!!"; throw Exception("Opening book move seems illegal."); } r1 = pr1; diff --git a/src/chess/uciloop.h b/src/chess/uciloop.h index 4224dc7664..8d5d0e2010 100644 --- a/src/chess/uciloop.h +++ b/src/chess/uciloop.h @@ -28,23 +28,25 @@ #pragma once #include +#include #include #include #include + #include "chess/callbacks.h" #include "utils/exception.h" namespace lczero { struct GoParams { - optional wtime; - optional btime; - optional winc; - optional binc; - optional movestogo; - optional depth; - optional nodes; - optional movetime; + std::optional wtime; + std::optional btime; + std::optional winc; + std::optional binc; + std::optional movestogo; + std::optional depth; + std::optional nodes; + std::optional movetime; bool infinite = false; std::vector searchmoves; bool ponder = false; diff --git a/src/engine.h b/src/engine.h index cd64eae0c3..0c4537aef4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -27,6 +27,8 @@ #pragma once +#include + #include "chess/uciloop.h" #include "mcts/search.h" #include "neural/cache.h" @@ -34,7 +36,6 @@ #include "neural/network.h" #include "syzygy/syzygy.h" #include "utils/mutex.h" -#include "utils/optional.h" #include "utils/optionsparser.h" namespace lczero { @@ -103,10 +104,10 @@ class EngineController { // The current position as given with SetPosition. For normal (ie. non-ponder) // search, the tree is set up with this position, however, during ponder we // actually search the position one move earlier. 
- optional current_position_; + std::optional current_position_; GoParams go_params_; - optional move_start_time_; + std::optional move_start_time_; }; class EngineLoop : public UciLoop { diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 88bee0aa58..7dfeacdc99 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -255,7 +255,7 @@ std::vector Search::GetVerboseStats(Node* node, << edge.GetQ(fpu, logit_q) + edge.GetU(U_coeff) << ") "; oss << "(V: "; - optional v; + std::optional v; if (edge.IsTerminal()) { v = edge.node()->GetQ(); } else { diff --git a/src/mcts/search.h b/src/mcts/search.h index 08dcdb2144..5d0ca58fb9 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -28,6 +28,7 @@ #pragma once #include +#include #include #include @@ -41,7 +42,6 @@ #include "syzygy/syzygy.h" #include "utils/logging.h" #include "utils/mutex.h" -#include "utils/optional.h" namespace lczero { @@ -173,7 +173,7 @@ class Search { uint16_t max_depth_ GUARDED_BY(nodes_mutex_) = 0; // Cummulative depth of all paths taken in PickNodetoExtend. uint64_t cum_depth_ GUARDED_BY(nodes_mutex_) = 0; - optional nps_start_time_; + std::optional nps_start_time_; std::atomic tb_hits_{0}; std::unique_ptr uci_responder_; diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index 2a8c94d26a..da312e25c8 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -27,6 +27,8 @@ #include "mcts/stoppers/factory.h" +#include + #include "mcts/stoppers/stoppers.h" namespace lczero { @@ -223,12 +225,12 @@ std::unique_ptr LegacyTimeManager::CreateTimeManagementStopper( const OptionsDict& options, const GoParams& params, const Position& position) { const bool is_black = position.IsBlackToMove(); - const optional& time = (is_black ? params.btime : params.wtime); + const std::optional& time = (is_black ? params.btime : params.wtime); // If no time limit is given, don't stop on this condition. if (params.infinite || params.ponder || !time) return nullptr; const int64_t move_overhead = options.Get(kMoveOverheadId.GetId()); - const optional& inc = is_black ? params.binc : params.winc; + const std::optional& inc = is_black ? params.binc : params.winc; const int increment = inc ? std::max(int64_t(0), *inc) : 0; // How to scale moves time. @@ -299,7 +301,7 @@ std::unique_ptr LegacyTimeManager::GetStopper( result->AddStopper(CreateTimeManagementStopper(options, params, position)); // All the standard stoppers (go nodes, RAM limit, smart pruning, etc). 
PopulateStoppers(result.get(), options, params); - return std::move(result); + return result; } } // namespace lczero diff --git a/src/mcts/stoppers/stoppers.h b/src/mcts/stoppers/stoppers.h index ac574b6ba1..76c89704bb 100644 --- a/src/mcts/stoppers/stoppers.h +++ b/src/mcts/stoppers/stoppers.h @@ -27,6 +27,7 @@ #pragma once +#include #include #include "mcts/node.h" @@ -129,7 +130,7 @@ class SmartPruningStopper : public SearchStopper { private: const double smart_pruning_factor_; Mutex mutex_; - optional first_eval_time_ GUARDED_BY(mutex_); + std::optional first_eval_time_ GUARDED_BY(mutex_); }; } // namespace lczero \ No newline at end of file diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index 2a01aa5e84..293030a2b2 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -29,8 +29,6 @@ #include -#include "utils/optional.h" - namespace lczero { namespace { diff --git a/src/neural/loader.cc b/src/neural/loader.cc index 27f417374f..a41ab46e5a 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -87,7 +87,9 @@ WeightsFile ParseWeightsProto(const std::string& buffer) { ArrayInputStream raw_input_stream(buffer.data(), buffer.size()); CodedInputStream input_stream(&raw_input_stream); - // Set protobuf limit to 2GB, print warning at 500MB. + // Set protobuf limit to 2GB. + // Remove the second parameter when everyone uses newer protobufs. + // Until then, let everyone who uses new libprotobuf observe warnings. :sigh: input_stream.SetTotalBytesLimit(2000 * 1000000, 500 * 1000000); if (!net.ParseFromCodedStream(&input_stream)) diff --git a/src/selfplay/loop.cc b/src/selfplay/loop.cc index 0f507863f0..98f28b7fe8 100644 --- a/src/selfplay/loop.cc +++ b/src/selfplay/loop.cc @@ -26,6 +26,9 @@ */ #include "selfplay/loop.h" + +#include + #include "selfplay/tournament.h" #include "utils/configfile.h" @@ -136,8 +139,8 @@ void SelfPlayLoop::SendTournament(const TournamentInfo& info) { // Initialize variables. float percentage = -1; - optional elo; - optional los; + std::optional elo; + std::optional los; // Only caculate percentage if any games at all (avoid divide by 0). if ((winp1 + losep1 + draws) > 0) { @@ -163,18 +166,19 @@ void SelfPlayLoop::SendTournament(const TournamentInfo& info) { } if (elo) { oss << " Elo: " << std::fixed << std::setw(5) << std::setprecision(2) - << (elo.value_or(0.0f)); + << (*elo); } if (los) { oss << " LOS: " << std::fixed << std::setw(5) << std::setprecision(2) - << (los.value_or(0.0f) * 100.0f) << "%"; + << (*los * 100.0f) << "%"; } oss << " P1-W: +" << info.results[0][0] << " -" << info.results[2][0] << " =" << info.results[1][0]; oss << " P1-B: +" << info.results[0][1] << " -" << info.results[2][1] << " =" << info.results[1][1]; - oss << " npm " + std::to_string(static_cast(info.nodes_total_) / info.move_count_); + oss << " npm " + std::to_string(static_cast(info.nodes_total_) / + info.move_count_); oss << " nodes " + std::to_string(info.nodes_total_); oss << " moves " + std::to_string(info.move_count_); SendResponse(oss.str()); diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index a1a7b582f6..c08da5f82a 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -370,10 +370,10 @@ void SelfPlayTournament::Worker() { Mutex::Lock lock(mutex_); if (abort_) break; bool mirrored = player_options_[0].Get(kOpeningsMirroredId.GetId()); - if (kTotalGames >= 0 && games_count_ >= kTotalGames || - kTotalGames == -2 && !openings_.empty() && - games_count_ >= - static_cast(openings_.size()) * (mirrored ? 
2 : 1)) + if ((kTotalGames >= 0 && games_count_ >= kTotalGames) || + (kTotalGames == -2 && !openings_.empty() && + games_count_ >= + static_cast(openings_.size()) * (mirrored ? 2 : 1))) break; game_id = games_count_++; } diff --git a/src/utils/optional.h b/src/utils/optional.h deleted file mode 100644 index c823ba45ff..0000000000 --- a/src/utils/optional.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors - - Leela Chess is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Leela Chess is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Leela Chess. If not, see . - - Additional permission under GNU GPL version 3 section 7 - - If you modify this Program, or any covered work, by linking or - combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA - Toolkit and the NVIDIA CUDA Deep Neural Network library (or a - modified version of those libraries), containing parts covered by the - terms of the respective license agreement, the licensors of this - Program grant you additional permission to convey the resulting work. -*/ - -#pragma once - -namespace lczero { - -// Very poor-man implementation of std::optional. It literally cannot do -// anything, but it's enough for our use case. -template -class optional { - public: - operator bool() const { return has_value_; } - constexpr const T& operator*() const& { return value_; } - constexpr const T* operator->() const& { return &value_; } - optional& operator=(const T& value) { - value_ = value; - has_value_ = true; - return *this; - } - void reset() { has_value_ = false; } - T value_or(const T& def) const { return has_value_ ? value_ : def; } - - private: - T value_; - bool has_value_ = false; -}; - -} // namespace lczero From dac1a70ab563377036631ab51bad8c303151c003 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 13 Feb 2020 22:57:53 +1100 Subject: [PATCH 018/151] Small forward looking fix for when we allow starting selfplay from fen that might be black to move. (#1047) --- src/selfplay/game.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index 95c948595b..e23c985e3c 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -90,7 +90,7 @@ SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2, void SelfPlayGame::Play(int white_threads, int black_threads, bool training, bool enable_resign) { - bool blacks_move = (tree_[0]->GetPlyCount() % 2) == 1; + bool blacks_move = tree_[0]->IsBlackToMove(); // Do moves while not end of the game. 
(And while not abort_) while (!abort_) { From cf6bccc7ab9710e153bb33c22b5bc61c353c04d6 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Thu, 13 Feb 2020 21:40:22 +0200 Subject: [PATCH 019/151] appveyor fix for training.lczero.org (#1073) --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 25206f9d99..3f1a976901 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -48,7 +48,7 @@ install: - cmd: IF NOT EXIST c:\cache\protobuf\ cmake -G "Visual Studio 15 2017 Win64" -Dprotobuf_BUILD_SHARED_LIBS=NO -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=c:/cache/protobuf ../cmake - cmd: IF NOT EXIST c:\cache\protobuf\ msbuild INSTALL.vcxproj /p:Configuration=Release /p:Platform=x64 /m - cmd: set PATH=c:\cache\protobuf\bin;%PATH% -- cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet +- cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://training.lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy - cmd: IF %GTEST%==true IF NOT EXIST KQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK.rtb{w,z} From 59af9830ff343f15a3054aba0da00379ae63c48c Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Thu, 13 Feb 2020 21:55:14 +0100 Subject: [PATCH 020/151] Hide cache usage by default. (#1074) --- src/mcts/params.cc | 8 +++++++- src/mcts/params.h | 3 +++ src/mcts/search.cc | 7 +++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 347a314b27..e223416a03 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -189,6 +189,9 @@ const OptionId SearchParams::kHistoryFillId{ const OptionId SearchParams::kShortSightednessId{ "short-sightedness", "ShortSightedness", "Used to focus more on short term gains over long term."}; +const OptionId SearchParams::kDisplayCacheUsageId{ + "display-cache-usage", "DisplayCacheUsage", + "Display cache fullness through UCI info `hash` section."}; void SearchParams::Populate(OptionsParser* options) { // Here the uci optimized defaults" are set. 
@@ -232,10 +235,12 @@ void SearchParams::Populate(OptionsParser* options) { std::vector history_fill_opt{"no", "fen_only", "always"}; options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; + options->Add(kDisplayCacheUsageId) = false; options->HideOption(kNoiseEpsilonId); options->HideOption(kNoiseAlphaId); options->HideOption(kLogLiveStatsId); + options->HideOption(kDisplayCacheUsageId); } SearchParams::SearchParams(const OptionsDict& options) @@ -269,6 +274,7 @@ SearchParams::SearchParams(const OptionsDict& options) kHistoryFill( EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())), - kShortSightedness(options.Get(kShortSightednessId.GetId())) {} + kShortSightedness(options.Get(kShortSightednessId.GetId())), + kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())) {} } // namespace lczero diff --git a/src/mcts/params.h b/src/mcts/params.h index d23115d228..d4877769a6 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -99,6 +99,7 @@ class SearchParams { return options_.Get(kScoreTypeId.GetId()); } FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } + bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; } // Search parameter IDs. static const OptionId kMiniBatchSizeId; @@ -134,6 +135,7 @@ class SearchParams { static const OptionId kScoreTypeId; static const OptionId kHistoryFillId; static const OptionId kShortSightednessId; + static const OptionId kDisplayCacheUsageId; private: const OptionsDict& options_; @@ -163,6 +165,7 @@ class SearchParams { const FillEmptyHistory kHistoryFill; const int kMiniBatchSize; const float kShortSightedness; + const bool kDisplayCacheUsage; }; } // namespace lczero diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 7dfeacdc99..3f02c63ca5 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -95,6 +95,7 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { const auto edges = GetBestChildrenNoTemperature(root_node_, max_pv); const auto score_type = params_.GetScoreType(); const auto per_pv_counters = params_.GetPerPvCounters(); + const auto display_cache_usage = params_.GetDisplayCacheUsage(); std::vector uci_infos; @@ -106,8 +107,10 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { if (!per_pv_counters) { common_info.nodes = total_playouts_ + initial_visits_; } - common_info.hashfull = - cache_->GetSize() * 1000LL / std::max(cache_->GetCapacity(), 1); + if (display_cache_usage) { + common_info.hashfull = + cache_->GetSize() * 1000LL / std::max(cache_->GetCapacity(), 1); + } if (nps_start_time_) { const auto time_since_first_batch_ms = std::chrono::duration_cast( From fa5b0cbd7faef87c9972ac08dfa0bde25aefc6bc Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 14 Feb 2020 09:01:11 +1100 Subject: [PATCH 021/151] =?UTF-8?q?Add=20option=20to=20reduce=20concurrent?= =?UTF-8?q?=20search=20without=20limiting=20search=E2=80=A6=20(#1070)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add option to reduce concurrent search without limiting search worker count. * Fix for stupid windows atomics. * Address some review feedback. * Random update I'll undo to trigger rerun of appveyor * Undo last commit. 
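For readers skimming the patch, here is the gating idea of this change reduced to a self-contained C++ sketch. It is not the lc0 code: the slot count of 2, the four toy workers, and the placeholder comments standing in for minibatch gathering and NN evaluation are illustrative assumptions. It does show the same pattern the hunks below add to SearchWorker::ExecuteOneIteration(): an atomic counter of free "searcher" slots that a worker must claim through a compare-and-swap spin before it may gather a minibatch, and that it returns as soon as gathering is done, so any number of worker threads can exist while only a bounded number gather at once.

    #include <atomic>
    #include <chrono>
    #include <cstdio>
    #include <thread>
    #include <vector>

    // Free "searcher slots": only this many workers may gather a minibatch at
    // the same time. The value 2 stands in for MaxConcurrentSearchers.
    std::atomic<int> pending_slots{2};
    std::atomic<bool> stop_flag{false};

    void Worker() {
      while (!stop_flag.load(std::memory_order_acquire)) {
        // Acquire a slot with a hard spin (low latency, busy-wait CPU cost).
        while (true) {
          if (stop_flag.load(std::memory_order_acquire)) return;
          int available = pending_slots.load(std::memory_order_acquire);
          if (available > 0 &&
              pending_slots.compare_exchange_weak(available, available - 1,
                                                  std::memory_order_acq_rel)) {
            break;
          }
        }
        // ... gather a minibatch here; at most 2 workers reach this point ...
        // Return the slot before the expensive evaluation so another worker
        // can start gathering while this one waits for the network.
        pending_slots.fetch_add(1, std::memory_order_acq_rel);
        // ... run the NN computation and update the tree ...
      }
    }

    int main() {
      std::vector<std::thread> workers;
      for (int i = 0; i < 4; ++i) workers.emplace_back(Worker);
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      stop_flag.store(true, std::memory_order_release);
      for (auto& w : workers) w.join();
      std::puts("all workers stopped");
    }

Note the ordering: in the actual diff the slot is released after prefetching into the cache but before RunNNComputation(), so only the batch-gathering phase is throttled while GPU evaluation still overlaps freely across workers.
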
--- src/mcts/params.cc | 9 ++++++++- src/mcts/params.h | 3 +++ src/mcts/search.cc | 29 ++++++++++++++++++++++++++++- src/mcts/search.h | 2 ++ 4 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index e223416a03..b5facc5d3e 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -192,6 +192,10 @@ const OptionId SearchParams::kShortSightednessId{ const OptionId SearchParams::kDisplayCacheUsageId{ "display-cache-usage", "DisplayCacheUsage", "Display cache fullness through UCI info `hash` section."}; +const OptionId SearchParams::kMaxConcurrentSearchersId{ + "max-concurrent-searchers", "MaxConcurrentSearchers", + "If not 0, at most this many search workers can be gathering minibatches " + "at once."}; void SearchParams::Populate(OptionsParser* options) { // Here the uci optimized defaults" are set. @@ -236,6 +240,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; + options->Add(kMaxConcurrentSearchersId, 0, 128) = 0; options->HideOption(kNoiseEpsilonId); options->HideOption(kNoiseAlphaId); @@ -275,6 +280,8 @@ SearchParams::SearchParams(const OptionsDict& options) EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())), kShortSightedness(options.Get(kShortSightednessId.GetId())), - kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())) {} + kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), + kMaxConcurrentSearchers( + options.Get(kMaxConcurrentSearchersId.GetId())) {} } // namespace lczero diff --git a/src/mcts/params.h b/src/mcts/params.h index d4877769a6..df58f9279c 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -100,6 +100,7 @@ class SearchParams { } FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; } + int GetMaxConcurrentSearchers() const { return kMaxConcurrentSearchers; } // Search parameter IDs. static const OptionId kMiniBatchSizeId; @@ -136,6 +137,7 @@ class SearchParams { static const OptionId kHistoryFillId; static const OptionId kShortSightednessId; static const OptionId kDisplayCacheUsageId; + static const OptionId kMaxConcurrentSearchersId; private: const OptionsDict& options_; @@ -166,6 +168,7 @@ class SearchParams { const int kMiniBatchSize; const float kShortSightedness; const bool kDisplayCacheUsage; + const int kMaxConcurrentSearchers; }; } // namespace lczero diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 3f02c63ca5..466f9bca61 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -67,7 +67,12 @@ Search::Search(const NodeTree& tree, Network* network, start_time_(start_time), initial_visits_(root_node_->GetN()), uci_responder_(std::move(uci_responder)), - params_(options) {} + params_(options) { + if (params_.GetMaxConcurrentSearchers() != 0) { + pending_searchers_.store(params_.GetMaxConcurrentSearchers(), + std::memory_order_release); + } +} namespace { void ApplyDirichletNoise(Node* node, float eps, double alpha) { @@ -671,12 +676,34 @@ void SearchWorker::ExecuteOneIteration() { // 1. Initialize internal structures. InitializeIteration(search_->network_->NewComputation()); + if (params_.GetMaxConcurrentSearchers() != 0) { + while (true) { + // If search is stop, we've not gathered or done anything and we don't + // want to, so we can safely skip all below. 
+ if (search_->stop_.load(std::memory_order_acquire)) return; + int available = + search_->pending_searchers_.load(std::memory_order_acquire); + if (available > 0 && + search_->pending_searchers_.compare_exchange_weak( + available, available - 1, std::memory_order_acq_rel)) { + break; + } + // This is a hard spin lock to reduce latency but at the expense of busy + // wait cpu usage. If search worker count is large, this is probably a bad + // idea. + } + } + // 2. Gather minibatch. GatherMinibatch(); // 3. Prefetch into cache. MaybePrefetchIntoCache(); + if (params_.GetMaxConcurrentSearchers() != 0) { + search_->pending_searchers_.fetch_add(1, std::memory_order_acq_rel); + } + // 4. Run NN computation. RunNNComputation(); diff --git a/src/mcts/search.h b/src/mcts/search.h index 5d0ca58fb9..616f025480 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -176,6 +176,8 @@ class Search { std::optional nps_start_time_; std::atomic tb_hits_{0}; + std::atomic pending_searchers_{0}; + std::unique_ptr uci_responder_; const SearchParams params_; From 40a7c9acc03bcd657e19db7faf158c9afae718d5 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Fri, 14 Feb 2020 19:49:46 +0100 Subject: [PATCH 022/151] Allow different cpuct at root. (#1075) * Allow different cpuct at root. * Comment out some FRC tests. * Throw exception if cpuct is negative. --- src/chess/board_test.cc | 3788 +++++++++++++++++++-------------------- src/mcts/params.cc | 13 +- src/mcts/params.h | 3 + src/mcts/search.cc | 13 +- 4 files changed, 1917 insertions(+), 1900 deletions(-) diff --git a/src/chess/board_test.cc b/src/chess/board_test.cc index 1b19091c2f..a0995e9456 100644 --- a/src/chess/board_test.cc +++ b/src/chess/board_test.cc @@ -183,1926 +183,1926 @@ const struct { const char* const fen; const uint32_t perft[6]; } kChess960Positions[] = { - {"bqnb1rkr/pp3ppp/3ppn2/2p5/5P2/P2P4/NPP1P1PP/BQ1BNRKR w HFhf - 2 9", - {21, 528, 12189, 326672, 8146062, 227689589}}, // 1 - {"2nnrbkr/p1qppppp/8/1ppb4/6PP/3PP3/PPP2P2/BQNNRBKR w HEhe - 1 9", - {21, 807, 18002, 667366, 16253601, 590751109}}, // 2 - {"b1q1rrkb/pppppppp/3nn3/8/P7/1PPP4/4PPPP/BQNNRKRB w GE - 1 9", - {20, 479, 10471, 273318, 6417013, 177654692}}, // 3 - {"qbbnnrkr/2pp2pp/p7/1p2pp2/8/P3PP2/1PPP1KPP/QBBNNR1R w hf - 0 9", - {22, 593, 13440, 382958, 9183776, 274103539}}, // 4 - {"1nbbnrkr/p1p1ppp1/3p4/1p3P1p/3Pq2P/8/PPP1P1P1/QNBBNRKR w HFhf - 0 9", - {28, 1120, 31058, 1171749, 34030312, 1250970898}}, // 5 - {"qnbnr1kr/ppp1b1pp/4p3/3p1p2/8/2NPP3/PPP1BPPP/QNB1R1KR w HEhe - 1 9", - {29, 899, 26578, 824055, 24851983, 775718317}}, // 6 - {"q1bnrkr1/ppppp2p/2n2p2/4b1p1/2NP4/8/PPP1PPPP/QNB1RRKB w ge - 1 9", - {30, 860, 24566, 732757, 21093346, 649209803}}, // 7 - {"qbn1brkr/ppp1p1p1/2n4p/3p1p2/P7/6PP/QPPPPP2/1BNNBRKR w HFhf - 0 9", - {25, 635, 17054, 465806, 13203304, 377184252}}, // 8 - {"qnnbbrkr/1p2ppp1/2pp3p/p7/1P5P/2NP4/P1P1PPP1/Q1NBBRKR w HFhf - 0 9", - {24, 572, 15243, 384260, 11110203, 293989890}}, // 9 - {"qn1rbbkr/ppp2p1p/1n1pp1p1/8/3P4/P6P/1PP1PPPK/QNNRBB1R w hd - 2 9", - {28, 811, 23175, 679699, 19836606, 594527992}}, // 10 - {"qnr1bkrb/pppp2pp/3np3/5p2/8/P2P2P1/NPP1PP1P/QN1RBKRB w GDg - 3 9", - {33, 823, 26895, 713420, 23114629, 646390782}}, // 11 - {"qb1nrkbr/1pppp1p1/1n3p2/p1B4p/8/3P1P1P/PPP1P1P1/QBNNRK1R w HEhe - 0 9", - {31, 855, 25620, 735703, 21796206, 651054626}}, // 12 - {"qnnbrk1r/1p1ppbpp/2p5/p4p2/2NP3P/8/PPP1PPP1/Q1NBRKBR w HEhe - 0 9", - {26, 790, 21238, 642367, 17819770, 544866674}}, // 13 - {"1qnrkbbr/1pppppp1/p1n4p/8/P7/1P1N1P2/2PPP1PP/QN1RKBBR w HDhd - 
0 9", - {37, 883, 32187, 815535, 29370838, 783201510}}, // 14 + // {"bqnb1rkr/pp3ppp/3ppn2/2p5/5P2/P2P4/NPP1P1PP/BQ1BNRKR w HFhf - 2 9", + // {21, 528, 12189, 326672, 8146062, 227689589}}, // 1 + // {"2nnrbkr/p1qppppp/8/1ppb4/6PP/3PP3/PPP2P2/BQNNRBKR w HEhe - 1 9", {21, + // 807, 18002, 667366, 16253601, 590751109}}, // 2 + // {"b1q1rrkb/pppppppp/3nn3/8/P7/1PPP4/4PPPP/BQNNRKRB w GE - 1 9", {20, 479, + // 10471, 273318, 6417013, 177654692}}, // 3 + // {"qbbnnrkr/2pp2pp/p7/1p2pp2/8/P3PP2/1PPP1KPP/QBBNNR1R w hf - 0 9", {22, + // 593, 13440, 382958, 9183776, 274103539}}, // 4 + // {"1nbbnrkr/p1p1ppp1/3p4/1p3P1p/3Pq2P/8/PPP1P1P1/QNBBNRKR w HFhf - 0 9", + // {28, 1120, 31058, 1171749, 34030312, 1250970898}}, // 5 + // {"qnbnr1kr/ppp1b1pp/4p3/3p1p2/8/2NPP3/PPP1BPPP/QNB1R1KR w HEhe - 1 9", + // {29, 899, 26578, 824055, 24851983, 775718317}}, // 6 + // {"q1bnrkr1/ppppp2p/2n2p2/4b1p1/2NP4/8/PPP1PPPP/QNB1RRKB w ge - 1 9", {30, + // 860, 24566, 732757, 21093346, 649209803}}, // 7 + // {"qbn1brkr/ppp1p1p1/2n4p/3p1p2/P7/6PP/QPPPPP2/1BNNBRKR w HFhf - 0 9", + // {25, 635, 17054, 465806, 13203304, 377184252}}, // 8 + // {"qnnbbrkr/1p2ppp1/2pp3p/p7/1P5P/2NP4/P1P1PPP1/Q1NBBRKR w HFhf - 0 9", + // {24, 572, 15243, 384260, 11110203, 293989890}}, // 9 + // {"qn1rbbkr/ppp2p1p/1n1pp1p1/8/3P4/P6P/1PP1PPPK/QNNRBB1R w hd - 2 9", {28, + // 811, 23175, 679699, 19836606, 594527992}}, // 10 + // {"qnr1bkrb/pppp2pp/3np3/5p2/8/P2P2P1/NPP1PP1P/QN1RBKRB w GDg - 3 9", {33, + // 823, 26895, 713420, 23114629, 646390782}}, // 11 + // {"qb1nrkbr/1pppp1p1/1n3p2/p1B4p/8/3P1P1P/PPP1P1P1/QBNNRK1R w HEhe - 0 9", + // {31, 855, 25620, 735703, 21796206, 651054626}}, // 12 + // {"qnnbrk1r/1p1ppbpp/2p5/p4p2/2NP3P/8/PPP1PPP1/Q1NBRKBR w HEhe - 0 9", + // {26, 790, 21238, 642367, 17819770, 544866674}}, // 13 + // {"1qnrkbbr/1pppppp1/p1n4p/8/P7/1P1N1P2/2PPP1PP/QN1RKBBR w HDhd - 0 9", + // {37, 883, 32187, 815535, 29370838, 783201510}}, // 14 {"qn1rkrbb/pp1p1ppp/2p1p3/3n4/4P2P/2NP4/PPP2PP1/Q1NRKRBB w FDfd - 1 9", {24, 585, 14769, 356950, 9482310, 233468620}}, // 15 - {"bb1qnrkr/pp1p1pp1/1np1p3/4N2p/8/1P4P1/P1PPPP1P/BBNQ1RKR w HFhf - 0 9", - {29, 864, 25747, 799727, 24219627, 776836316}}, // 16 - {"bnqbnr1r/p1p1ppkp/3p4/1p4p1/P7/3NP2P/1PPP1PP1/BNQB1RKR w HF - 0 9", - {26, 889, 24353, 832956, 23701014, 809194268}}, // 17 - {"bnqnrbkr/1pp2pp1/p7/3pP2p/4P1P1/8/PPPP3P/BNQNRBKR w HEhe d6 0 9", - {31, 984, 28677, 962591, 29032175, 1008880643}}, // 18 - {"b1qnrrkb/ppp1pp1p/n2p1Pp1/8/8/P7/1PPPP1PP/BNQNRKRB w GE - 0 9", - {20, 484, 10532, 281606, 6718715, 193594729}}, // 19 - {"n1bqnrkr/pp1ppp1p/2p5/6p1/2P2b2/PN6/1PNPPPPP/1BBQ1RKR w HFhf - 2 9", - {23, 732, 17746, 558191, 14481581, 457140569}}, // 20 - {"n1bb1rkr/qpnppppp/2p5/p7/P1P5/5P2/1P1PPRPP/NQBBN1KR w Hhf - 1 9", - {27, 697, 18724, 505089, 14226907, 400942568}}, // 21 - {"nqb1rbkr/pppppp1p/4n3/6p1/4P3/1NP4P/PP1P1PP1/1QBNRBKR w HEhe - 1 9", - {28, 641, 18811, 456916, 13780398, 354122358}}, // 22 - {"n1bnrrkb/pp1pp2p/2p2p2/6p1/5B2/3P4/PPP1PPPP/NQ1NRKRB w GE - 2 9", - {28, 606, 16883, 381646, 10815324, 254026570}}, // 23 - {"nbqnbrkr/2ppp1p1/pp3p1p/8/4N2P/1N6/PPPPPPP1/1BQ1BRKR w HFhf - 0 9", - {26, 626, 17268, 437525, 12719546, 339132046}}, // 24 - {"nq1bbrkr/pp2nppp/2pp4/4p3/1PP1P3/1B6/P2P1PPP/NQN1BRKR w HFhf - 2 9", - {21, 504, 11812, 302230, 7697880, 207028745}}, // 25 - {"nqnrb1kr/2pp1ppp/1p1bp3/p1B5/5P2/3N4/PPPPP1PP/NQ1R1BKR w HDhd - 0 9", - {30, 672, 19307, 465317, 13454573, 345445468}}, // 26 - {"nqn2krb/p1prpppp/1pbp4/7P/5P2/8/PPPPPKP1/NQNRB1RB w g - 3 9", - {21, 461, 10608, 248069, 6194124, 
152861936}}, // 27 - {"nb1n1kbr/ppp1rppp/3pq3/P3p3/8/4P3/1PPPRPPP/NBQN1KBR w Hh - 1 9", - {19, 566, 11786, 358337, 8047916, 249171636}}, // 28 - {"nqnbrkbr/1ppppp1p/p7/6p1/6P1/P6P/1PPPPP2/NQNBRKBR w HEhe - 1 9", - {20, 382, 8694, 187263, 4708975, 112278808}}, // 29 - {"nq1rkb1r/pp1pp1pp/1n2bp1B/2p5/8/5P1P/PPPPP1P1/NQNRKB1R w HDhd - 2 9", - {24, 809, 20090, 673811, 17647882, 593457788}}, // 30 - {"nqnrkrb1/pppppp2/7p/4b1p1/8/PN1NP3/1PPP1PPP/1Q1RKRBB w FDfd - 1 9", - {26, 683, 18102, 473911, 13055173, 352398011}}, // 31 - {"bb1nqrkr/1pp1ppp1/pn5p/3p4/8/P2NNP2/1PPPP1PP/BB2QRKR w HFhf - 0 9", - {29, 695, 21193, 552634, 17454857, 483785639}}, // 32 - {"bnn1qrkr/pp1ppp1p/2p5/b3Q1p1/8/5P1P/PPPPP1P1/BNNB1RKR w HFhf - 2 9", - {44, 920, 35830, 795317, 29742670, 702867204}}, // 33 - {"bnnqrbkr/pp1p2p1/2p1p2p/5p2/1P5P/1R6/P1PPPPP1/BNNQRBK1 w Ehe - 0 9", - {33, 1022, 32724, 1024721, 32898113, 1047360456}}, // 34 - {"b1nqrkrb/2pppppp/p7/1P6/1n6/P4P2/1P1PP1PP/BNNQRKRB w GEge - 0 9", - {23, 638, 15744, 446539, 11735969, 344211589}}, // 35 - {"n1bnqrkr/3ppppp/1p6/pNp1b3/2P3P1/8/PP1PPP1P/NBB1QRKR w HFhf - 1 9", - {29, 728, 20768, 532084, 15621236, 415766465}}, // 36 - {"n2bqrkr/p1p1pppp/1pn5/3p1b2/P6P/1NP5/1P1PPPP1/1NBBQRKR w HFhf - 3 9", - {20, 533, 12152, 325059, 8088751, 223068417}}, // 37 - {"nnbqrbkr/1pp1p1p1/p2p4/5p1p/2P1P3/N7/PPQP1PPP/N1B1RBKR w HEhe - 0 9", - {27, 619, 18098, 444421, 13755384, 357222394}}, // 38 - {"nnbqrkr1/pp1pp2p/2p2b2/5pp1/1P5P/4P1P1/P1PP1P2/NNBQRKRB w GEge - 1 9", - {32, 1046, 33721, 1111186, 36218182, 1202830851}}, // 39 - {"nb1qbrkr/p1pppp2/1p1n2pp/8/1P6/2PN3P/P2PPPP1/NB1QBRKR w HFhf - 0 9", - {25, 521, 14021, 306427, 8697700, 201455191}}, // 40 - {"nnq1brkr/pp1pppp1/8/2p4P/8/5K2/PPPbPP1P/NNQBBR1R w hf - 0 9", - {23, 724, 18263, 571072, 15338230, 484638597}}, // 41 - {"nnqrbb1r/pppppk2/5pp1/7p/1P6/3P2PP/P1P1PP2/NNQRBBKR w HD - 0 9", - {30, 717, 21945, 547145, 17166700, 450069742}}, // 42 - {"nnqr1krb/p1p1pppp/2bp4/8/1p1P4/4P3/PPP2PPP/NNQRBKRB w GDgd - 0 9", - {25, 873, 20796, 728628, 18162741, 641708630}}, // 43 - {"nbnqrkbr/p2ppp2/1p4p1/2p4p/3P3P/3N4/PPP1PPPR/NB1QRKB1 w Ehe - 0 9", - {24, 589, 15190, 382317, 10630667, 279474189}}, // 44 - {"n1qbrkbr/p1ppp2p/2n2pp1/1p6/1P6/2P3P1/P2PPP1P/NNQBRKBR w HEhe - 0 9", - {22, 592, 14269, 401976, 10356818, 301583306}}, // 45 - {"2qrkbbr/ppn1pppp/n1p5/3p4/5P2/P1PP4/1P2P1PP/NNQRKBBR w HDhd - 1 9", - {27, 750, 20584, 605458, 16819085, 516796736}}, // 46 - {"1nqr1rbb/pppkp1pp/1n3p2/3p4/1P6/5P1P/P1PPPKP1/NNQR1RBB w - - 1 9", - {24, 623, 15921, 429446, 11594634, 322745925}}, // 47 - {"bbn1rqkr/pp1pp2p/4npp1/2p5/1P6/2BPP3/P1P2PPP/1BNNRQKR w HEhe - 0 9", - {23, 730, 17743, 565340, 14496370, 468608864}}, // 48 - {"bn1brqkr/pppp2p1/3npp2/7p/PPP5/8/3PPPPP/BNNBRQKR w HEhe - 0 9", - {25, 673, 17835, 513696, 14284338, 434008567}}, // 49 - {"bn1rqbkr/ppp1ppp1/1n6/2p4p/7P/3P4/PPP1PPP1/BN1RQBKR w HDhd - 0 9", - {25, 776, 20562, 660217, 18486027, 616653869}}, // 50 - {"bnnr1krb/ppp2ppp/3p4/3Bp3/q1P3PP/8/PP1PPP2/BNNRQKR1 w GDgd - 0 9", - {29, 1040, 30772, 1053113, 31801525, 1075147725}}, // 51 - {"1bbnrqkr/pp1ppppp/8/2p5/n7/3PNPP1/PPP1P2P/NBB1RQKR w HEhe - 1 9", - {24, 598, 15673, 409766, 11394778, 310589129}}, // 52 - {"nnbbrqkr/p2ppp1p/1pp5/8/6p1/N1P5/PPBPPPPP/N1B1RQKR w HEhe - 0 9", - {26, 530, 14031, 326312, 8846766, 229270702}}, // 53 - {"nnbrqbkr/2p1p1pp/p4p2/1p1p4/8/NP6/P1PPPPPP/N1BRQBKR w HDhd - 0 9", - {17, 496, 10220, 303310, 7103549, 217108001}}, // 54 - {"nnbrqk1b/pp2pprp/2pp2p1/8/3PP1P1/8/PPP2P1P/NNBRQRKB w d - 1 9", - {33, 820, 27856, 
706784, 24714401, 645835197}}, // 55 - {"1bnrbqkr/ppnpp1p1/2p2p1p/8/1P6/4PPP1/P1PP3P/NBNRBQKR w HDhd - 0 9", - {27, 705, 19760, 548680, 15964771, 464662032}}, // 56 - {"n1rbbqkr/pp1pppp1/7p/P1p5/1n6/2PP4/1P2PPPP/NNRBBQKR w HChc - 0 9", - {22, 631, 14978, 431801, 10911545, 320838556}}, // 57 - {"n1rqb1kr/p1pppp1p/1pn4b/3P2p1/P7/1P6/2P1PPPP/NNRQBBKR w HChc - 0 9", - {24, 477, 12506, 263189, 7419372, 165945904}}, // 58 - {"nnrqbkrb/pppp1pp1/7p/4p3/6P1/2N2B2/PPPPPP1P/NR1QBKR1 w Ggc - 2 9", - {29, 658, 19364, 476620, 14233587, 373744834}}, // 59 - {"n1nrqkbr/ppb2ppp/3pp3/2p5/2P3P1/5P2/PP1PPB1P/NBNRQK1R w HDhd - 1 9", - {32, 801, 25861, 681428, 22318948, 619857455}}, // 60 - {"2rbqkbr/p1pppppp/1nn5/1p6/7P/P4P2/1PPPP1PB/NNRBQK1R w HChc - 2 9", - {27, 647, 18030, 458057, 13189156, 354689323}}, // 61 - {"nn1qkbbr/pp2ppp1/2rp4/2p4p/P2P4/1N5P/1PP1PPP1/1NRQKBBR w HCh - 1 9", - {24, 738, 18916, 586009, 16420659, 519075930}}, // 62 - {"nnrqk1bb/p1ppp2p/5rp1/1p3p2/1P4P1/5P1P/P1PPP3/NNRQKRBB w FCc - 1 9", - {25, 795, 20510, 648945, 17342527, 556144017}}, // 63 - {"bb1nrkqr/ppppn2p/4ppp1/8/1P4P1/4P3/P1PPKP1P/BBNNR1QR w he - 0 9", - {29, 664, 20024, 498376, 15373803, 406016364}}, // 64 - {"bnnbrkqr/1p1ppp2/8/p1p3pp/1P6/N4P2/PBPPP1PP/2NBRKQR w HEhe - 0 9", - {31, 770, 24850, 677212, 22562080, 662029574}}, // 65 - {"1nnrkbqr/p1pp1ppp/4p3/1p6/1Pb1P3/6PB/P1PP1P1P/BNNRK1QR w HDhd - 0 9", - {27, 776, 22133, 641002, 19153245, 562738257}}, // 66 - {"bnr1kqrb/pppp1pp1/1n5p/4p3/P3P3/3P2P1/1PP2P1P/BNNRKQRB w GDg - 0 9", - {26, 624, 16411, 435426, 11906515, 338092952}}, // 67 - {"nbbnrkqr/p1ppp1pp/1p3p2/8/2P5/4P3/PP1P1PPP/NBBNRKQR w HEhe - 1 9", - {25, 624, 15561, 419635, 10817378, 311138112}}, // 68 - {"nn1brkqr/pp1bpppp/8/2pp4/P4P2/1PN5/2PPP1PP/N1BBRKQR w HEhe - 1 9", - {23, 659, 16958, 476567, 13242252, 373557073}}, // 69 - {"n1brkbqr/ppp1pp1p/6pB/3p4/2Pn4/8/PP2PPPP/NN1RKBQR w HDhd - 0 9", - {32, 1026, 30360, 978278, 29436320, 957904151}}, // 70 - {"nnbrkqrb/p2ppp2/Q5pp/1pp5/4PP2/2N5/PPPP2PP/N1BRK1RB w GDgd - 0 9", - {36, 843, 29017, 715537, 24321197, 630396940}}, // 71 - {"nbnrbk1r/pppppppq/8/7p/8/1N2QPP1/PPPPP2P/NB1RBK1R w HDhd - 2 9", - {36, 973, 35403, 1018054, 37143354, 1124883780}}, // 72 - {"nnrbbkqr/2pppp1p/p7/6p1/1p2P3/4QPP1/PPPP3P/NNRBBK1R w HChc - 0 9", - {36, 649, 22524, 489526, 16836636, 416139320}}, // 73 - {"nnrkbbqr/1p2pppp/p2p4/2p5/8/1N2P1P1/PPPP1P1P/1NKRBBQR w hc - 0 9", - {26, 672, 18136, 477801, 13342771, 363074681}}, // 74 - {"n1rkbqrb/pp1ppp2/2n3p1/2p4p/P5PP/1P6/2PPPP2/NNRKBQRB w GCgc - 0 9", - {24, 804, 20712, 684001, 18761475, 617932151}}, // 75 - {"nbkr1qbr/1pp1pppp/pn1p4/8/3P2P1/5R2/PPP1PP1P/NBN1KQBR w H - 2 9", - {30, 627, 18669, 423329, 12815016, 312798696}}, // 76 - {"nnr1kqbr/pp1pp1p1/2p5/b4p1p/P7/1PNP4/2P1PPPP/N1RBKQBR w HChc - 1 9", - {12, 421, 6530, 227044, 4266410, 149176979}}, // 77 - {"n1rkqbbr/p1pp1pp1/np2p2p/8/8/N4PP1/PPPPP1BP/N1RKQ1BR w HChc - 0 9", - {27, 670, 19119, 494690, 14708490, 397268628}}, // 78 - {"nnr1qrbb/p2kpppp/1p1p4/2p5/6P1/PP1P4/2P1PP1P/NNRKQRBB w FC - 0 9", - {27, 604, 17043, 409665, 11993332, 308518181}}, // 79 - {"bbnnrkrq/ppp1pp2/6p1/3p4/7p/7P/PPPPPPP1/BBNNRRKQ w ge - 0 9", - {20, 559, 12242, 355326, 8427161, 252274233}}, // 80 - {"bnnbrkr1/ppp2p1p/5q2/3pp1p1/4P3/1N4P1/PPPPRP1P/BN1B1KRQ w Gge - 0 9", - {26, 1036, 27228, 1028084, 28286576, 1042120495}}, // 81 - {"bn1rkbrq/1pppppp1/p6p/1n6/3P4/6PP/PPPRPP2/BNN1KBRQ w Ggd - 2 9", - {29, 633, 19278, 455476, 14333034, 361900466}}, // 82 - {"b1nrkrqb/1p1npppp/p2p4/2p5/5P2/4P2P/PPPP1RP1/BNNRK1QB w Dfd - 1 9", - {25, 
475, 12603, 270909, 7545536, 179579818}}, // 83 - {"1bbnrkrq/ppppppp1/8/7p/1n4P1/1PN5/P1PPPP1P/NBBR1KRQ w Gge - 0 9", - {30, 803, 25473, 709716, 23443854, 686365049}}, // 84 - {"nnbbrkrq/2pp1pp1/1p5p/pP2p3/7P/N7/P1PPPPP1/N1BBRKRQ w GEge - 0 9", - {18, 432, 9638, 242350, 6131124, 160393505}}, // 85 - {"nnbrkbrq/1pppp1p1/p7/7p/1P2Pp2/BN6/P1PP1PPP/1N1RKBRQ w GDgd - 0 9", - {27, 482, 13441, 282259, 8084701, 193484216}}, // 86 - {"n1brkrqb/pppp3p/n3pp2/6p1/3P1P2/N1P5/PP2P1PP/N1BRKRQB w FDfd - 0 9", - {28, 642, 19005, 471729, 14529434, 384837696}}, // 87 + // {"bb1qnrkr/pp1p1pp1/1np1p3/4N2p/8/1P4P1/P1PPPP1P/BBNQ1RKR w HFhf - 0 9", + // {29, 864, 25747, 799727, 24219627, 776836316}}, // 16 + // {"bnqbnr1r/p1p1ppkp/3p4/1p4p1/P7/3NP2P/1PPP1PP1/BNQB1RKR w HF - 0 9", + // {26, 889, 24353, 832956, 23701014, 809194268}}, // 17 + // {"bnqnrbkr/1pp2pp1/p7/3pP2p/4P1P1/8/PPPP3P/BNQNRBKR w HEhe d6 0 9", {31, + // 984, 28677, 962591, 29032175, 1008880643}}, // 18 + // {"b1qnrrkb/ppp1pp1p/n2p1Pp1/8/8/P7/1PPPP1PP/BNQNRKRB w GE - 0 9", {20, + // 484, 10532, 281606, 6718715, 193594729}}, // 19 + // {"n1bqnrkr/pp1ppp1p/2p5/6p1/2P2b2/PN6/1PNPPPPP/1BBQ1RKR w HFhf - 2 9", + // {23, 732, 17746, 558191, 14481581, 457140569}}, // 20 + // {"n1bb1rkr/qpnppppp/2p5/p7/P1P5/5P2/1P1PPRPP/NQBBN1KR w Hhf - 1 9", {27, + // 697, 18724, 505089, 14226907, 400942568}}, // 21 + // {"nqb1rbkr/pppppp1p/4n3/6p1/4P3/1NP4P/PP1P1PP1/1QBNRBKR w HEhe - 1 9", + // {28, 641, 18811, 456916, 13780398, 354122358}}, // 22 + // {"n1bnrrkb/pp1pp2p/2p2p2/6p1/5B2/3P4/PPP1PPPP/NQ1NRKRB w GE - 2 9", {28, + // 606, 16883, 381646, 10815324, 254026570}}, // 23 + // {"nbqnbrkr/2ppp1p1/pp3p1p/8/4N2P/1N6/PPPPPPP1/1BQ1BRKR w HFhf - 0 9", + // {26, 626, 17268, 437525, 12719546, 339132046}}, // 24 + // {"nq1bbrkr/pp2nppp/2pp4/4p3/1PP1P3/1B6/P2P1PPP/NQN1BRKR w HFhf - 2 9", + // {21, 504, 11812, 302230, 7697880, 207028745}}, // 25 + // {"nqnrb1kr/2pp1ppp/1p1bp3/p1B5/5P2/3N4/PPPPP1PP/NQ1R1BKR w HDhd - 0 9", + // {30, 672, 19307, 465317, 13454573, 345445468}}, // 26 + // {"nqn2krb/p1prpppp/1pbp4/7P/5P2/8/PPPPPKP1/NQNRB1RB w g - 3 9", {21, 461, + // 10608, 248069, 6194124, 152861936}}, // 27 + // {"nb1n1kbr/ppp1rppp/3pq3/P3p3/8/4P3/1PPPRPPP/NBQN1KBR w Hh - 1 9", {19, + // 566, 11786, 358337, 8047916, 249171636}}, // 28 + // {"nqnbrkbr/1ppppp1p/p7/6p1/6P1/P6P/1PPPPP2/NQNBRKBR w HEhe - 1 9", {20, + // 382, 8694, 187263, 4708975, 112278808}}, // 29 + // {"nq1rkb1r/pp1pp1pp/1n2bp1B/2p5/8/5P1P/PPPPP1P1/NQNRKB1R w HDhd - 2 9", + // {24, 809, 20090, 673811, 17647882, 593457788}}, // 30 + // {"nqnrkrb1/pppppp2/7p/4b1p1/8/PN1NP3/1PPP1PPP/1Q1RKRBB w FDfd - 1 9", + // {26, 683, 18102, 473911, 13055173, 352398011}}, // 31 + // {"bb1nqrkr/1pp1ppp1/pn5p/3p4/8/P2NNP2/1PPPP1PP/BB2QRKR w HFhf - 0 9", + // {29, 695, 21193, 552634, 17454857, 483785639}}, // 32 + // {"bnn1qrkr/pp1ppp1p/2p5/b3Q1p1/8/5P1P/PPPPP1P1/BNNB1RKR w HFhf - 2 9", + // {44, 920, 35830, 795317, 29742670, 702867204}}, // 33 + // {"bnnqrbkr/pp1p2p1/2p1p2p/5p2/1P5P/1R6/P1PPPPP1/BNNQRBK1 w Ehe - 0 9", + // {33, 1022, 32724, 1024721, 32898113, 1047360456}}, // 34 + // {"b1nqrkrb/2pppppp/p7/1P6/1n6/P4P2/1P1PP1PP/BNNQRKRB w GEge - 0 9", {23, + // 638, 15744, 446539, 11735969, 344211589}}, // 35 + // {"n1bnqrkr/3ppppp/1p6/pNp1b3/2P3P1/8/PP1PPP1P/NBB1QRKR w HFhf - 1 9", + // {29, 728, 20768, 532084, 15621236, 415766465}}, // 36 + // {"n2bqrkr/p1p1pppp/1pn5/3p1b2/P6P/1NP5/1P1PPPP1/1NBBQRKR w HFhf - 3 9", + // {20, 533, 12152, 325059, 8088751, 223068417}}, // 37 + // {"nnbqrbkr/1pp1p1p1/p2p4/5p1p/2P1P3/N7/PPQP1PPP/N1B1RBKR w 
HEhe - 0 9", + // {27, 619, 18098, 444421, 13755384, 357222394}}, // 38 + // {"nnbqrkr1/pp1pp2p/2p2b2/5pp1/1P5P/4P1P1/P1PP1P2/NNBQRKRB w GEge - 1 9", + // {32, 1046, 33721, 1111186, 36218182, 1202830851}}, // 39 + // {"nb1qbrkr/p1pppp2/1p1n2pp/8/1P6/2PN3P/P2PPPP1/NB1QBRKR w HFhf - 0 9", + // {25, 521, 14021, 306427, 8697700, 201455191}}, // 40 + // {"nnq1brkr/pp1pppp1/8/2p4P/8/5K2/PPPbPP1P/NNQBBR1R w hf - 0 9", {23, 724, + // 18263, 571072, 15338230, 484638597}}, // 41 + // {"nnqrbb1r/pppppk2/5pp1/7p/1P6/3P2PP/P1P1PP2/NNQRBBKR w HD - 0 9", {30, + // 717, 21945, 547145, 17166700, 450069742}}, // 42 + // {"nnqr1krb/p1p1pppp/2bp4/8/1p1P4/4P3/PPP2PPP/NNQRBKRB w GDgd - 0 9", {25, + // 873, 20796, 728628, 18162741, 641708630}}, // 43 + // {"nbnqrkbr/p2ppp2/1p4p1/2p4p/3P3P/3N4/PPP1PPPR/NB1QRKB1 w Ehe - 0 9", + // {24, 589, 15190, 382317, 10630667, 279474189}}, // 44 + // {"n1qbrkbr/p1ppp2p/2n2pp1/1p6/1P6/2P3P1/P2PPP1P/NNQBRKBR w HEhe - 0 9", + // {22, 592, 14269, 401976, 10356818, 301583306}}, // 45 + // {"2qrkbbr/ppn1pppp/n1p5/3p4/5P2/P1PP4/1P2P1PP/NNQRKBBR w HDhd - 1 9", + // {27, 750, 20584, 605458, 16819085, 516796736}}, // 46 + // {"1nqr1rbb/pppkp1pp/1n3p2/3p4/1P6/5P1P/P1PPPKP1/NNQR1RBB w - - 1 9", {24, + // 623, 15921, 429446, 11594634, 322745925}}, // 47 + // {"bbn1rqkr/pp1pp2p/4npp1/2p5/1P6/2BPP3/P1P2PPP/1BNNRQKR w HEhe - 0 9", + // {23, 730, 17743, 565340, 14496370, 468608864}}, // 48 + // {"bn1brqkr/pppp2p1/3npp2/7p/PPP5/8/3PPPPP/BNNBRQKR w HEhe - 0 9", {25, + // 673, 17835, 513696, 14284338, 434008567}}, // 49 + // {"bn1rqbkr/ppp1ppp1/1n6/2p4p/7P/3P4/PPP1PPP1/BN1RQBKR w HDhd - 0 9", {25, + // 776, 20562, 660217, 18486027, 616653869}}, // 50 + // {"bnnr1krb/ppp2ppp/3p4/3Bp3/q1P3PP/8/PP1PPP2/BNNRQKR1 w GDgd - 0 9", {29, + // 1040, 30772, 1053113, 31801525, 1075147725}}, // 51 + // {"1bbnrqkr/pp1ppppp/8/2p5/n7/3PNPP1/PPP1P2P/NBB1RQKR w HEhe - 1 9", {24, + // 598, 15673, 409766, 11394778, 310589129}}, // 52 + // {"nnbbrqkr/p2ppp1p/1pp5/8/6p1/N1P5/PPBPPPPP/N1B1RQKR w HEhe - 0 9", {26, + // 530, 14031, 326312, 8846766, 229270702}}, // 53 + // {"nnbrqbkr/2p1p1pp/p4p2/1p1p4/8/NP6/P1PPPPPP/N1BRQBKR w HDhd - 0 9", {17, + // 496, 10220, 303310, 7103549, 217108001}}, // 54 + // {"nnbrqk1b/pp2pprp/2pp2p1/8/3PP1P1/8/PPP2P1P/NNBRQRKB w d - 1 9", {33, + // 820, 27856, 706784, 24714401, 645835197}}, // 55 + // {"1bnrbqkr/ppnpp1p1/2p2p1p/8/1P6/4PPP1/P1PP3P/NBNRBQKR w HDhd - 0 9", + // {27, 705, 19760, 548680, 15964771, 464662032}}, // 56 + // {"n1rbbqkr/pp1pppp1/7p/P1p5/1n6/2PP4/1P2PPPP/NNRBBQKR w HChc - 0 9", {22, + // 631, 14978, 431801, 10911545, 320838556}}, // 57 + // {"n1rqb1kr/p1pppp1p/1pn4b/3P2p1/P7/1P6/2P1PPPP/NNRQBBKR w HChc - 0 9", + // {24, 477, 12506, 263189, 7419372, 165945904}}, // 58 + // {"nnrqbkrb/pppp1pp1/7p/4p3/6P1/2N2B2/PPPPPP1P/NR1QBKR1 w Ggc - 2 9", {29, + // 658, 19364, 476620, 14233587, 373744834}}, // 59 + // {"n1nrqkbr/ppb2ppp/3pp3/2p5/2P3P1/5P2/PP1PPB1P/NBNRQK1R w HDhd - 1 9", + // {32, 801, 25861, 681428, 22318948, 619857455}}, // 60 + // {"2rbqkbr/p1pppppp/1nn5/1p6/7P/P4P2/1PPPP1PB/NNRBQK1R w HChc - 2 9", {27, + // 647, 18030, 458057, 13189156, 354689323}}, // 61 + // {"nn1qkbbr/pp2ppp1/2rp4/2p4p/P2P4/1N5P/1PP1PPP1/1NRQKBBR w HCh - 1 9", + // {24, 738, 18916, 586009, 16420659, 519075930}}, // 62 + // {"nnrqk1bb/p1ppp2p/5rp1/1p3p2/1P4P1/5P1P/P1PPP3/NNRQKRBB w FCc - 1 9", + // {25, 795, 20510, 648945, 17342527, 556144017}}, // 63 + // {"bb1nrkqr/ppppn2p/4ppp1/8/1P4P1/4P3/P1PPKP1P/BBNNR1QR w he - 0 9", {29, + // 664, 20024, 498376, 15373803, 406016364}}, // 64 + // 
{"bnnbrkqr/1p1ppp2/8/p1p3pp/1P6/N4P2/PBPPP1PP/2NBRKQR w HEhe - 0 9", {31, + // 770, 24850, 677212, 22562080, 662029574}}, // 65 + // {"1nnrkbqr/p1pp1ppp/4p3/1p6/1Pb1P3/6PB/P1PP1P1P/BNNRK1QR w HDhd - 0 9", + // {27, 776, 22133, 641002, 19153245, 562738257}}, // 66 + // {"bnr1kqrb/pppp1pp1/1n5p/4p3/P3P3/3P2P1/1PP2P1P/BNNRKQRB w GDg - 0 9", + // {26, 624, 16411, 435426, 11906515, 338092952}}, // 67 + // {"nbbnrkqr/p1ppp1pp/1p3p2/8/2P5/4P3/PP1P1PPP/NBBNRKQR w HEhe - 1 9", {25, + // 624, 15561, 419635, 10817378, 311138112}}, // 68 + // {"nn1brkqr/pp1bpppp/8/2pp4/P4P2/1PN5/2PPP1PP/N1BBRKQR w HEhe - 1 9", {23, + // 659, 16958, 476567, 13242252, 373557073}}, // 69 + // {"n1brkbqr/ppp1pp1p/6pB/3p4/2Pn4/8/PP2PPPP/NN1RKBQR w HDhd - 0 9", {32, + // 1026, 30360, 978278, 29436320, 957904151}}, // 70 + // {"nnbrkqrb/p2ppp2/Q5pp/1pp5/4PP2/2N5/PPPP2PP/N1BRK1RB w GDgd - 0 9", {36, + // 843, 29017, 715537, 24321197, 630396940}}, // 71 + // {"nbnrbk1r/pppppppq/8/7p/8/1N2QPP1/PPPPP2P/NB1RBK1R w HDhd - 2 9", {36, + // 973, 35403, 1018054, 37143354, 1124883780}}, // 72 + // {"nnrbbkqr/2pppp1p/p7/6p1/1p2P3/4QPP1/PPPP3P/NNRBBK1R w HChc - 0 9", {36, + // 649, 22524, 489526, 16836636, 416139320}}, // 73 + // {"nnrkbbqr/1p2pppp/p2p4/2p5/8/1N2P1P1/PPPP1P1P/1NKRBBQR w hc - 0 9", {26, + // 672, 18136, 477801, 13342771, 363074681}}, // 74 + // {"n1rkbqrb/pp1ppp2/2n3p1/2p4p/P5PP/1P6/2PPPP2/NNRKBQRB w GCgc - 0 9", + // {24, 804, 20712, 684001, 18761475, 617932151}}, // 75 + // {"nbkr1qbr/1pp1pppp/pn1p4/8/3P2P1/5R2/PPP1PP1P/NBN1KQBR w H - 2 9", {30, + // 627, 18669, 423329, 12815016, 312798696}}, // 76 + // {"nnr1kqbr/pp1pp1p1/2p5/b4p1p/P7/1PNP4/2P1PPPP/N1RBKQBR w HChc - 1 9", + // {12, 421, 6530, 227044, 4266410, 149176979}}, // 77 + // {"n1rkqbbr/p1pp1pp1/np2p2p/8/8/N4PP1/PPPPP1BP/N1RKQ1BR w HChc - 0 9", + // {27, 670, 19119, 494690, 14708490, 397268628}}, // 78 + // {"nnr1qrbb/p2kpppp/1p1p4/2p5/6P1/PP1P4/2P1PP1P/NNRKQRBB w FC - 0 9", {27, + // 604, 17043, 409665, 11993332, 308518181}}, // 79 + // {"bbnnrkrq/ppp1pp2/6p1/3p4/7p/7P/PPPPPPP1/BBNNRRKQ w ge - 0 9", {20, 559, + // 12242, 355326, 8427161, 252274233}}, // 80 + // {"bnnbrkr1/ppp2p1p/5q2/3pp1p1/4P3/1N4P1/PPPPRP1P/BN1B1KRQ w Gge - 0 9", + // {26, 1036, 27228, 1028084, 28286576, 1042120495}}, // 81 + // {"bn1rkbrq/1pppppp1/p6p/1n6/3P4/6PP/PPPRPP2/BNN1KBRQ w Ggd - 2 9", {29, + // 633, 19278, 455476, 14333034, 361900466}}, // 82 + // {"b1nrkrqb/1p1npppp/p2p4/2p5/5P2/4P2P/PPPP1RP1/BNNRK1QB w Dfd - 1 9", + // {25, 475, 12603, 270909, 7545536, 179579818}}, // 83 + // {"1bbnrkrq/ppppppp1/8/7p/1n4P1/1PN5/P1PPPP1P/NBBR1KRQ w Gge - 0 9", {30, + // 803, 25473, 709716, 23443854, 686365049}}, // 84 + // {"nnbbrkrq/2pp1pp1/1p5p/pP2p3/7P/N7/P1PPPPP1/N1BBRKRQ w GEge - 0 9", {18, + // 432, 9638, 242350, 6131124, 160393505}}, // 85 + // {"nnbrkbrq/1pppp1p1/p7/7p/1P2Pp2/BN6/P1PP1PPP/1N1RKBRQ w GDgd - 0 9", + // {27, 482, 13441, 282259, 8084701, 193484216}}, // 86 + // {"n1brkrqb/pppp3p/n3pp2/6p1/3P1P2/N1P5/PP2P1PP/N1BRKRQB w FDfd - 0 9", + // {28, 642, 19005, 471729, 14529434, 384837696}}, // 87 {"nbnrbk2/p1pppp1p/1p3qr1/6p1/1B1P4/1N6/PPP1PPPP/1BNR1RKQ w d - 2 9", {30, 796, 22780, 687302, 20120565, 641832725}}, // 88 - {"nnrbbrkq/1pp2ppp/3p4/p3p3/3P1P2/1P2P3/P1P3PP/NNRBBKRQ w GC - 1 9", - {31, 827, 24538, 663082, 19979594, 549437308}}, // 89 - {"nnrkbbrq/1pp2p1p/p2pp1p1/2P5/8/8/PP1PPPPP/NNRKBBRQ w Ggc - 0 9", - {24, 762, 19283, 624598, 16838099, 555230555}}, // 90 - {"nnr1brqb/1ppkp1pp/8/p2p1p2/1P1P4/N1P5/P3PPPP/N1RKBRQB w FC - 1 9", - {23, 640, 15471, 444905, 11343507, 334123513}}, 
// 91 - {"nbnrkrbq/2ppp2p/p4p2/1P4p1/4PP2/8/1PPP2PP/NBNRKRBQ w FDfd - 0 9", - {31, 826, 26137, 732175, 23555139, 686250413}}, // 92 - {"1nrbkr1q/1pppp1pp/1n6/p4p2/N1b4P/8/PPPPPPPB/N1RBKR1Q w FCfc - 2 9", - {27, 862, 24141, 755171, 22027695, 696353497}}, // 93 - {"nnrkrbbq/pppp2pp/8/4pp2/4P3/P7/1PPPBPPP/NNKRR1BQ w c - 0 9", - {25, 792, 19883, 636041, 16473376, 532214177}}, // 94 - {"n1rk1qbb/pppprpp1/2n4p/4p3/2PP3P/8/PP2PPP1/NNRKRQBB w ECc - 1 9", - {25, 622, 16031, 425247, 11420973, 321855685}}, // 95 - {"bbq1rnkr/pnp1pp1p/1p1p4/6p1/2P5/2Q1P2P/PP1P1PP1/BB1NRNKR w HEhe - 2 9", - {36, 870, 30516, 811047, 28127620, 799738334}}, // 96 - {"bq1brnkr/1p1ppp1p/1np5/p5p1/8/1N5P/PPPPPPP1/BQ1BRNKR w HEhe - 0 9", - {22, 588, 13524, 380068, 9359618, 273795898}}, // 97 - {"bq1rn1kr/1pppppbp/Nn4p1/8/8/P7/1PPPPPPP/BQ1RNBKR w HDhd - 1 9", - {24, 711, 18197, 542570, 14692779, 445827351}}, // 98 - {"bqnr1kr1/pppppp1p/6p1/5n2/4B3/3N2PP/PbPPPP2/BQNR1KR1 w GDgd - 2 9", - {31, 1132, 36559, 1261476, 43256823, 1456721391}}, // 99 - {"qbb1rnkr/ppp3pp/4n3/3ppp2/1P3PP1/8/P1PPPN1P/QBB1RNKR w HEhe - 0 9", - {28, 696, 20502, 541886, 16492398, 456983120}}, // 100 - {"qnbbr1kr/pp1ppp1p/4n3/6p1/2p3P1/2PP1P2/PP2P2P/QNBBRNKR w HEhe - 0 9", - {25, 655, 16520, 450189, 11767038, 335414976}}, // 101 - {"1nbrnbkr/p1ppp1pp/1p6/5p2/4q1PP/3P4/PPP1PP2/QNBRNBKR w HDhd - 1 9", - {30, 1162, 33199, 1217278, 36048727, 1290346802}}, // 102 - {"q1brnkrb/p1pppppp/n7/1p6/P7/3P1P2/QPP1P1PP/1NBRNKRB w GDgd - 0 9", - {32, 827, 26106, 718243, 23143989, 673147648}}, // 103 - {"qbnrb1kr/ppp1pp1p/3p4/2n3p1/1P6/6N1/P1PPPPPP/QBNRB1KR w HDhd - 2 9", - {29, 751, 23132, 610397, 19555214, 530475036}}, // 104 - {"q1rbbnkr/pppp1p2/2n3pp/2P1p3/3P4/8/PP1NPPPP/Q1RBBNKR w HChc - 2 9", - {29, 806, 24540, 687251, 21694330, 619907316}}, // 105 - {"q1r1bbkr/pnpp1ppp/2n1p3/1p6/2P2P2/2N1N3/PP1PP1PP/Q1R1BBKR w HChc - 2 9", - {32, 1017, 32098, 986028, 31204371, 958455898}}, // 106 - {"2rnbkrb/pqppppp1/1pn5/7p/2P5/P1R5/QP1PPPPP/1N1NBKRB w Ggc - 4 9", - {26, 625, 16506, 434635, 11856964, 336672890}}, // 107 - {"qbnr1kbr/p2ppppp/2p5/1p6/4n2P/P4N2/1PPP1PP1/QBNR1KBR w HDhd - 0 9", - {27, 885, 23828, 767273, 21855658, 706272554}}, // 108 - {"qnrbnk1r/pp1pp2p/5p2/2pbP1p1/3P4/1P6/P1P2PPP/QNRBNKBR w HChc - 0 9", - {26, 954, 24832, 892456, 24415089, 866744329}}, // 109 - {"qnrnk1br/p1p2ppp/8/1pbpp3/8/PP2N3/1QPPPPPP/1NR1KBBR w HChc - 0 9", - {26, 783, 20828, 634267, 17477825, 539674275}}, // 110 - {"qnrnkrbb/Bpppp2p/6p1/5p2/5P2/3PP3/PPP3PP/QNRNKR1B w FCfc - 1 9", - {28, 908, 25730, 861240, 25251641, 869525254}}, // 111 - {"bbnqrn1r/ppppp2k/5p2/6pp/7P/1QP5/PP1PPPP1/B1N1RNKR w HE - 0 9", - {33, 643, 21790, 487109, 16693640, 410115900}}, // 112 - {"b1qbrnkr/ppp1pp2/2np4/6pp/4P3/2N4P/PPPP1PP1/BQ1BRNKR w HEhe - 0 9", - {28, 837, 24253, 745617, 22197063, 696399065}}, // 113 - {"bnqr1bkr/pp1ppppp/2p5/4N3/5P2/P7/1PPPPnPP/BNQR1BKR w HDhd - 3 9", - {25, 579, 13909, 341444, 8601011, 225530258}}, // 114 - {"b1qr1krb/pp1ppppp/n2n4/8/2p5/2P3P1/PP1PPP1P/BNQRNKRB w GDgd - 0 9", - {28, 707, 19721, 549506, 15583376, 468399900}}, // 115 - {"nbbqr1kr/1pppp1pp/8/p1n2p2/4P3/PN6/1PPPQPPP/1BB1RNKR w HEhe - 0 9", - {30, 745, 23416, 597858, 19478789, 515473678}}, // 116 - {"nqbbrn1r/p1pppp1k/1p4p1/7p/4P3/1R3B2/PPPP1PPP/NQB2NKR w H - 0 9", - {24, 504, 13512, 317355, 9002073, 228726497}}, // 117 - {"nqbr1bkr/p1p1ppp1/1p1n4/3pN2p/1P6/8/P1PPPPPP/NQBR1BKR w HDhd - 0 9", - {29, 898, 26532, 809605, 24703467, 757166494}}, // 118 - {"nqbrn1rb/pppp1kp1/5p1p/4p3/P4B2/3P2P1/1PP1PP1P/NQ1RNKRB w GD - 0 9", - {34, 671, 
22332, 473110, 15556806, 353235120}}, // 119 - {"nb1r1nkr/ppp1ppp1/2bp4/7p/3P2qP/P6R/1PP1PPP1/NBQRBNK1 w Dhd - 1 9", - {38, 1691, 60060, 2526992, 88557078, 3589649998}}, // 120 - {"n1rbbnkr/1p1pp1pp/p7/2p1qp2/1B3P2/3P4/PPP1P1PP/NQRB1NKR w HChc - 0 9", - {24, 913, 21595, 807544, 19866918, 737239330}}, // 121 - {"nqrnbbkr/p2p1p1p/1pp5/1B2p1p1/1P3P2/4P3/P1PP2PP/NQRNB1KR w HChc - 0 9", - {33, 913, 30159, 843874, 28053260, 804687975}}, // 122 - {"nqr1bkrb/ppp1pp2/2np2p1/P6p/8/2P4P/1P1PPPP1/NQRNBKRB w GCgc - 0 9", - {24, 623, 16569, 442531, 12681936, 351623879}}, // 123 - {"nb1rnkbr/pqppppp1/1p5p/8/1PP4P/8/P2PPPP1/NBQRNKBR w HDhd - 1 9", - {31, 798, 24862, 694386, 22616076, 666227466}}, // 124 - {"nqrbnkbr/2p1p1pp/3p4/pp3p2/6PP/3P1N2/PPP1PP2/NQRB1KBR w HChc - 0 9", - {24, 590, 14409, 383690, 9698432, 274064911}}, // 125 - {"nqrnkbbr/pp1p1p1p/4p1p1/1p6/8/5P1P/P1PPP1P1/NQRNKBBR w HChc - 0 9", - {30, 1032, 31481, 1098116, 34914919, 1233362066}}, // 126 - {"nqrnkrbb/p2ppppp/1p6/2p5/2P3P1/5P2/PP1PPN1P/NQR1KRBB w FCfc - 1 9", - {30, 775, 23958, 668000, 21141738, 621142773}}, // 127 - {"bbnrqrk1/pp2pppp/4n3/2pp4/P7/1N5P/BPPPPPP1/B2RQNKR w HD - 2 9", - {23, 708, 17164, 554089, 14343443, 481405144}}, // 128 - {"bnr1qnkr/p1pp1p1p/1p4p1/4p1b1/2P1P3/1P6/PB1P1PPP/1NRBQNKR w HChc - 1 9", - {30, 931, 29249, 921746, 30026687, 968109774}}, // 129 - {"b1rqnbkr/ppp1ppp1/3p3p/2n5/P3P3/2NP4/1PP2PPP/B1RQNBKR w HChc - 0 9", - {24, 596, 15533, 396123, 11099382, 294180723}}, // 130 - {"bnrqnr1b/pp1pkppp/2p1p3/P7/2P5/7P/1P1PPPP1/BNRQNKRB w GC - 0 9", - {24, 572, 15293, 390903, 11208688, 302955778}}, // 131 - {"n1brq1kr/bppppppp/p7/8/4P1Pn/8/PPPP1P2/NBBRQNKR w HDhd - 0 9", - {20, 570, 13139, 371247, 9919113, 284592289}}, // 132 - {"1rbbqnkr/ppn1ppp1/3p3p/2p5/3P4/1N4P1/PPPBPP1P/1R1BQNKR w HBhb - 0 9", - {29, 1009, 29547, 1040816, 31059587, 1111986835}}, // 133 - {"nrbq2kr/ppppppb1/5n1p/5Pp1/8/P5P1/1PPPP2P/NRBQNBKR w HBhb - 1 9", - {20, 520, 11745, 316332, 7809837, 216997152}}, // 134 - {"nrb1nkrb/pp3ppp/1qBpp3/2p5/8/P5P1/1PPPPP1P/NRBQNKR1 w GBgb - 2 9", - {32, 850, 25642, 734088, 21981567, 664886187}}, // 135 - {"1br1bnkr/ppqppp1p/1np3p1/8/1PP4P/4N3/P2PPPP1/NBRQB1KR w HChc - 1 9", - {32, 798, 24765, 691488, 22076141, 670296871}}, // 136 - {"nrqbb1kr/1p1pp1pp/2p3n1/p4p2/3PP3/P5N1/1PP2PPP/NRQBB1KR w HBhb - 0 9", - {32, 791, 26213, 684890, 23239122, 634260266}}, // 137 - {"nrqn1bkr/ppppp1pp/4b3/8/4P1p1/5P2/PPPP3P/NRQNBBKR w HBhb - 0 9", - {29, 687, 20223, 506088, 15236287, 398759980}}, // 138 - {"nrqnbrkb/pppp1p2/4p2p/3B2p1/8/1P4P1/PQPPPP1P/NR1NBKR1 w GB - 0 9", - {37, 764, 27073, 610950, 21284835, 514864869}}, // 139 - {"nbrq1kbr/Bp3ppp/2pnp3/3p4/5P2/2P4P/PP1PP1P1/NBRQNK1R w HChc - 0 9", - {40, 1271, 48022, 1547741, 56588117, 1850696281}}, // 140 - {"nrqbnkbr/1p2ppp1/p1p4p/3p4/1P6/8/PQPPPPPP/1RNBNKBR w HBhb - 0 9", - {28, 757, 23135, 668025, 21427496, 650939962}}, // 141 - {"nrqn1bbr/2ppkppp/4p3/pB6/8/2P1P3/PP1P1PPP/NRQNK1BR w HB - 1 9", - {27, 642, 17096, 442653, 11872805, 327545120}}, // 142 - {"nrqnkrb1/p1ppp2p/1p4p1/4bp2/4PP1P/4N3/PPPP2P1/NRQ1KRBB w FBfb - 1 9", - {27, 958, 27397, 960350, 28520172, 995356563}}, // 143 - {"1bnrnqkr/pbpp2pp/8/1p2pp2/P6P/3P1N2/1PP1PPP1/BBNR1QKR w HDhd - 0 9", - {27, 859, 23475, 773232, 21581178, 732696327}}, // 144 - {"b1rbnqkr/1pp1ppp1/2n4p/p2p4/5P2/1PBP4/P1P1P1PP/1NRBNQKR w HChc - 0 9", - {26, 545, 14817, 336470, 9537260, 233549184}}, // 145 - {"1nrnqbkr/p1pppppp/1p6/8/2b2P2/P1N5/1PP1P1PP/BNR1QBKR w HChc - 2 9", - {24, 668, 17716, 494866, 14216070, 406225409}}, // 146 - 
{"1nrnqkrb/2ppp1pp/p7/1p3p2/5P2/N5K1/PPPPP2P/B1RNQ1RB w gc - 0 9", - {33, 725, 23572, 559823, 18547476, 471443091}}, // 147 - {"nbbr1qkr/p1pppppp/8/1p1n4/3P4/1N3PP1/PPP1P2P/1BBRNQKR w HDhd - 1 9", - {28, 698, 20527, 539625, 16555068, 458045505}}, // 148 - {"1rbbnqkr/1pnppp1p/p5p1/2p5/2P4P/5P2/PP1PP1PR/NRBBNQK1 w Bhb - 1 9", - {24, 554, 14221, 362516, 9863080, 269284081}}, // 149 - {"nrb1qbkr/2pppppp/2n5/p7/2p5/4P3/PPNP1PPP/1RBNQBKR w HBhb - 0 9", - {23, 618, 15572, 443718, 12044358, 360311412}}, // 150 - {"nrb1qkrb/2ppppp1/p3n3/1p1B3p/2P5/6P1/PP1PPPRP/NRBNQK2 w Bgb - 2 9", - {27, 593, 16770, 401967, 11806808, 303338935}}, // 151 - {"nbrn1qkr/ppp1pp2/3p2p1/3Q3P/b7/8/PPPPPP1P/NBRNB1KR w HChc - 2 9", - {39, 1056, 40157, 1133446, 42201531, 1239888683}}, // 152 - {"nr1bbqkr/pp1pp2p/1n3pp1/2p5/8/1P4P1/P1PPPPQP/NRNBBK1R w hb - 0 9", - {25, 585, 15719, 406544, 11582539, 320997679}}, // 153 - {"nr2bbkr/ppp1pppp/1n1p4/8/6PP/1NP4q/PP1PPP2/1RNQBBKR w HBhb - 1 9", - {22, 742, 15984, 545231, 13287051, 457010195}}, // 154 - {"1rnqbkrb/ppp1p1p1/1n3p2/3p3p/P6P/4P3/1PPP1PP1/NRNQBRKB w gb - 0 9", - {22, 574, 14044, 379648, 9968830, 281344367}}, // 155 - {"nb1rqkbr/1pppp1pp/4n3/p4p2/6PP/5P2/PPPPPN2/NBR1QKBR w HCh - 0 9", - {25, 621, 16789, 462600, 13378840, 396575613}}, // 156 - {"nrnbqkbr/2pp2pp/4pp2/pp6/8/1P3P2/P1PPPBPP/NRNBQ1KR w hb - 0 9", - {25, 656, 16951, 466493, 12525939, 358763789}}, // 157 - {"nrnqkbbr/ppppp1p1/7p/5p2/8/P4PP1/NPPPP2P/NR1QKBBR w HBhb - 0 9", - {28, 723, 20621, 547522, 15952533, 439046803}}, // 158 - {"1rnqkr1b/ppppp2p/1n3pp1/8/2P3P1/Pb1N4/1P1PPP1P/NR1QKRBB w FBfb - 0 9", - {26, 713, 19671, 548875, 15865528, 454532806}}, // 159 - {"bbnrnkqr/1pppp1pp/5p2/p7/7P/1P6/PBPPPPPR/1BNRNKQ1 w D - 2 9", - {26, 649, 17834, 502279, 14375839, 435585252}}, // 160 - {"bnrbk1qr/1ppp1ppp/p2np3/8/P7/2N2P2/1PPPP1PP/B1RBNKQR w HC - 0 9", - {26, 621, 17569, 451452, 13514201, 364421088}}, // 161 - {"br1nkbqr/ppppppp1/8/n6p/8/N1P2PP1/PP1PP2P/B1RNKBQR w HCh - 1 9", - {29, 664, 20182, 512316, 16125924, 442508159}}, // 162 - {"bnr1kqrb/pp1pppp1/2n5/2p5/1P4Pp/4N3/P1PPPP1P/BNKR1QRB w gc - 0 9", - {36, 888, 31630, 789863, 27792175, 719015345}}, // 163 - {"1bbrnkqr/pp1p1ppp/2p1p3/1n6/5P2/3Q4/PPPPP1PP/NBBRNK1R w HDhd - 2 9", - {36, 891, 31075, 781792, 26998966, 702903862}}, // 164 + // {"nnrbbrkq/1pp2ppp/3p4/p3p3/3P1P2/1P2P3/P1P3PP/NNRBBKRQ w GC - 1 9", {31, + // 827, 24538, 663082, 19979594, 549437308}}, // 89 + // {"nnrkbbrq/1pp2p1p/p2pp1p1/2P5/8/8/PP1PPPPP/NNRKBBRQ w Ggc - 0 9", {24, + // 762, 19283, 624598, 16838099, 555230555}}, // 90 + // {"nnr1brqb/1ppkp1pp/8/p2p1p2/1P1P4/N1P5/P3PPPP/N1RKBRQB w FC - 1 9", {23, + // 640, 15471, 444905, 11343507, 334123513}}, // 91 + // {"nbnrkrbq/2ppp2p/p4p2/1P4p1/4PP2/8/1PPP2PP/NBNRKRBQ w FDfd - 0 9", {31, + // 826, 26137, 732175, 23555139, 686250413}}, // 92 + // {"1nrbkr1q/1pppp1pp/1n6/p4p2/N1b4P/8/PPPPPPPB/N1RBKR1Q w FCfc - 2 9", + // {27, 862, 24141, 755171, 22027695, 696353497}}, // 93 + // {"nnrkrbbq/pppp2pp/8/4pp2/4P3/P7/1PPPBPPP/NNKRR1BQ w c - 0 9", {25, 792, + // 19883, 636041, 16473376, 532214177}}, // 94 + // {"n1rk1qbb/pppprpp1/2n4p/4p3/2PP3P/8/PP2PPP1/NNRKRQBB w ECc - 1 9", {25, + // 622, 16031, 425247, 11420973, 321855685}}, // 95 + // {"bbq1rnkr/pnp1pp1p/1p1p4/6p1/2P5/2Q1P2P/PP1P1PP1/BB1NRNKR w HEhe - 2 9", + // {36, 870, 30516, 811047, 28127620, 799738334}}, // 96 + // {"bq1brnkr/1p1ppp1p/1np5/p5p1/8/1N5P/PPPPPPP1/BQ1BRNKR w HEhe - 0 9", + // {22, 588, 13524, 380068, 9359618, 273795898}}, // 97 + // {"bq1rn1kr/1pppppbp/Nn4p1/8/8/P7/1PPPPPPP/BQ1RNBKR w HDhd - 1 
9", {24, + // 711, 18197, 542570, 14692779, 445827351}}, // 98 + // {"bqnr1kr1/pppppp1p/6p1/5n2/4B3/3N2PP/PbPPPP2/BQNR1KR1 w GDgd - 2 9", + // {31, 1132, 36559, 1261476, 43256823, 1456721391}}, // 99 + // {"qbb1rnkr/ppp3pp/4n3/3ppp2/1P3PP1/8/P1PPPN1P/QBB1RNKR w HEhe - 0 9", + // {28, 696, 20502, 541886, 16492398, 456983120}}, // 100 + // {"qnbbr1kr/pp1ppp1p/4n3/6p1/2p3P1/2PP1P2/PP2P2P/QNBBRNKR w HEhe - 0 9", + // {25, 655, 16520, 450189, 11767038, 335414976}}, // 101 + // {"1nbrnbkr/p1ppp1pp/1p6/5p2/4q1PP/3P4/PPP1PP2/QNBRNBKR w HDhd - 1 9", + // {30, 1162, 33199, 1217278, 36048727, 1290346802}}, // 102 + // {"q1brnkrb/p1pppppp/n7/1p6/P7/3P1P2/QPP1P1PP/1NBRNKRB w GDgd - 0 9", {32, + // 827, 26106, 718243, 23143989, 673147648}}, // 103 + // {"qbnrb1kr/ppp1pp1p/3p4/2n3p1/1P6/6N1/P1PPPPPP/QBNRB1KR w HDhd - 2 9", + // {29, 751, 23132, 610397, 19555214, 530475036}}, // 104 + // {"q1rbbnkr/pppp1p2/2n3pp/2P1p3/3P4/8/PP1NPPPP/Q1RBBNKR w HChc - 2 9", + // {29, 806, 24540, 687251, 21694330, 619907316}}, // 105 + // {"q1r1bbkr/pnpp1ppp/2n1p3/1p6/2P2P2/2N1N3/PP1PP1PP/Q1R1BBKR w HChc - 2 + // 9", {32, 1017, 32098, 986028, 31204371, 958455898}}, // 106 + // {"2rnbkrb/pqppppp1/1pn5/7p/2P5/P1R5/QP1PPPPP/1N1NBKRB w Ggc - 4 9", {26, + // 625, 16506, 434635, 11856964, 336672890}}, // 107 + // {"qbnr1kbr/p2ppppp/2p5/1p6/4n2P/P4N2/1PPP1PP1/QBNR1KBR w HDhd - 0 9", + // {27, 885, 23828, 767273, 21855658, 706272554}}, // 108 + // {"qnrbnk1r/pp1pp2p/5p2/2pbP1p1/3P4/1P6/P1P2PPP/QNRBNKBR w HChc - 0 9", + // {26, 954, 24832, 892456, 24415089, 866744329}}, // 109 + // {"qnrnk1br/p1p2ppp/8/1pbpp3/8/PP2N3/1QPPPPPP/1NR1KBBR w HChc - 0 9", {26, + // 783, 20828, 634267, 17477825, 539674275}}, // 110 + // {"qnrnkrbb/Bpppp2p/6p1/5p2/5P2/3PP3/PPP3PP/QNRNKR1B w FCfc - 1 9", {28, + // 908, 25730, 861240, 25251641, 869525254}}, // 111 + // {"bbnqrn1r/ppppp2k/5p2/6pp/7P/1QP5/PP1PPPP1/B1N1RNKR w HE - 0 9", {33, + // 643, 21790, 487109, 16693640, 410115900}}, // 112 + // {"b1qbrnkr/ppp1pp2/2np4/6pp/4P3/2N4P/PPPP1PP1/BQ1BRNKR w HEhe - 0 9", + // {28, 837, 24253, 745617, 22197063, 696399065}}, // 113 + // {"bnqr1bkr/pp1ppppp/2p5/4N3/5P2/P7/1PPPPnPP/BNQR1BKR w HDhd - 3 9", {25, + // 579, 13909, 341444, 8601011, 225530258}}, // 114 + // {"b1qr1krb/pp1ppppp/n2n4/8/2p5/2P3P1/PP1PPP1P/BNQRNKRB w GDgd - 0 9", + // {28, 707, 19721, 549506, 15583376, 468399900}}, // 115 + // {"nbbqr1kr/1pppp1pp/8/p1n2p2/4P3/PN6/1PPPQPPP/1BB1RNKR w HEhe - 0 9", + // {30, 745, 23416, 597858, 19478789, 515473678}}, // 116 + // {"nqbbrn1r/p1pppp1k/1p4p1/7p/4P3/1R3B2/PPPP1PPP/NQB2NKR w H - 0 9", {24, + // 504, 13512, 317355, 9002073, 228726497}}, // 117 + // {"nqbr1bkr/p1p1ppp1/1p1n4/3pN2p/1P6/8/P1PPPPPP/NQBR1BKR w HDhd - 0 9", + // {29, 898, 26532, 809605, 24703467, 757166494}}, // 118 + // {"nqbrn1rb/pppp1kp1/5p1p/4p3/P4B2/3P2P1/1PP1PP1P/NQ1RNKRB w GD - 0 9", + // {34, 671, 22332, 473110, 15556806, 353235120}}, // 119 + // {"nb1r1nkr/ppp1ppp1/2bp4/7p/3P2qP/P6R/1PP1PPP1/NBQRBNK1 w Dhd - 1 9", + // {38, 1691, 60060, 2526992, 88557078, 3589649998}}, // 120 + // {"n1rbbnkr/1p1pp1pp/p7/2p1qp2/1B3P2/3P4/PPP1P1PP/NQRB1NKR w HChc - 0 9", + // {24, 913, 21595, 807544, 19866918, 737239330}}, // 121 + // {"nqrnbbkr/p2p1p1p/1pp5/1B2p1p1/1P3P2/4P3/P1PP2PP/NQRNB1KR w HChc - 0 9", + // {33, 913, 30159, 843874, 28053260, 804687975}}, // 122 + // {"nqr1bkrb/ppp1pp2/2np2p1/P6p/8/2P4P/1P1PPPP1/NQRNBKRB w GCgc - 0 9", + // {24, 623, 16569, 442531, 12681936, 351623879}}, // 123 + // {"nb1rnkbr/pqppppp1/1p5p/8/1PP4P/8/P2PPPP1/NBQRNKBR w HDhd - 1 9", {31, + // 798, 24862, 694386, 
22616076, 666227466}}, // 124 + // {"nqrbnkbr/2p1p1pp/3p4/pp3p2/6PP/3P1N2/PPP1PP2/NQRB1KBR w HChc - 0 9", + // {24, 590, 14409, 383690, 9698432, 274064911}}, // 125 + // {"nqrnkbbr/pp1p1p1p/4p1p1/1p6/8/5P1P/P1PPP1P1/NQRNKBBR w HChc - 0 9", + // {30, 1032, 31481, 1098116, 34914919, 1233362066}}, // 126 + // {"nqrnkrbb/p2ppppp/1p6/2p5/2P3P1/5P2/PP1PPN1P/NQR1KRBB w FCfc - 1 9", + // {30, 775, 23958, 668000, 21141738, 621142773}}, // 127 + // {"bbnrqrk1/pp2pppp/4n3/2pp4/P7/1N5P/BPPPPPP1/B2RQNKR w HD - 2 9", {23, + // 708, 17164, 554089, 14343443, 481405144}}, // 128 + // {"bnr1qnkr/p1pp1p1p/1p4p1/4p1b1/2P1P3/1P6/PB1P1PPP/1NRBQNKR w HChc - 1 + // 9", {30, 931, 29249, 921746, 30026687, 968109774}}, // 129 + // {"b1rqnbkr/ppp1ppp1/3p3p/2n5/P3P3/2NP4/1PP2PPP/B1RQNBKR w HChc - 0 9", + // {24, 596, 15533, 396123, 11099382, 294180723}}, // 130 + // {"bnrqnr1b/pp1pkppp/2p1p3/P7/2P5/7P/1P1PPPP1/BNRQNKRB w GC - 0 9", {24, + // 572, 15293, 390903, 11208688, 302955778}}, // 131 + // {"n1brq1kr/bppppppp/p7/8/4P1Pn/8/PPPP1P2/NBBRQNKR w HDhd - 0 9", {20, + // 570, 13139, 371247, 9919113, 284592289}}, // 132 + // {"1rbbqnkr/ppn1ppp1/3p3p/2p5/3P4/1N4P1/PPPBPP1P/1R1BQNKR w HBhb - 0 9", + // {29, 1009, 29547, 1040816, 31059587, 1111986835}}, // 133 + // {"nrbq2kr/ppppppb1/5n1p/5Pp1/8/P5P1/1PPPP2P/NRBQNBKR w HBhb - 1 9", {20, + // 520, 11745, 316332, 7809837, 216997152}}, // 134 + // {"nrb1nkrb/pp3ppp/1qBpp3/2p5/8/P5P1/1PPPPP1P/NRBQNKR1 w GBgb - 2 9", {32, + // 850, 25642, 734088, 21981567, 664886187}}, // 135 + // {"1br1bnkr/ppqppp1p/1np3p1/8/1PP4P/4N3/P2PPPP1/NBRQB1KR w HChc - 1 9", + // {32, 798, 24765, 691488, 22076141, 670296871}}, // 136 + // {"nrqbb1kr/1p1pp1pp/2p3n1/p4p2/3PP3/P5N1/1PP2PPP/NRQBB1KR w HBhb - 0 9", + // {32, 791, 26213, 684890, 23239122, 634260266}}, // 137 + // {"nrqn1bkr/ppppp1pp/4b3/8/4P1p1/5P2/PPPP3P/NRQNBBKR w HBhb - 0 9", {29, + // 687, 20223, 506088, 15236287, 398759980}}, // 138 + // {"nrqnbrkb/pppp1p2/4p2p/3B2p1/8/1P4P1/PQPPPP1P/NR1NBKR1 w GB - 0 9", {37, + // 764, 27073, 610950, 21284835, 514864869}}, // 139 + // {"nbrq1kbr/Bp3ppp/2pnp3/3p4/5P2/2P4P/PP1PP1P1/NBRQNK1R w HChc - 0 9", + // {40, 1271, 48022, 1547741, 56588117, 1850696281}}, // 140 + // {"nrqbnkbr/1p2ppp1/p1p4p/3p4/1P6/8/PQPPPPPP/1RNBNKBR w HBhb - 0 9", {28, + // 757, 23135, 668025, 21427496, 650939962}}, // 141 + // {"nrqn1bbr/2ppkppp/4p3/pB6/8/2P1P3/PP1P1PPP/NRQNK1BR w HB - 1 9", {27, + // 642, 17096, 442653, 11872805, 327545120}}, // 142 + // {"nrqnkrb1/p1ppp2p/1p4p1/4bp2/4PP1P/4N3/PPPP2P1/NRQ1KRBB w FBfb - 1 9", + // {27, 958, 27397, 960350, 28520172, 995356563}}, // 143 + // {"1bnrnqkr/pbpp2pp/8/1p2pp2/P6P/3P1N2/1PP1PPP1/BBNR1QKR w HDhd - 0 9", + // {27, 859, 23475, 773232, 21581178, 732696327}}, // 144 + // {"b1rbnqkr/1pp1ppp1/2n4p/p2p4/5P2/1PBP4/P1P1P1PP/1NRBNQKR w HChc - 0 9", + // {26, 545, 14817, 336470, 9537260, 233549184}}, // 145 + // {"1nrnqbkr/p1pppppp/1p6/8/2b2P2/P1N5/1PP1P1PP/BNR1QBKR w HChc - 2 9", + // {24, 668, 17716, 494866, 14216070, 406225409}}, // 146 + // {"1nrnqkrb/2ppp1pp/p7/1p3p2/5P2/N5K1/PPPPP2P/B1RNQ1RB w gc - 0 9", {33, + // 725, 23572, 559823, 18547476, 471443091}}, // 147 + // {"nbbr1qkr/p1pppppp/8/1p1n4/3P4/1N3PP1/PPP1P2P/1BBRNQKR w HDhd - 1 9", + // {28, 698, 20527, 539625, 16555068, 458045505}}, // 148 + // {"1rbbnqkr/1pnppp1p/p5p1/2p5/2P4P/5P2/PP1PP1PR/NRBBNQK1 w Bhb - 1 9", + // {24, 554, 14221, 362516, 9863080, 269284081}}, // 149 + // {"nrb1qbkr/2pppppp/2n5/p7/2p5/4P3/PPNP1PPP/1RBNQBKR w HBhb - 0 9", {23, + // 618, 15572, 443718, 12044358, 360311412}}, // 150 + // 
{"nrb1qkrb/2ppppp1/p3n3/1p1B3p/2P5/6P1/PP1PPPRP/NRBNQK2 w Bgb - 2 9", + // {27, 593, 16770, 401967, 11806808, 303338935}}, // 151 + // {"nbrn1qkr/ppp1pp2/3p2p1/3Q3P/b7/8/PPPPPP1P/NBRNB1KR w HChc - 2 9", {39, + // 1056, 40157, 1133446, 42201531, 1239888683}}, // 152 + // {"nr1bbqkr/pp1pp2p/1n3pp1/2p5/8/1P4P1/P1PPPPQP/NRNBBK1R w hb - 0 9", {25, + // 585, 15719, 406544, 11582539, 320997679}}, // 153 + // {"nr2bbkr/ppp1pppp/1n1p4/8/6PP/1NP4q/PP1PPP2/1RNQBBKR w HBhb - 1 9", {22, + // 742, 15984, 545231, 13287051, 457010195}}, // 154 + // {"1rnqbkrb/ppp1p1p1/1n3p2/3p3p/P6P/4P3/1PPP1PP1/NRNQBRKB w gb - 0 9", + // {22, 574, 14044, 379648, 9968830, 281344367}}, // 155 + // {"nb1rqkbr/1pppp1pp/4n3/p4p2/6PP/5P2/PPPPPN2/NBR1QKBR w HCh - 0 9", {25, + // 621, 16789, 462600, 13378840, 396575613}}, // 156 + // {"nrnbqkbr/2pp2pp/4pp2/pp6/8/1P3P2/P1PPPBPP/NRNBQ1KR w hb - 0 9", {25, + // 656, 16951, 466493, 12525939, 358763789}}, // 157 + // {"nrnqkbbr/ppppp1p1/7p/5p2/8/P4PP1/NPPPP2P/NR1QKBBR w HBhb - 0 9", {28, + // 723, 20621, 547522, 15952533, 439046803}}, // 158 + // {"1rnqkr1b/ppppp2p/1n3pp1/8/2P3P1/Pb1N4/1P1PPP1P/NR1QKRBB w FBfb - 0 9", + // {26, 713, 19671, 548875, 15865528, 454532806}}, // 159 + // {"bbnrnkqr/1pppp1pp/5p2/p7/7P/1P6/PBPPPPPR/1BNRNKQ1 w D - 2 9", {26, 649, + // 17834, 502279, 14375839, 435585252}}, // 160 + // {"bnrbk1qr/1ppp1ppp/p2np3/8/P7/2N2P2/1PPPP1PP/B1RBNKQR w HC - 0 9", {26, + // 621, 17569, 451452, 13514201, 364421088}}, // 161 + // {"br1nkbqr/ppppppp1/8/n6p/8/N1P2PP1/PP1PP2P/B1RNKBQR w HCh - 1 9", {29, + // 664, 20182, 512316, 16125924, 442508159}}, // 162 + // {"bnr1kqrb/pp1pppp1/2n5/2p5/1P4Pp/4N3/P1PPPP1P/BNKR1QRB w gc - 0 9", {36, + // 888, 31630, 789863, 27792175, 719015345}}, // 163 + // {"1bbrnkqr/pp1p1ppp/2p1p3/1n6/5P2/3Q4/PPPPP1PP/NBBRNK1R w HDhd - 2 9", + // {36, 891, 31075, 781792, 26998966, 702903862}}, // 164 {"nrbbnk1r/pp2pppq/8/2pp3p/3P2P1/1N6/PPP1PP1P/1RBBNKQR w HBhb - 0 9", {29, 1036, 31344, 1139166, 35627310, 1310683359}}, // 165 - {"nr1nkbqr/ppp3pp/5p2/3pp3/6b1/3PP3/PPP2PPP/NRBNKBQR w hb - 0 9", - {18, 664, 13306, 483892, 10658989, 386307449}}, // 166 - {"nrbnk1rb/ppp1pq1p/3p4/5pp1/2P1P3/1N6/PP1PKPPP/1RBN1QRB w gb - 2 9", - {25, 966, 24026, 920345, 23957242, 913710194}}, // 167 - {"1brnbkqr/pppppp2/6p1/7p/1Pn5/P1NP4/2P1PPPP/NBR1BKQR w HChc - 0 9", - {22, 627, 13760, 395829, 9627826, 285900573}}, // 168 - {"nrnbbk1r/p1pppppq/8/7p/1p6/P5PP/1PPPPPQ1/NRNBBK1R w HBhb - 2 9", - {29, 888, 26742, 874270, 27229468, 930799376}}, // 169 - {"n1nkb1qr/prppppbp/6p1/1p6/2P2P2/P7/1P1PP1PP/NRNKBBQR w HBh - 1 9", - {29, 804, 24701, 688520, 21952444, 623156747}}, // 170 - {"nr2bqrb/ppkpp1pp/1np5/5p1P/5P2/2P5/PP1PP1P1/NRNKBQRB w GB - 0 9", - {22, 530, 13055, 347657, 9244693, 264088392}}, // 171 - {"nbr1kqbr/p3pppp/2ppn3/1p4P1/4P3/1P6/P1PP1P1P/NBRNKQBR w HChc - 1 9", - {23, 555, 14291, 350917, 9692630, 247479180}}, // 172 - {"nr1bkqbr/1p1pp1pp/pnp2p2/8/6P1/P1PP4/1P2PP1P/NRNBKQBR w HBhb - 0 9", - {22, 565, 13343, 365663, 9305533, 268612479}}, // 173 - {"nr1kqbbr/np2pppp/p1p5/1B1p1P2/8/4P3/PPPP2PP/NRNKQ1BR w HBhb - 0 9", - {32, 730, 23391, 556995, 18103280, 454569900}}, // 174 - {"nrnk1rbb/p1p2ppp/3pq3/Qp2p3/1P1P4/8/P1P1PPPP/NRN1KRBB w fb - 2 9", - {28, 873, 25683, 791823, 23868737, 747991356}}, // 175 - {"bbnrnkrq/pp1ppp1p/6p1/2p5/6P1/P5RP/1PPPPP2/BBNRNK1Q w Dgd - 3 9", - {37, 1260, 45060, 1542086, 54843403, 1898432768}}, // 176 - {"bnrb1rkq/ppnpppp1/3Q4/2p4p/7P/N7/PPPPPPP1/B1RBNKR1 w GC - 2 9", - {38, 878, 31944, 800440, 28784300, 784569826}}, // 177 - 
{"bnrnkbrq/p1ppppp1/1p5p/8/P2PP3/5P2/1PP3PP/BNRNKBRQ w GCgc - 1 9", - {26, 617, 16992, 419099, 11965544, 311309576}}, // 178 - {"bnrnkrqb/pp2p2p/2pp1pp1/8/P7/2PP1P2/1P2P1PP/BNRNKRQB w FCfc - 0 9", - {26, 721, 19726, 560824, 15966934, 467132503}}, // 179 - {"nbbrnkr1/1pppp1p1/p6q/P4p1p/8/5P2/1PPPP1PP/NBBRNRKQ w gd - 2 9", - {18, 556, 10484, 316634, 6629293, 202528241}}, // 180 - {"nrb1nkrq/2pp1ppp/p4b2/1p2p3/P4B2/3P4/1PP1PPPP/NR1BNRKQ w gb - 0 9", - {24, 562, 14017, 355433, 9227883, 247634489}}, // 181 - {"nrbnkbrq/p3p1pp/1p6/2pp1P2/8/3PP3/PPP2P1P/NRBNKBRQ w GBgb - 0 9", - {31, 746, 24819, 608523, 21019301, 542954168}}, // 182 - {"nrbnkrqb/pppp1p1p/4p1p1/8/7P/2P1P3/PPNP1PP1/1RBNKRQB w FBfb - 0 9", - {20, 459, 9998, 242762, 5760165, 146614723}}, // 183 - {"nbrn1krq/ppp1p2p/6b1/3p1pp1/8/4N1PP/PPPPPP2/NBR1BRKQ w gc - 1 9", - {27, 835, 23632, 766397, 22667987, 760795567}}, // 184 - {"nrnbbkrq/p1pp2pp/5p2/1p6/2P1pP1B/1P6/P2PP1PP/NRNB1KRQ w GBgb - 0 9", - {24, 646, 16102, 444472, 11489727, 324948755}}, // 185 - {"nrn1bbrq/1ppkppp1/p2p3p/8/1P3N2/4P3/P1PP1PPP/NR1KBBRQ w GB - 2 9", - {32, 591, 18722, 381683, 12069159, 269922838}}, // 186 - {"n1krbrqb/1ppppppp/p7/8/4n3/P4P1P/1PPPPQP1/NRNKBR1B w FB - 2 9", - {26, 639, 16988, 417190, 12167153, 312633873}}, // 187 - {"n1rnkrbq/1p1ppp1p/8/p1p1b1p1/3PQ1P1/4N3/PPP1PP1P/NBR1KRB1 w FCfc - 0 9", - {35, 1027, 35731, 1040417, 35738410, 1060661628}}, // 188 - {"nrnbkrbq/2pp1pp1/pp6/4p2p/P7/5PPP/1PPPP3/NRNBKRBQ w FBfb - 0 9", - {26, 628, 16731, 436075, 11920087, 331498921}}, // 189 - {"1rnkrbbq/pp1p2pp/1n3p2/1Bp1p3/1P6/1N2P3/P1PP1PPP/1RNKR1BQ w EBeb - 0 9", - {33, 992, 32244, 983481, 31703749, 980306735}}, // 190 - {"nr1krqbb/p1ppppp1/8/1p5p/1Pn5/5P2/P1PPP1PP/NRNKRQBB w EBeb - 0 9", - {24, 670, 15985, 445492, 11371067, 325556465}}, // 191 - {"bbq1rkr1/1ppppppp/p1n2n2/8/2P2P2/1P6/PQ1PP1PP/BB1NRKNR w HEe - 3 9", - {32, 794, 26846, 689334, 24085223, 645633370}}, // 192 - {"b1nbrknr/1qppp1pp/p4p2/1p6/6P1/P2NP3/1PPP1P1P/BQ1BRKNR w HEhe - 1 9", - {25, 663, 17138, 482994, 13157826, 389603029}}, // 193 - {"bqnrk1nr/pp2ppbp/6p1/2pp4/2P5/5P2/PPQPP1PP/B1NRKBNR w HDhd - 0 9", - {26, 850, 22876, 759768, 21341087, 719712622}}, // 194 - {"bqnrknrb/1ppp1p1p/p7/6p1/1P2p3/P1PN4/3PPPPP/BQ1RKNRB w GDgd - 0 9", - {25, 721, 19290, 581913, 16391601, 511725087}}, // 195 - {"q1b1rknr/pp1pppp1/4n2p/2p1b3/1PP5/4P3/PQ1P1PPP/1BBNRKNR w HEhe - 1 9", - {32, 975, 32566, 955493, 32649943, 962536105}}, // 196 - {"qnbbrknr/1p1ppppp/8/p1p5/5P2/PP1P4/2P1P1PP/QNBBRKNR w HEhe - 0 9", - {27, 573, 16331, 391656, 11562434, 301166330}}, // 197 - {"q1brkb1r/p1pppppp/np3B2/8/6n1/1P5N/P1PPPPPP/QN1RKB1R w HDhd - 0 9", - {32, 984, 31549, 1007217, 32597704, 1075429389}}, // 198 - {"qn1rk1rb/p1pppppp/1p2n3/8/2b5/4NPP1/PPPPP1RP/QNBRK2B w Dgd - 4 9", - {22, 802, 19156, 697722, 17761431, 650603534}}, // 199 - {"qbnrbknr/ppp2p1p/8/3pp1p1/1PP1B3/5N2/P2PPPPP/Q1NRBK1R w HDhd - 0 9", - {34, 943, 32506, 930619, 32523099, 955802240}}, // 200 - {"qnrbb1nr/pp1p1ppp/2p2k2/4p3/4P3/5PPP/PPPP4/QNRBBKNR w HC - 0 9", - {20, 460, 10287, 241640, 5846781, 140714047}}, // 201 - {"qnr1bbnr/ppk1p1pp/3p4/2p2p2/8/2P5/PP1PPPPP/QNKRBBNR w - - 1 9", - {19, 572, 11834, 357340, 7994547, 243724815}}, // 202 - {"qnrkbnrb/1p1p1ppp/2p5/4p3/p7/N1BP4/PPP1PPPP/Q1R1KNRB w gc - 0 9", - {27, 579, 16233, 375168, 10845146, 268229097}}, // 203 - {"qbnrkn1r/1pppp1p1/p3bp2/2BN3p/8/5P2/PPPPP1PP/QBNRK2R w HDhd - 0 9", - {40, 1027, 38728, 1059229, 38511307, 1104094381}}, // 204 - {"qnrbknbr/1pp2ppp/4p3/p6N/2p5/8/PPPPPPPP/Q1RBK1BR w HChc - 0 9", - {22, 510, 
11844, 300180, 7403327, 200581103}}, // 205 - {"1qkrnbbr/p1pppppp/2n5/1p6/8/5NP1/PPPPPP1P/QNRK1BBR w HC - 4 9", - {24, 549, 13987, 352037, 9396521, 255676649}}, // 206 - {"q1rknr1b/1ppppppb/2n5/p2B3p/8/1PN3P1/P1PPPP1P/Q1RKNRB1 w FCfc - 3 9", - {31, 924, 28520, 861944, 27463479, 847726572}}, // 207 - {"bbnqrk1r/pp1pppp1/2p4p/8/6n1/1N1P1P2/PPP1P1PP/BBQ1RKNR w HEhe - 4 9", - {24, 804, 20147, 666341, 18024195, 595947631}}, // 208 - {"bn1brknr/ppp1p1pp/5p2/3p4/6qQ/3P3P/PPP1PPP1/BN1BRKNR w HEhe - 4 9", - {25, 854, 22991, 704173, 20290974, 600195008}}, // 209 - {"1nqrkbnr/2pp1ppp/pp2p3/3b4/2P5/N7/PP1PPPPP/B1QRKBNR w HDhd - 0 9", - {22, 651, 16173, 479152, 13133439, 390886040}}, // 210 - {"bnqrk1rb/1pp1pppp/p2p4/4n3/2PPP3/8/PP3PPP/BNQRKNRB w GDgd - 1 9", - {30, 950, 28169, 889687, 27610213, 880739164}}, // 211 - {"nbb1rknr/1ppq1ppp/3p4/p3p3/4P3/1N2R3/PPPP1PPP/1BBQ1KNR w Hhe - 2 9", - {33, 988, 31293, 967575, 30894863, 985384035}}, // 212 - {"nqbbrknr/2ppp2p/pp4p1/5p2/7P/3P1P2/PPPBP1P1/NQ1BRKNR w HEhe - 0 9", - {27, 492, 13266, 276569, 7583292, 175376176}}, // 213 - {"1qbrkb1r/pppppppp/8/3n4/4P1n1/PN6/1PPP1P1P/1QBRKBNR w HDhd - 3 9", - {28, 800, 21982, 630374, 17313279, 507140861}}, // 214 - {"1qbrknrb/1p1ppppp/1np5/8/p4P1P/4P1N1/PPPP2P1/NQBRK1RB w GDgd - 0 9", - {21, 482, 10581, 267935, 6218644, 168704845}}, // 215 - {"nbqrbkr1/ppp1pppp/8/3p4/6n1/2P2PPN/PP1PP2P/NBQRBK1R w HDd - 1 9", - {29, 921, 25748, 840262, 24138518, 806554650}}, // 216 - {"nqrb1knr/1ppbpp1p/p7/3p2p1/2P3P1/5P1P/PP1PP3/NQRBBKNR w HChc - 1 9", - {31, 803, 25857, 665799, 21998733, 583349773}}, // 217 - {"1qrkbbr1/pppp1ppp/1n3n2/4p3/5P2/1N6/PPPPP1PP/1QRKBBNR w HCc - 0 9", - {25, 715, 19118, 556325, 15514933, 459533767}}, // 218 - {"nqrkb1rb/pp2pppp/2p1n3/3p4/3PP1N1/8/PPP2PPP/NQRKB1RB w GCgc - 0 9", - {26, 795, 21752, 679387, 19185851, 616508881}}, // 219 - {"nb1rknbr/pp2ppp1/8/2Bp3p/6P1/2P2P1q/PP1PP2P/NBQRKN1R w HDhd - 0 9", - {35, 1391, 43025, 1726888, 53033675, 2139267832}}, // 220 - {"nqrbkn1r/pp1pp1pp/8/2p2p2/5P2/P3B2P/1PbPP1P1/NQRBKN1R w HChc - 0 9", - {23, 758, 19439, 653854, 18296195, 628403401}}, // 221 - {"nqrknbbr/pp1pppp1/7p/2p5/7P/1P1N4/P1PPPPPB/NQRK1B1R w HChc - 2 9", - {29, 824, 23137, 683686, 19429491, 595493802}}, // 222 - {"1qrknrbb/B1p1pppp/8/1p1p4/2n2P2/1P6/P1PPP1PP/NQRKNR1B w FCfc - 0 9", - {28, 771, 20237, 581721, 16065378, 483037840}}, // 223 - {"bbnrqk1r/1ppppppp/8/7n/1p6/P6P/1BPPPPP1/1BNRQKNR w HDhd - 0 9", - {25, 601, 15471, 396661, 10697065, 289472497}}, // 224 - {"bnrbqknr/ppp3p1/3ppp1Q/7p/3P4/1P6/P1P1PPPP/BNRB1KNR w HChc - 0 9", - {32, 845, 26876, 742888, 23717883, 682154649}}, // 225 - {"bn1qkb1r/pprppppp/8/2p5/2PPP1n1/8/PPR2PPP/BN1QKBNR w Hh - 1 9", - {32, 856, 27829, 768595, 25245957, 727424329}}, // 226 - {"1nrqknrb/p1pp1ppp/1p2p3/3N4/5P1P/5b2/PPPPP3/B1RQKNRB w GCgc - 2 9", - {33, 873, 27685, 779473, 25128076, 745401024}}, // 227 - {"nbbrqrk1/pppppppp/8/2N1n3/P7/6P1/1PPPPP1P/1BBRQKNR w HD - 3 9", - {25, 555, 14339, 342296, 9153089, 234841945}}, // 228 - {"1rbbqknr/1ppp1pp1/1n2p3/p6p/4P1P1/P6N/1PPP1P1P/NRBBQK1R w HBhb - 0 9", - {25, 693, 18652, 528070, 15133381, 439344945}}, // 229 - {"nrq1kbnr/p1pbpppp/3p4/1p6/6P1/1N3N2/PPPPPP1P/1RBQKB1R w HBhb - 4 9", - {24, 648, 16640, 471192, 12871967, 380436777}}, // 230 - {"nr1qknr1/p1pppp1p/b5p1/1p6/8/P4PP1/1bPPP1RP/NRBQKN1B w Bgb - 0 9", - {18, 533, 11215, 331243, 7777833, 234905172}}, // 231 - {"nbrqbknr/1ppp2pp/8/4pp2/p2PP1P1/7N/PPP2P1P/NBRQBK1R w HChc - 0 9", - {29, 803, 24416, 706648, 22305910, 672322762}}, // 232 - 
{"nr1b1k1r/ppp1pppp/2bp1n2/6P1/2P3q1/5P2/PP1PP2P/NRQBBKNR w HBhb - 1 9", - {27, 1199, 30908, 1296241, 35121759, 1418677099}}, // 233 - {"nrqkbbnr/2pppp1p/p7/1p6/2P1Pp2/8/PPNP2PP/1RQKBBNR w HBhb - 0 9", - {28, 613, 17874, 432750, 13097064, 345294379}}, // 234 - {"1rqkbnrb/pp1ppp1p/1n4p1/B1p5/3PP3/4N3/PPP2PPP/NRQK2RB w GBgb - 0 9", - {33, 723, 23991, 590970, 19715083, 535650233}}, // 235 - {"nbrqkn1r/1pppp2p/5pp1/p2b4/5P2/P2PN3/1PP1P1PP/NBRQK1BR w HChc - 2 9", - {23, 607, 15482, 400970, 11026383, 290708878}}, // 236 - {"nrqbknbr/pp1pppp1/8/2p4p/P3PP2/8/1PPP2PP/NRQBKNBR w HBhb - 1 9", - {26, 700, 19371, 556026, 16058815, 485460242}}, // 237 - {"nrqknbbr/p2pppp1/1pp5/6Qp/3P4/1P3P2/P1P1P1PP/NR1KNBBR w HBhb - 0 9", - {40, 905, 32932, 829746, 29263502, 791963709}}, // 238 - {"nrqknrbb/1p3ppp/p2p4/2p1p3/1P6/3PP1P1/P1P2P1P/NRQKNRBB w FBfb - 0 9", - {29, 780, 22643, 654495, 19532077, 593181101}}, // 239 - {"1bnrkqnr/p1pppp2/7p/1p4p1/4b3/7N/PPPP1PPP/BBNRKQ1R w HDhd - 0 9", - {25, 725, 19808, 565006, 16661676, 487354613}}, // 240 - {"bnrbkq1r/pp2p1pp/5n2/2pp1p2/P7/N1PP4/1P2PPPP/B1RBKQNR w HChc - 1 9", - {24, 745, 18494, 584015, 15079602, 488924040}}, // 241 + // {"nr1nkbqr/ppp3pp/5p2/3pp3/6b1/3PP3/PPP2PPP/NRBNKBQR w hb - 0 9", {18, + // 664, 13306, 483892, 10658989, 386307449}}, // 166 + // {"nrbnk1rb/ppp1pq1p/3p4/5pp1/2P1P3/1N6/PP1PKPPP/1RBN1QRB w gb - 2 9", + // {25, 966, 24026, 920345, 23957242, 913710194}}, // 167 + // {"1brnbkqr/pppppp2/6p1/7p/1Pn5/P1NP4/2P1PPPP/NBR1BKQR w HChc - 0 9", {22, + // 627, 13760, 395829, 9627826, 285900573}}, // 168 + // {"nrnbbk1r/p1pppppq/8/7p/1p6/P5PP/1PPPPPQ1/NRNBBK1R w HBhb - 2 9", {29, + // 888, 26742, 874270, 27229468, 930799376}}, // 169 + // {"n1nkb1qr/prppppbp/6p1/1p6/2P2P2/P7/1P1PP1PP/NRNKBBQR w HBh - 1 9", {29, + // 804, 24701, 688520, 21952444, 623156747}}, // 170 + // {"nr2bqrb/ppkpp1pp/1np5/5p1P/5P2/2P5/PP1PP1P1/NRNKBQRB w GB - 0 9", {22, + // 530, 13055, 347657, 9244693, 264088392}}, // 171 + // {"nbr1kqbr/p3pppp/2ppn3/1p4P1/4P3/1P6/P1PP1P1P/NBRNKQBR w HChc - 1 9", + // {23, 555, 14291, 350917, 9692630, 247479180}}, // 172 + // {"nr1bkqbr/1p1pp1pp/pnp2p2/8/6P1/P1PP4/1P2PP1P/NRNBKQBR w HBhb - 0 9", + // {22, 565, 13343, 365663, 9305533, 268612479}}, // 173 + // {"nr1kqbbr/np2pppp/p1p5/1B1p1P2/8/4P3/PPPP2PP/NRNKQ1BR w HBhb - 0 9", + // {32, 730, 23391, 556995, 18103280, 454569900}}, // 174 + // {"nrnk1rbb/p1p2ppp/3pq3/Qp2p3/1P1P4/8/P1P1PPPP/NRN1KRBB w fb - 2 9", {28, + // 873, 25683, 791823, 23868737, 747991356}}, // 175 + // {"bbnrnkrq/pp1ppp1p/6p1/2p5/6P1/P5RP/1PPPPP2/BBNRNK1Q w Dgd - 3 9", {37, + // 1260, 45060, 1542086, 54843403, 1898432768}}, // 176 + // {"bnrb1rkq/ppnpppp1/3Q4/2p4p/7P/N7/PPPPPPP1/B1RBNKR1 w GC - 2 9", {38, + // 878, 31944, 800440, 28784300, 784569826}}, // 177 + // {"bnrnkbrq/p1ppppp1/1p5p/8/P2PP3/5P2/1PP3PP/BNRNKBRQ w GCgc - 1 9", {26, + // 617, 16992, 419099, 11965544, 311309576}}, // 178 + // {"bnrnkrqb/pp2p2p/2pp1pp1/8/P7/2PP1P2/1P2P1PP/BNRNKRQB w FCfc - 0 9", + // {26, 721, 19726, 560824, 15966934, 467132503}}, // 179 + // {"nbbrnkr1/1pppp1p1/p6q/P4p1p/8/5P2/1PPPP1PP/NBBRNRKQ w gd - 2 9", {18, + // 556, 10484, 316634, 6629293, 202528241}}, // 180 + // {"nrb1nkrq/2pp1ppp/p4b2/1p2p3/P4B2/3P4/1PP1PPPP/NR1BNRKQ w gb - 0 9", + // {24, 562, 14017, 355433, 9227883, 247634489}}, // 181 + // {"nrbnkbrq/p3p1pp/1p6/2pp1P2/8/3PP3/PPP2P1P/NRBNKBRQ w GBgb - 0 9", {31, + // 746, 24819, 608523, 21019301, 542954168}}, // 182 + // {"nrbnkrqb/pppp1p1p/4p1p1/8/7P/2P1P3/PPNP1PP1/1RBNKRQB w FBfb - 0 9", + // {20, 459, 9998, 242762, 5760165, 
146614723}}, // 183 + // {"nbrn1krq/ppp1p2p/6b1/3p1pp1/8/4N1PP/PPPPPP2/NBR1BRKQ w gc - 1 9", {27, + // 835, 23632, 766397, 22667987, 760795567}}, // 184 + // {"nrnbbkrq/p1pp2pp/5p2/1p6/2P1pP1B/1P6/P2PP1PP/NRNB1KRQ w GBgb - 0 9", + // {24, 646, 16102, 444472, 11489727, 324948755}}, // 185 + // {"nrn1bbrq/1ppkppp1/p2p3p/8/1P3N2/4P3/P1PP1PPP/NR1KBBRQ w GB - 2 9", {32, + // 591, 18722, 381683, 12069159, 269922838}}, // 186 + // {"n1krbrqb/1ppppppp/p7/8/4n3/P4P1P/1PPPPQP1/NRNKBR1B w FB - 2 9", {26, + // 639, 16988, 417190, 12167153, 312633873}}, // 187 + // {"n1rnkrbq/1p1ppp1p/8/p1p1b1p1/3PQ1P1/4N3/PPP1PP1P/NBR1KRB1 w FCfc - 0 + // 9", {35, 1027, 35731, 1040417, 35738410, 1060661628}}, // 188 + // {"nrnbkrbq/2pp1pp1/pp6/4p2p/P7/5PPP/1PPPP3/NRNBKRBQ w FBfb - 0 9", {26, + // 628, 16731, 436075, 11920087, 331498921}}, // 189 + // {"1rnkrbbq/pp1p2pp/1n3p2/1Bp1p3/1P6/1N2P3/P1PP1PPP/1RNKR1BQ w EBeb - 0 + // 9", {33, 992, 32244, 983481, 31703749, 980306735}}, // 190 + // {"nr1krqbb/p1ppppp1/8/1p5p/1Pn5/5P2/P1PPP1PP/NRNKRQBB w EBeb - 0 9", {24, + // 670, 15985, 445492, 11371067, 325556465}}, // 191 + // {"bbq1rkr1/1ppppppp/p1n2n2/8/2P2P2/1P6/PQ1PP1PP/BB1NRKNR w HEe - 3 9", + // {32, 794, 26846, 689334, 24085223, 645633370}}, // 192 + // {"b1nbrknr/1qppp1pp/p4p2/1p6/6P1/P2NP3/1PPP1P1P/BQ1BRKNR w HEhe - 1 9", + // {25, 663, 17138, 482994, 13157826, 389603029}}, // 193 + // {"bqnrk1nr/pp2ppbp/6p1/2pp4/2P5/5P2/PPQPP1PP/B1NRKBNR w HDhd - 0 9", {26, + // 850, 22876, 759768, 21341087, 719712622}}, // 194 + // {"bqnrknrb/1ppp1p1p/p7/6p1/1P2p3/P1PN4/3PPPPP/BQ1RKNRB w GDgd - 0 9", + // {25, 721, 19290, 581913, 16391601, 511725087}}, // 195 + // {"q1b1rknr/pp1pppp1/4n2p/2p1b3/1PP5/4P3/PQ1P1PPP/1BBNRKNR w HEhe - 1 9", + // {32, 975, 32566, 955493, 32649943, 962536105}}, // 196 + // {"qnbbrknr/1p1ppppp/8/p1p5/5P2/PP1P4/2P1P1PP/QNBBRKNR w HEhe - 0 9", {27, + // 573, 16331, 391656, 11562434, 301166330}}, // 197 + // {"q1brkb1r/p1pppppp/np3B2/8/6n1/1P5N/P1PPPPPP/QN1RKB1R w HDhd - 0 9", + // {32, 984, 31549, 1007217, 32597704, 1075429389}}, // 198 + // {"qn1rk1rb/p1pppppp/1p2n3/8/2b5/4NPP1/PPPPP1RP/QNBRK2B w Dgd - 4 9", {22, + // 802, 19156, 697722, 17761431, 650603534}}, // 199 + // {"qbnrbknr/ppp2p1p/8/3pp1p1/1PP1B3/5N2/P2PPPPP/Q1NRBK1R w HDhd - 0 9", + // {34, 943, 32506, 930619, 32523099, 955802240}}, // 200 + // {"qnrbb1nr/pp1p1ppp/2p2k2/4p3/4P3/5PPP/PPPP4/QNRBBKNR w HC - 0 9", {20, + // 460, 10287, 241640, 5846781, 140714047}}, // 201 + // {"qnr1bbnr/ppk1p1pp/3p4/2p2p2/8/2P5/PP1PPPPP/QNKRBBNR w - - 1 9", {19, + // 572, 11834, 357340, 7994547, 243724815}}, // 202 + // {"qnrkbnrb/1p1p1ppp/2p5/4p3/p7/N1BP4/PPP1PPPP/Q1R1KNRB w gc - 0 9", {27, + // 579, 16233, 375168, 10845146, 268229097}}, // 203 + // {"qbnrkn1r/1pppp1p1/p3bp2/2BN3p/8/5P2/PPPPP1PP/QBNRK2R w HDhd - 0 9", + // {40, 1027, 38728, 1059229, 38511307, 1104094381}}, // 204 + // {"qnrbknbr/1pp2ppp/4p3/p6N/2p5/8/PPPPPPPP/Q1RBK1BR w HChc - 0 9", {22, + // 510, 11844, 300180, 7403327, 200581103}}, // 205 + // {"1qkrnbbr/p1pppppp/2n5/1p6/8/5NP1/PPPPPP1P/QNRK1BBR w HC - 4 9", {24, + // 549, 13987, 352037, 9396521, 255676649}}, // 206 + // {"q1rknr1b/1ppppppb/2n5/p2B3p/8/1PN3P1/P1PPPP1P/Q1RKNRB1 w FCfc - 3 9", + // {31, 924, 28520, 861944, 27463479, 847726572}}, // 207 + // {"bbnqrk1r/pp1pppp1/2p4p/8/6n1/1N1P1P2/PPP1P1PP/BBQ1RKNR w HEhe - 4 9", + // {24, 804, 20147, 666341, 18024195, 595947631}}, // 208 + // {"bn1brknr/ppp1p1pp/5p2/3p4/6qQ/3P3P/PPP1PPP1/BN1BRKNR w HEhe - 4 9", + // {25, 854, 22991, 704173, 20290974, 600195008}}, // 209 + // 
{"1nqrkbnr/2pp1ppp/pp2p3/3b4/2P5/N7/PP1PPPPP/B1QRKBNR w HDhd - 0 9", {22, + // 651, 16173, 479152, 13133439, 390886040}}, // 210 + // {"bnqrk1rb/1pp1pppp/p2p4/4n3/2PPP3/8/PP3PPP/BNQRKNRB w GDgd - 1 9", {30, + // 950, 28169, 889687, 27610213, 880739164}}, // 211 + // {"nbb1rknr/1ppq1ppp/3p4/p3p3/4P3/1N2R3/PPPP1PPP/1BBQ1KNR w Hhe - 2 9", + // {33, 988, 31293, 967575, 30894863, 985384035}}, // 212 + // {"nqbbrknr/2ppp2p/pp4p1/5p2/7P/3P1P2/PPPBP1P1/NQ1BRKNR w HEhe - 0 9", + // {27, 492, 13266, 276569, 7583292, 175376176}}, // 213 + // {"1qbrkb1r/pppppppp/8/3n4/4P1n1/PN6/1PPP1P1P/1QBRKBNR w HDhd - 3 9", {28, + // 800, 21982, 630374, 17313279, 507140861}}, // 214 + // {"1qbrknrb/1p1ppppp/1np5/8/p4P1P/4P1N1/PPPP2P1/NQBRK1RB w GDgd - 0 9", + // {21, 482, 10581, 267935, 6218644, 168704845}}, // 215 + // {"nbqrbkr1/ppp1pppp/8/3p4/6n1/2P2PPN/PP1PP2P/NBQRBK1R w HDd - 1 9", {29, + // 921, 25748, 840262, 24138518, 806554650}}, // 216 + // {"nqrb1knr/1ppbpp1p/p7/3p2p1/2P3P1/5P1P/PP1PP3/NQRBBKNR w HChc - 1 9", + // {31, 803, 25857, 665799, 21998733, 583349773}}, // 217 + // {"1qrkbbr1/pppp1ppp/1n3n2/4p3/5P2/1N6/PPPPP1PP/1QRKBBNR w HCc - 0 9", + // {25, 715, 19118, 556325, 15514933, 459533767}}, // 218 + // {"nqrkb1rb/pp2pppp/2p1n3/3p4/3PP1N1/8/PPP2PPP/NQRKB1RB w GCgc - 0 9", + // {26, 795, 21752, 679387, 19185851, 616508881}}, // 219 + // {"nb1rknbr/pp2ppp1/8/2Bp3p/6P1/2P2P1q/PP1PP2P/NBQRKN1R w HDhd - 0 9", + // {35, 1391, 43025, 1726888, 53033675, 2139267832}}, // 220 + // {"nqrbkn1r/pp1pp1pp/8/2p2p2/5P2/P3B2P/1PbPP1P1/NQRBKN1R w HChc - 0 9", + // {23, 758, 19439, 653854, 18296195, 628403401}}, // 221 + // {"nqrknbbr/pp1pppp1/7p/2p5/7P/1P1N4/P1PPPPPB/NQRK1B1R w HChc - 2 9", {29, + // 824, 23137, 683686, 19429491, 595493802}}, // 222 + // {"1qrknrbb/B1p1pppp/8/1p1p4/2n2P2/1P6/P1PPP1PP/NQRKNR1B w FCfc - 0 9", + // {28, 771, 20237, 581721, 16065378, 483037840}}, // 223 + // {"bbnrqk1r/1ppppppp/8/7n/1p6/P6P/1BPPPPP1/1BNRQKNR w HDhd - 0 9", {25, + // 601, 15471, 396661, 10697065, 289472497}}, // 224 + // {"bnrbqknr/ppp3p1/3ppp1Q/7p/3P4/1P6/P1P1PPPP/BNRB1KNR w HChc - 0 9", {32, + // 845, 26876, 742888, 23717883, 682154649}}, // 225 + // {"bn1qkb1r/pprppppp/8/2p5/2PPP1n1/8/PPR2PPP/BN1QKBNR w Hh - 1 9", {32, + // 856, 27829, 768595, 25245957, 727424329}}, // 226 + // {"1nrqknrb/p1pp1ppp/1p2p3/3N4/5P1P/5b2/PPPPP3/B1RQKNRB w GCgc - 2 9", + // {33, 873, 27685, 779473, 25128076, 745401024}}, // 227 + // {"nbbrqrk1/pppppppp/8/2N1n3/P7/6P1/1PPPPP1P/1BBRQKNR w HD - 3 9", {25, + // 555, 14339, 342296, 9153089, 234841945}}, // 228 + // {"1rbbqknr/1ppp1pp1/1n2p3/p6p/4P1P1/P6N/1PPP1P1P/NRBBQK1R w HBhb - 0 9", + // {25, 693, 18652, 528070, 15133381, 439344945}}, // 229 + // {"nrq1kbnr/p1pbpppp/3p4/1p6/6P1/1N3N2/PPPPPP1P/1RBQKB1R w HBhb - 4 9", + // {24, 648, 16640, 471192, 12871967, 380436777}}, // 230 + // {"nr1qknr1/p1pppp1p/b5p1/1p6/8/P4PP1/1bPPP1RP/NRBQKN1B w Bgb - 0 9", {18, + // 533, 11215, 331243, 7777833, 234905172}}, // 231 + // {"nbrqbknr/1ppp2pp/8/4pp2/p2PP1P1/7N/PPP2P1P/NBRQBK1R w HChc - 0 9", {29, + // 803, 24416, 706648, 22305910, 672322762}}, // 232 + // {"nr1b1k1r/ppp1pppp/2bp1n2/6P1/2P3q1/5P2/PP1PP2P/NRQBBKNR w HBhb - 1 9", + // {27, 1199, 30908, 1296241, 35121759, 1418677099}}, // 233 + // {"nrqkbbnr/2pppp1p/p7/1p6/2P1Pp2/8/PPNP2PP/1RQKBBNR w HBhb - 0 9", {28, + // 613, 17874, 432750, 13097064, 345294379}}, // 234 + // {"1rqkbnrb/pp1ppp1p/1n4p1/B1p5/3PP3/4N3/PPP2PPP/NRQK2RB w GBgb - 0 9", + // {33, 723, 23991, 590970, 19715083, 535650233}}, // 235 + // 
{"nbrqkn1r/1pppp2p/5pp1/p2b4/5P2/P2PN3/1PP1P1PP/NBRQK1BR w HChc - 2 9", + // {23, 607, 15482, 400970, 11026383, 290708878}}, // 236 + // {"nrqbknbr/pp1pppp1/8/2p4p/P3PP2/8/1PPP2PP/NRQBKNBR w HBhb - 1 9", {26, + // 700, 19371, 556026, 16058815, 485460242}}, // 237 + // {"nrqknbbr/p2pppp1/1pp5/6Qp/3P4/1P3P2/P1P1P1PP/NR1KNBBR w HBhb - 0 9", + // {40, 905, 32932, 829746, 29263502, 791963709}}, // 238 + // {"nrqknrbb/1p3ppp/p2p4/2p1p3/1P6/3PP1P1/P1P2P1P/NRQKNRBB w FBfb - 0 9", + // {29, 780, 22643, 654495, 19532077, 593181101}}, // 239 + // {"1bnrkqnr/p1pppp2/7p/1p4p1/4b3/7N/PPPP1PPP/BBNRKQ1R w HDhd - 0 9", {25, + // 725, 19808, 565006, 16661676, 487354613}}, // 240 + // {"bnrbkq1r/pp2p1pp/5n2/2pp1p2/P7/N1PP4/1P2PPPP/B1RBKQNR w HChc - 1 9", + // {24, 745, 18494, 584015, 15079602, 488924040}}, // 241 {"2rkqbnr/p1pppppp/2b5/1pn5/1P3P1Q/2B5/P1PPP1PP/1NRK1BNR w HChc - 3 9", {33, 904, 30111, 840025, 28194726, 801757709}}, // 242 - {"bnrkqnrb/2pppp2/8/pp4pp/1P5P/6P1/P1PPPPB1/BNRKQNR1 w GCgc - 0 9", - {34, 1059, 34090, 1054311, 33195397, 1036498304}}, // 243 - {"1bbrkq1r/pppp2pp/1n2pp1n/8/2PP4/1N4P1/PP2PP1P/1BBRKQNR w HDhd - 1 9", - {33, 891, 28907, 814247, 26970098, 788040469}}, // 244 - {"nrbbkqnr/1p2pp1p/p1p3p1/3p4/8/1PP5/P2PPPPP/NRBBKQNR w HBhb - 0 9", - {21, 567, 13212, 376487, 9539687, 284426039}}, // 245 - {"1rbkqbr1/ppp1pppp/1n5n/3p4/3P4/1PP3P1/P3PP1P/NRBKQBNR w HBb - 1 9", - {27, 752, 20686, 606783, 16986290, 521817800}}, // 246 - {"nrbkq1rb/1ppp1pp1/4p1n1/p6p/2PP4/5P2/PPK1P1PP/NRB1QNRB w gb - 0 9", - {35, 697, 23678, 505836, 16906409, 390324794}}, // 247 - {"nbrkbqnr/p2pp1p1/5p2/1pp4p/7P/3P2P1/PPP1PP2/NBKRBQNR w hc - 0 9", - {25, 679, 17223, 484921, 12879258, 376652259}}, // 248 - {"nrkb1qnr/ppppp1p1/6bp/5p2/1PP1P1P1/8/P2P1P1P/NRKBBQNR w HBhb - 1 9", - {32, 761, 24586, 632916, 20671433, 568524724}}, // 249 - {"nrk1bbnr/p1q1pppp/1ppp4/8/3P3P/4K3/PPP1PPP1/NR1QBBNR w hb - 0 9", - {30, 719, 21683, 541389, 16278120, 423649784}}, // 250 - {"nrkqbr1b/1pppp1pp/5pn1/p6N/1P3P2/8/P1PPP1PP/NRKQB1RB w GBb - 0 9", - {26, 494, 13815, 296170, 8763742, 206993496}}, // 251 - {"nbrkq2r/pppp1bpp/4p1n1/5p2/7P/2P3N1/PP1PPPP1/NBKRQ1BR w hc - 0 9", - {27, 701, 19536, 535052, 15394667, 443506342}}, // 252 - {"nrkbqnbr/2ppp2p/pp6/5pp1/P1P5/8/1P1PPPPP/NRKBQNBR w HBhb - 0 9", - {21, 487, 11341, 285387, 7218486, 193586674}}, // 253 - {"nr1qnbbr/pk1pppp1/1pp4p/8/3P4/5P1P/PPP1P1P1/NRKQNBBR w HB - 0 9", - {22, 546, 13615, 352855, 9587439, 259830255}}, // 254 - {"nrkq1rbb/pp1ppp1p/2pn4/8/PP3Pp1/7P/2PPP1P1/NRKQNRBB w FBfb - 0 9", - {26, 839, 22075, 723845, 19867117, 658535326}}, // 255 - {"b2rknqr/pp1ppppp/8/2P5/n7/P7/1PPNPPPb/BBNRK1QR w HDhd - 2 9", - {24, 699, 19523, 575172, 17734818, 535094237}}, // 256 - {"bnrbknqr/pp2p2p/2p3p1/3p1p2/8/3P4/PPPNPPPP/B1RBKNQR w HChc - 0 9", - {23, 580, 14320, 385917, 10133092, 288041554}}, // 257 - {"bnrknb1r/pppp2pp/8/4pp2/6P1/3P3P/qPP1PPQ1/BNRKNB1R w HChc - 0 9", - {28, 1100, 31813, 1217514, 36142423, 1361341249}}, // 258 - {"b1rknqrb/ppp1p1p1/2np1p1p/8/4N3/6PQ/PPPPPP1P/B1RKN1RB w GCgc - 0 9", - {36, 629, 23082, 453064, 16897544, 367503974}}, // 259 - {"nb1rknqr/pbppp2p/6p1/1p3p2/5P2/3KP3/PPPP2PP/NBBR1NQR w hd - 2 9", - {18, 557, 9779, 300744, 5822387, 180936551}}, // 260 - {"nr1bknqr/1ppb1ppp/p7/3pp3/B7/2P3NP/PP1PPPP1/NRB1K1QR w HBhb - 2 9", - {28, 688, 19541, 519785, 15153092, 425149249}}, // 261 - {"nrbkn2r/pppp1pqp/4p1p1/8/3P2P1/P3B3/P1P1PP1P/NR1KNBQR w HBhb - 1 9", - {32, 808, 25578, 676525, 22094260, 609377239}}, // 262 - {"nrbknqrb/2p1ppp1/1p6/p2p2Bp/1P6/3P1P2/P1P1P1PP/NR1KNQRB w 
GBgb - 0 9", - {30, 625, 18288, 418895, 12225742, 301834282}}, // 263 - {"nbr1knqr/1pp1p1pp/3p1pb1/8/7P/5P2/PPPPPQP1/NBRKBN1R w HC - 2 9", - {29, 863, 25767, 800239, 24965592, 799182442}}, // 264 - {"n1kbbnqr/prp2ppp/1p1p4/4p3/1P2P3/3P1B2/P1P2PPP/NRK1BNQR w HBh - 2 9", - {26, 653, 17020, 449719, 12187583, 336872952}}, // 265 - {"nrknbbqr/pp3p1p/B3p1p1/2pp4/4P3/2N3P1/PPPP1P1P/NRK1B1QR w HBhb - 0 9", - {29, 683, 19755, 501807, 14684565, 394951291}}, // 266 - {"n1knbqrb/pr1p1ppp/Qp6/2p1p3/4P3/6P1/PPPP1P1P/NRKNB1RB w GBg - 2 9", - {31, 552, 17197, 371343, 11663330, 283583340}}, // 267 - {"nbrknqbr/p3p1pp/1p1p1p2/2p5/2Q1PP2/8/PPPP2PP/NBRKN1BR w HChc - 0 9", - {37, 913, 32470, 825748, 28899548, 759875563}}, // 268 - {"nrkb1qbr/pp1pppp1/5n2/7p/2p5/1N1NPP2/PPPP2PP/1RKB1QBR w HBhb - 0 9", - {25, 712, 18813, 543870, 15045589, 445074372}}, // 269 - {"nrk2bbr/pppqpppp/3p4/8/1P3nP1/3P4/P1P1PP1P/NRKNQBBR w HBhb - 1 9", - {24, 814, 19954, 670162, 17603960, 592121050}}, // 270 - {"nrknqrbb/1p2ppp1/2pp4/Q6p/P2P3P/8/1PP1PPP1/NRKN1RBB w FBfb - 0 9", - {34, 513, 16111, 303908, 9569590, 206509331}}, // 271 - {"bbnrk1rq/pp2p1pp/2ppn3/5p2/8/3NNP1P/PPPPP1P1/BB1RK1RQ w GDgd - 1 9", - {28, 697, 20141, 517917, 15301879, 410843713}}, // 272 - {"bnrbknrq/ppppp2p/6p1/5p2/4QPP1/8/PPPPP2P/BNRBKNR1 w GCgc - 0 9", - {37, 901, 32612, 877372, 31385912, 903831981}}, // 273 - {"bnkrnbrq/ppppp1p1/B6p/5p2/8/4P3/PPPP1PPP/BNKRN1RQ w - - 0 9", - {26, 417, 11124, 217095, 5980981, 133080499}}, // 274 - {"bnrk1rqb/2pppp1p/3n4/pp4p1/3Q1P2/2N3P1/PPPPP2P/B1RKNR1B w FCfc - 0 9", - {49, 1655, 74590, 2512003, 107234294, 3651608327}}, // 275 - {"nbbrk1rq/pp2pppp/2pp4/8/2P2n2/6N1/PP1PP1PP/NBBRKR1Q w Dgd - 0 9", - {28, 960, 26841, 884237, 26083252, 846682836}}, // 276 - {"nrbb2rq/pppk1ppp/4p1n1/3p4/6P1/1BP5/PP1PPPQP/NRB1KNR1 w GB - 0 9", - {28, 735, 22048, 593839, 18588316, 512048946}}, // 277 - {"nrbk1brq/p1ppppp1/7p/1p6/4P1nP/P7/1PPP1PP1/NRBKNBRQ w GBgb - 0 9", - {22, 572, 12739, 351494, 8525056, 247615348}}, // 278 - {"nrbk1rqb/1pp2ppp/5n2/p2pp3/5B2/1N1P2P1/PPP1PP1P/1R1KNRQB w FBfb - 0 9", - {35, 927, 31559, 849932, 28465693, 783048748}}, // 279 - {"nbrkb1rq/p1pp1ppp/4n3/4p3/Pp6/6N1/1PPPPPPP/NBRKBRQ1 w Cgc - 0 9", - {20, 456, 10271, 247733, 6124625, 154766108}}, // 280 - {"nrkb1nrq/p2pp1pp/1pp2p2/7b/6PP/5P2/PPPPP2N/NRKBB1RQ w GBgb - 0 9", - {21, 479, 11152, 264493, 6696458, 165253524}}, // 281 - {"nr1nbbr1/pppkpp1p/6p1/3p4/P6P/1P6/1RPPPPP1/N1KNBBRQ w G - 1 9", - {20, 498, 11304, 288813, 7197322, 188021682}}, // 282 - {"nrknbrqb/3p1ppp/ppN1p3/8/6P1/8/PPPPPP1P/1RKNBRQB w FBfb - 0 9", - {32, 526, 17267, 319836, 10755190, 220058991}}, // 283 - {"nbrkn1bq/p1pppr1p/1p6/5pp1/8/1N2PP2/PPPP2PP/1BKRNRBQ w c - 1 9", - {19, 491, 10090, 277313, 6230616, 180748649}}, // 284 - {"nrkbnrbq/ppppppp1/8/8/7p/PP3P2/2PPPRPP/NRKBN1BQ w Bfb - 0 9", - {16, 353, 6189, 156002, 3008668, 82706705}}, // 285 - {"nrknrbbq/p4ppp/2p1p3/1p1p4/1P2P3/2P5/P1NP1PPP/1RKNRBBQ w EBeb - 0 9", - {29, 728, 21915, 587668, 18231199, 511686397}}, // 286 - {"nrknr1bb/pppp1p2/7p/2qPp1p1/8/1P5P/P1P1PPP1/NRKNRQBB w EBeb - 0 9", - {20, 714, 14336, 500458, 11132758, 386064577}}, // 287 - {"bbqnrrkn/ppp2p1p/3pp1p1/8/1PP5/2Q5/P1BPPPPP/B2NRKRN w GE - 0 9", - {39, 593, 23446, 424799, 16764576, 346185058}}, // 288 - {"bqn1rkrn/p1p2ppp/1p1p4/4p3/3PP2b/8/PPP2PPP/BQNBRKRN w GEge - 2 9", - {25, 773, 20042, 616817, 16632403, 515838333}}, // 289 - {"bqnrkb1n/p1p1pprp/3p4/1p2P1p1/2PP4/8/PP3PPP/BQNRKBRN w GDd - 1 9", - {31, 860, 28102, 810379, 27233018, 813751250}}, // 290 - 
{"bqr1krnb/ppppppp1/7p/3n4/1P4P1/P4N2/2PPPP1P/BQNRKR1B w FDf - 3 9", - {31, 709, 22936, 559830, 18608857, 480498340}}, // 291 - {"qbbn1krn/pp3ppp/4r3/2ppp3/P1P4P/8/1P1PPPP1/QBBNRKRN w GEg - 1 9", - {26, 775, 21100, 649673, 18476807, 582542257}}, // 292 - {"qnbbrkrn/1p1pp2p/p7/2p2pp1/8/4P2P/PPPP1PPK/QNBBRR1N w ge - 0 9", - {25, 599, 15139, 389104, 10260500, 279222412}}, // 293 - {"qnbrkbrn/1ppp2p1/p3p2p/5p2/P4P2/1P6/2PPP1PP/QNBRKBRN w GDgd - 0 9", - {27, 588, 16735, 394829, 11640416, 293541380}}, // 294 - {"1nbrkrnb/p1pppp1p/1pq3p1/8/4P3/P1P4N/1P1P1PPP/QNBRKR1B w FDfd - 1 9", - {18, 609, 11789, 406831, 8604788, 299491047}}, // 295 - {"qb1r1krn/pppp2pp/1n2ppb1/4P3/7P/8/PPPP1PP1/QBNRBKRN w GDgd - 0 9", - {20, 578, 12205, 349453, 7939483, 229142178}}, // 296 - {"qnr1bkrn/p3pppp/1bpp4/1p6/2P2PP1/8/PP1PPN1P/QNRBBKR1 w GCgc - 0 9", - {30, 865, 26617, 771705, 24475596, 719842237}}, // 297 - {"1nkrbbrn/qppppppp/8/8/p2P4/1P5P/P1P1PPP1/QNKRBBRN w - - 0 9", - {27, 672, 18371, 505278, 14065717, 410130412}}, // 298 - {"1qrkbrnb/ppp1p1pp/n2p4/5p2/4N3/8/PPPPPPPP/Q1RKBRNB w Ffc - 2 9", - {25, 718, 18573, 536771, 14404324, 424279467}}, // 299 - {"q1nrkrbn/pp1pppp1/2p4p/8/P7/5Pb1/BPPPPNPP/Q1NRKRB1 w FDfd - 0 9", - {22, 558, 12911, 336042, 8516966, 228074630}}, // 300 - {"qnrbkrbn/1p1p1pp1/p1p5/4p2p/8/3P1P2/PPP1P1PP/QNRBKRBN w FCfc - 0 9", - {28, 669, 17713, 440930, 12055174, 313276304}}, // 301 - {"qnrkr1bn/p1pp1ppp/8/1p2p3/3P1P2/bP4P1/P1P1P2P/QNRKRBBN w ECec - 1 9", - {23, 845, 20973, 759778, 19939053, 718075943}}, // 302 - {"q1krrnbb/p1p1pppp/2np4/1pB5/5P2/8/PPPPP1PP/QNRKRN1B w EC - 0 9", - {29, 776, 21966, 631941, 18110831, 549019739}}, // 303 - {"bbn1rkrn/pp1p1ppp/8/2p1p1q1/6P1/P7/BPPPPP1P/B1NQRKRN w GEge - 0 9", - {26, 936, 25177, 906801, 24984621, 901444251}}, // 304 - {"bn1brkrn/pp1qpp1p/2p3p1/3p4/1PPP4/P7/4PPPP/BNQBRKRN w GEge - 1 9", - {29, 755, 22858, 645963, 20128587, 600207069}}, // 305 - {"b2rkbrn/p1pppppp/qp6/8/1n6/2B2P2/P1PPP1PP/1NQRKBRN w GDgd - 0 9", - {24, 878, 21440, 791007, 20840078, 775795187}}, // 306 - {"b2rkrnb/pqp1pppp/n7/1p1p4/P7/N1P2N2/1P1PPPPP/B1QRKR1B w FDfd - 4 9", - {26, 724, 19558, 571891, 16109522, 492933398}}, // 307 - {"1bbqrkrn/ppppp1p1/8/5p1p/P1n3P1/3P4/1PP1PP1P/NBBQRRKN w ge - 1 9", - {25, 678, 17351, 461211, 12173245, 329661421}}, // 308 - {"nqb1rrkn/ppp1bppp/3pp3/8/3P4/1P6/PQP1PPPP/N1BBRRKN w - - 1 9", - {23, 503, 12465, 290341, 7626054, 188215608}}, // 309 - {"nqbrkbr1/p1pppppp/1p6/2N2n2/2P5/5P2/PP1PP1PP/1QBRKBRN w GDgd - 1 9", - {29, 688, 20289, 506302, 15167248, 399015237}}, // 310 - {"nqbrkrn1/1ppppp2/6pp/p7/1P6/2Q5/P1PPPPPP/N1BRKRNB w FDfd - 0 9", - {36, 602, 20985, 397340, 13706856, 291708797}}, // 311 - {"nbqrbrkn/pp1p1pp1/2p5/4p2p/2P3P1/1P3P2/P2PP2P/NBQRBKRN w GD - 0 9", - {34, 655, 22581, 474396, 16613630, 379344541}}, // 312 - {"nqrbbrkn/1p1pppp1/8/p1p4p/4P2P/1N4P1/PPPP1P2/1QRBBKRN w GC - 0 9", - {23, 597, 14468, 400357, 10096863, 294900903}}, // 313 - {"nqrkbbrn/2p1p1pp/pp1p1p2/8/P2N4/2P5/1P1PPPPP/1QRKBBRN w GCgc - 0 9", - {32, 744, 23310, 550728, 17597164, 428786656}}, // 314 - {"n1krbrnb/q1pppppp/p7/1p6/3Q4/2P2P2/PP1PP1PP/N1RKBRNB w FC - 1 9", - {43, 1038, 41327, 1074450, 40918952, 1126603824}}, // 315 - {"nb1rkrbn/p1pp1p1p/qp6/4p1p1/5PP1/P7/1PPPPB1P/NBQRKR1N w FDfd - 2 9", - {26, 645, 16463, 445464, 11911314, 342563372}}, // 316 - {"nqr1krbn/pppp1ppp/8/8/3pP3/5P2/PPPb1NPP/NQRBKRB1 w FCfc - 3 9", - {2, 51, 1047, 27743, 612305, 17040200}}, // 317 - {"n1rkrbbn/pqppppp1/7p/1p6/8/1NPP4/PP1KPPPP/1QR1RBBN w ec - 0 9", - {25, 674, 17553, 505337, 13421727, 
403551903}}, // 318 + // {"bnrkqnrb/2pppp2/8/pp4pp/1P5P/6P1/P1PPPPB1/BNRKQNR1 w GCgc - 0 9", {34, + // 1059, 34090, 1054311, 33195397, 1036498304}}, // 243 + // {"1bbrkq1r/pppp2pp/1n2pp1n/8/2PP4/1N4P1/PP2PP1P/1BBRKQNR w HDhd - 1 9", + // {33, 891, 28907, 814247, 26970098, 788040469}}, // 244 + // {"nrbbkqnr/1p2pp1p/p1p3p1/3p4/8/1PP5/P2PPPPP/NRBBKQNR w HBhb - 0 9", {21, + // 567, 13212, 376487, 9539687, 284426039}}, // 245 + // {"1rbkqbr1/ppp1pppp/1n5n/3p4/3P4/1PP3P1/P3PP1P/NRBKQBNR w HBb - 1 9", + // {27, 752, 20686, 606783, 16986290, 521817800}}, // 246 + // {"nrbkq1rb/1ppp1pp1/4p1n1/p6p/2PP4/5P2/PPK1P1PP/NRB1QNRB w gb - 0 9", + // {35, 697, 23678, 505836, 16906409, 390324794}}, // 247 + // {"nbrkbqnr/p2pp1p1/5p2/1pp4p/7P/3P2P1/PPP1PP2/NBKRBQNR w hc - 0 9", {25, + // 679, 17223, 484921, 12879258, 376652259}}, // 248 + // {"nrkb1qnr/ppppp1p1/6bp/5p2/1PP1P1P1/8/P2P1P1P/NRKBBQNR w HBhb - 1 9", + // {32, 761, 24586, 632916, 20671433, 568524724}}, // 249 + // {"nrk1bbnr/p1q1pppp/1ppp4/8/3P3P/4K3/PPP1PPP1/NR1QBBNR w hb - 0 9", {30, + // 719, 21683, 541389, 16278120, 423649784}}, // 250 + // {"nrkqbr1b/1pppp1pp/5pn1/p6N/1P3P2/8/P1PPP1PP/NRKQB1RB w GBb - 0 9", {26, + // 494, 13815, 296170, 8763742, 206993496}}, // 251 + // {"nbrkq2r/pppp1bpp/4p1n1/5p2/7P/2P3N1/PP1PPPP1/NBKRQ1BR w hc - 0 9", {27, + // 701, 19536, 535052, 15394667, 443506342}}, // 252 + // {"nrkbqnbr/2ppp2p/pp6/5pp1/P1P5/8/1P1PPPPP/NRKBQNBR w HBhb - 0 9", {21, + // 487, 11341, 285387, 7218486, 193586674}}, // 253 + // {"nr1qnbbr/pk1pppp1/1pp4p/8/3P4/5P1P/PPP1P1P1/NRKQNBBR w HB - 0 9", {22, + // 546, 13615, 352855, 9587439, 259830255}}, // 254 + // {"nrkq1rbb/pp1ppp1p/2pn4/8/PP3Pp1/7P/2PPP1P1/NRKQNRBB w FBfb - 0 9", {26, + // 839, 22075, 723845, 19867117, 658535326}}, // 255 + // {"b2rknqr/pp1ppppp/8/2P5/n7/P7/1PPNPPPb/BBNRK1QR w HDhd - 2 9", {24, 699, + // 19523, 575172, 17734818, 535094237}}, // 256 + // {"bnrbknqr/pp2p2p/2p3p1/3p1p2/8/3P4/PPPNPPPP/B1RBKNQR w HChc - 0 9", {23, + // 580, 14320, 385917, 10133092, 288041554}}, // 257 + // {"bnrknb1r/pppp2pp/8/4pp2/6P1/3P3P/qPP1PPQ1/BNRKNB1R w HChc - 0 9", {28, + // 1100, 31813, 1217514, 36142423, 1361341249}}, // 258 + // {"b1rknqrb/ppp1p1p1/2np1p1p/8/4N3/6PQ/PPPPPP1P/B1RKN1RB w GCgc - 0 9", + // {36, 629, 23082, 453064, 16897544, 367503974}}, // 259 + // {"nb1rknqr/pbppp2p/6p1/1p3p2/5P2/3KP3/PPPP2PP/NBBR1NQR w hd - 2 9", {18, + // 557, 9779, 300744, 5822387, 180936551}}, // 260 + // {"nr1bknqr/1ppb1ppp/p7/3pp3/B7/2P3NP/PP1PPPP1/NRB1K1QR w HBhb - 2 9", + // {28, 688, 19541, 519785, 15153092, 425149249}}, // 261 + // {"nrbkn2r/pppp1pqp/4p1p1/8/3P2P1/P3B3/P1P1PP1P/NR1KNBQR w HBhb - 1 9", + // {32, 808, 25578, 676525, 22094260, 609377239}}, // 262 + // {"nrbknqrb/2p1ppp1/1p6/p2p2Bp/1P6/3P1P2/P1P1P1PP/NR1KNQRB w GBgb - 0 9", + // {30, 625, 18288, 418895, 12225742, 301834282}}, // 263 + // {"nbr1knqr/1pp1p1pp/3p1pb1/8/7P/5P2/PPPPPQP1/NBRKBN1R w HC - 2 9", {29, + // 863, 25767, 800239, 24965592, 799182442}}, // 264 + // {"n1kbbnqr/prp2ppp/1p1p4/4p3/1P2P3/3P1B2/P1P2PPP/NRK1BNQR w HBh - 2 9", + // {26, 653, 17020, 449719, 12187583, 336872952}}, // 265 + // {"nrknbbqr/pp3p1p/B3p1p1/2pp4/4P3/2N3P1/PPPP1P1P/NRK1B1QR w HBhb - 0 9", + // {29, 683, 19755, 501807, 14684565, 394951291}}, // 266 + // {"n1knbqrb/pr1p1ppp/Qp6/2p1p3/4P3/6P1/PPPP1P1P/NRKNB1RB w GBg - 2 9", + // {31, 552, 17197, 371343, 11663330, 283583340}}, // 267 + // {"nbrknqbr/p3p1pp/1p1p1p2/2p5/2Q1PP2/8/PPPP2PP/NBRKN1BR w HChc - 0 9", + // {37, 913, 32470, 825748, 28899548, 759875563}}, // 268 + // 
{"nrkb1qbr/pp1pppp1/5n2/7p/2p5/1N1NPP2/PPPP2PP/1RKB1QBR w HBhb - 0 9", + // {25, 712, 18813, 543870, 15045589, 445074372}}, // 269 + // {"nrk2bbr/pppqpppp/3p4/8/1P3nP1/3P4/P1P1PP1P/NRKNQBBR w HBhb - 1 9", {24, + // 814, 19954, 670162, 17603960, 592121050}}, // 270 + // {"nrknqrbb/1p2ppp1/2pp4/Q6p/P2P3P/8/1PP1PPP1/NRKN1RBB w FBfb - 0 9", {34, + // 513, 16111, 303908, 9569590, 206509331}}, // 271 + // {"bbnrk1rq/pp2p1pp/2ppn3/5p2/8/3NNP1P/PPPPP1P1/BB1RK1RQ w GDgd - 1 9", + // {28, 697, 20141, 517917, 15301879, 410843713}}, // 272 + // {"bnrbknrq/ppppp2p/6p1/5p2/4QPP1/8/PPPPP2P/BNRBKNR1 w GCgc - 0 9", {37, + // 901, 32612, 877372, 31385912, 903831981}}, // 273 + // {"bnkrnbrq/ppppp1p1/B6p/5p2/8/4P3/PPPP1PPP/BNKRN1RQ w - - 0 9", {26, 417, + // 11124, 217095, 5980981, 133080499}}, // 274 + // {"bnrk1rqb/2pppp1p/3n4/pp4p1/3Q1P2/2N3P1/PPPPP2P/B1RKNR1B w FCfc - 0 9", + // {49, 1655, 74590, 2512003, 107234294, 3651608327}}, // 275 + // {"nbbrk1rq/pp2pppp/2pp4/8/2P2n2/6N1/PP1PP1PP/NBBRKR1Q w Dgd - 0 9", {28, + // 960, 26841, 884237, 26083252, 846682836}}, // 276 + // {"nrbb2rq/pppk1ppp/4p1n1/3p4/6P1/1BP5/PP1PPPQP/NRB1KNR1 w GB - 0 9", {28, + // 735, 22048, 593839, 18588316, 512048946}}, // 277 + // {"nrbk1brq/p1ppppp1/7p/1p6/4P1nP/P7/1PPP1PP1/NRBKNBRQ w GBgb - 0 9", {22, + // 572, 12739, 351494, 8525056, 247615348}}, // 278 + // {"nrbk1rqb/1pp2ppp/5n2/p2pp3/5B2/1N1P2P1/PPP1PP1P/1R1KNRQB w FBfb - 0 9", + // {35, 927, 31559, 849932, 28465693, 783048748}}, // 279 + // {"nbrkb1rq/p1pp1ppp/4n3/4p3/Pp6/6N1/1PPPPPPP/NBRKBRQ1 w Cgc - 0 9", {20, + // 456, 10271, 247733, 6124625, 154766108}}, // 280 + // {"nrkb1nrq/p2pp1pp/1pp2p2/7b/6PP/5P2/PPPPP2N/NRKBB1RQ w GBgb - 0 9", {21, + // 479, 11152, 264493, 6696458, 165253524}}, // 281 + // {"nr1nbbr1/pppkpp1p/6p1/3p4/P6P/1P6/1RPPPPP1/N1KNBBRQ w G - 1 9", {20, + // 498, 11304, 288813, 7197322, 188021682}}, // 282 + // {"nrknbrqb/3p1ppp/ppN1p3/8/6P1/8/PPPPPP1P/1RKNBRQB w FBfb - 0 9", {32, + // 526, 17267, 319836, 10755190, 220058991}}, // 283 + // {"nbrkn1bq/p1pppr1p/1p6/5pp1/8/1N2PP2/PPPP2PP/1BKRNRBQ w c - 1 9", {19, + // 491, 10090, 277313, 6230616, 180748649}}, // 284 + // {"nrkbnrbq/ppppppp1/8/8/7p/PP3P2/2PPPRPP/NRKBN1BQ w Bfb - 0 9", {16, 353, + // 6189, 156002, 3008668, 82706705}}, // 285 + // {"nrknrbbq/p4ppp/2p1p3/1p1p4/1P2P3/2P5/P1NP1PPP/1RKNRBBQ w EBeb - 0 9", + // {29, 728, 21915, 587668, 18231199, 511686397}}, // 286 + // {"nrknr1bb/pppp1p2/7p/2qPp1p1/8/1P5P/P1P1PPP1/NRKNRQBB w EBeb - 0 9", + // {20, 714, 14336, 500458, 11132758, 386064577}}, // 287 + // {"bbqnrrkn/ppp2p1p/3pp1p1/8/1PP5/2Q5/P1BPPPPP/B2NRKRN w GE - 0 9", {39, + // 593, 23446, 424799, 16764576, 346185058}}, // 288 + // {"bqn1rkrn/p1p2ppp/1p1p4/4p3/3PP2b/8/PPP2PPP/BQNBRKRN w GEge - 2 9", {25, + // 773, 20042, 616817, 16632403, 515838333}}, // 289 + // {"bqnrkb1n/p1p1pprp/3p4/1p2P1p1/2PP4/8/PP3PPP/BQNRKBRN w GDd - 1 9", {31, + // 860, 28102, 810379, 27233018, 813751250}}, // 290 + // {"bqr1krnb/ppppppp1/7p/3n4/1P4P1/P4N2/2PPPP1P/BQNRKR1B w FDf - 3 9", {31, + // 709, 22936, 559830, 18608857, 480498340}}, // 291 + // {"qbbn1krn/pp3ppp/4r3/2ppp3/P1P4P/8/1P1PPPP1/QBBNRKRN w GEg - 1 9", {26, + // 775, 21100, 649673, 18476807, 582542257}}, // 292 + // {"qnbbrkrn/1p1pp2p/p7/2p2pp1/8/4P2P/PPPP1PPK/QNBBRR1N w ge - 0 9", {25, + // 599, 15139, 389104, 10260500, 279222412}}, // 293 + // {"qnbrkbrn/1ppp2p1/p3p2p/5p2/P4P2/1P6/2PPP1PP/QNBRKBRN w GDgd - 0 9", + // {27, 588, 16735, 394829, 11640416, 293541380}}, // 294 + // {"1nbrkrnb/p1pppp1p/1pq3p1/8/4P3/P1P4N/1P1P1PPP/QNBRKR1B w FDfd - 1 9", + // {18, 
609, 11789, 406831, 8604788, 299491047}}, // 295 + // {"qb1r1krn/pppp2pp/1n2ppb1/4P3/7P/8/PPPP1PP1/QBNRBKRN w GDgd - 0 9", {20, + // 578, 12205, 349453, 7939483, 229142178}}, // 296 + // {"qnr1bkrn/p3pppp/1bpp4/1p6/2P2PP1/8/PP1PPN1P/QNRBBKR1 w GCgc - 0 9", + // {30, 865, 26617, 771705, 24475596, 719842237}}, // 297 + // {"1nkrbbrn/qppppppp/8/8/p2P4/1P5P/P1P1PPP1/QNKRBBRN w - - 0 9", {27, 672, + // 18371, 505278, 14065717, 410130412}}, // 298 + // {"1qrkbrnb/ppp1p1pp/n2p4/5p2/4N3/8/PPPPPPPP/Q1RKBRNB w Ffc - 2 9", {25, + // 718, 18573, 536771, 14404324, 424279467}}, // 299 + // {"q1nrkrbn/pp1pppp1/2p4p/8/P7/5Pb1/BPPPPNPP/Q1NRKRB1 w FDfd - 0 9", {22, + // 558, 12911, 336042, 8516966, 228074630}}, // 300 + // {"qnrbkrbn/1p1p1pp1/p1p5/4p2p/8/3P1P2/PPP1P1PP/QNRBKRBN w FCfc - 0 9", + // {28, 669, 17713, 440930, 12055174, 313276304}}, // 301 + // {"qnrkr1bn/p1pp1ppp/8/1p2p3/3P1P2/bP4P1/P1P1P2P/QNRKRBBN w ECec - 1 9", + // {23, 845, 20973, 759778, 19939053, 718075943}}, // 302 + // {"q1krrnbb/p1p1pppp/2np4/1pB5/5P2/8/PPPPP1PP/QNRKRN1B w EC - 0 9", {29, + // 776, 21966, 631941, 18110831, 549019739}}, // 303 + // {"bbn1rkrn/pp1p1ppp/8/2p1p1q1/6P1/P7/BPPPPP1P/B1NQRKRN w GEge - 0 9", + // {26, 936, 25177, 906801, 24984621, 901444251}}, // 304 + // {"bn1brkrn/pp1qpp1p/2p3p1/3p4/1PPP4/P7/4PPPP/BNQBRKRN w GEge - 1 9", {29, + // 755, 22858, 645963, 20128587, 600207069}}, // 305 + // {"b2rkbrn/p1pppppp/qp6/8/1n6/2B2P2/P1PPP1PP/1NQRKBRN w GDgd - 0 9", {24, + // 878, 21440, 791007, 20840078, 775795187}}, // 306 + // {"b2rkrnb/pqp1pppp/n7/1p1p4/P7/N1P2N2/1P1PPPPP/B1QRKR1B w FDfd - 4 9", + // {26, 724, 19558, 571891, 16109522, 492933398}}, // 307 + // {"1bbqrkrn/ppppp1p1/8/5p1p/P1n3P1/3P4/1PP1PP1P/NBBQRRKN w ge - 1 9", {25, + // 678, 17351, 461211, 12173245, 329661421}}, // 308 + // {"nqb1rrkn/ppp1bppp/3pp3/8/3P4/1P6/PQP1PPPP/N1BBRRKN w - - 1 9", {23, + // 503, 12465, 290341, 7626054, 188215608}}, // 309 + // {"nqbrkbr1/p1pppppp/1p6/2N2n2/2P5/5P2/PP1PP1PP/1QBRKBRN w GDgd - 1 9", + // {29, 688, 20289, 506302, 15167248, 399015237}}, // 310 + // {"nqbrkrn1/1ppppp2/6pp/p7/1P6/2Q5/P1PPPPPP/N1BRKRNB w FDfd - 0 9", {36, + // 602, 20985, 397340, 13706856, 291708797}}, // 311 + // {"nbqrbrkn/pp1p1pp1/2p5/4p2p/2P3P1/1P3P2/P2PP2P/NBQRBKRN w GD - 0 9", + // {34, 655, 22581, 474396, 16613630, 379344541}}, // 312 + // {"nqrbbrkn/1p1pppp1/8/p1p4p/4P2P/1N4P1/PPPP1P2/1QRBBKRN w GC - 0 9", {23, + // 597, 14468, 400357, 10096863, 294900903}}, // 313 + // {"nqrkbbrn/2p1p1pp/pp1p1p2/8/P2N4/2P5/1P1PPPPP/1QRKBBRN w GCgc - 0 9", + // {32, 744, 23310, 550728, 17597164, 428786656}}, // 314 + // {"n1krbrnb/q1pppppp/p7/1p6/3Q4/2P2P2/PP1PP1PP/N1RKBRNB w FC - 1 9", {43, + // 1038, 41327, 1074450, 40918952, 1126603824}}, // 315 + // {"nb1rkrbn/p1pp1p1p/qp6/4p1p1/5PP1/P7/1PPPPB1P/NBQRKR1N w FDfd - 2 9", + // {26, 645, 16463, 445464, 11911314, 342563372}}, // 316 + // {"nqr1krbn/pppp1ppp/8/8/3pP3/5P2/PPPb1NPP/NQRBKRB1 w FCfc - 3 9", {2, 51, + // 1047, 27743, 612305, 17040200}}, // 317 + // {"n1rkrbbn/pqppppp1/7p/1p6/8/1NPP4/PP1KPPPP/1QR1RBBN w ec - 0 9", {25, + // 674, 17553, 505337, 13421727, 403551903}}, // 318 {"1qrkrnbb/1p1p1ppp/pnp1p3/8/3PP3/P6P/1PP2PP1/NQRKRNBB w ECec - 0 9", {24, 688, 17342, 511444, 13322502, 403441498}}, // 319 - {"1bnrqkrn/2ppppp1/p7/1p1b3p/3PP1P1/8/PPPQ1P1P/BBNR1KRN w GDgd - 1 9", - {35, 925, 32238, 857060, 30458921, 824344087}}, // 320 - {"bnrbqkr1/ppp2pp1/6n1/3pp2p/1P6/2N3N1/P1PPPPPP/B1RBQRK1 w gc - 0 9", - {23, 704, 17345, 539587, 14154852, 450893738}}, // 321 - 
{"1nrqkbrn/p1pppppp/8/1p1b4/P6P/5P2/1PPPP1P1/BNRQKBRN w GCgc - 1 9", - {19, 505, 10619, 281422, 6450025, 175593967}}, // 322 - {"b1rqkrnb/ppppppp1/8/6p1/3n4/NP6/P1PPPP1P/B1RQKRNB w FCfc - 0 9", - {25, 614, 15578, 377660, 10391021, 259629603}}, // 323 - {"nbbrqkrn/ppp3p1/3pp3/5p1p/1P2P3/P7/2PPQPPP/NBBR1KRN w GDgd - 0 9", - {30, 833, 25719, 717713, 22873901, 649556666}}, // 324 - {"nr1bqrk1/ppp1pppp/6n1/3pP3/8/5PQb/PPPP2PP/NRBB1KRN w GB - 3 9", - {26, 734, 20161, 582591, 17199594, 512134836}}, // 325 - {"1rbqkbr1/ppppp1pp/1n6/4np2/3P1P2/6P1/PPPQP2P/NRB1KBRN w GBgb - 1 9", - {27, 662, 17897, 447464, 13038519, 338365642}}, // 326 - {"nr1qkr1b/ppp1pp1p/4bn2/3p2p1/4P3/1Q6/PPPP1PPP/NRB1KRNB w FBfb - 4 9", - {33, 939, 30923, 942138, 30995969, 991509814}}, // 327 - {"nb1qbkrn/pprp1pp1/7p/2p1pB2/Q1PP4/8/PP2PPPP/N1R1BKRN w GCg - 2 9", - {47, 1128, 50723, 1306753, 56747878, 1560584212}}, // 328 - {"nrqb1rkn/pp2pppp/2bp4/2p5/6P1/2P3N1/PP1PPP1P/NRQBBRK1 w - - 3 9", - {24, 828, 21148, 723705, 19506135, 668969549}}, // 329 - {"nrq1bbrn/ppkpp2p/2p3p1/P4p2/8/4P1N1/1PPP1PPP/NRQKBBR1 w GB - 0 9", - {25, 525, 13533, 309994, 8250997, 201795680}}, // 330 - {"Br1kbrn1/pqpppp2/8/6pp/3b2P1/1N6/PPPPPP1P/1RQKBRN1 w FBfb - 3 9", - {20, 790, 18175, 695905, 17735648, 669854148}}, // 331 - {"nbrqkrbn/2p1p1pp/p7/1p1p1p2/4P1P1/5P2/PPPP3P/NBRQKRBN w FCfc - 0 9", - {29, 771, 22489, 647106, 19192982, 591335970}}, // 332 - {"1rqbkrbn/1ppppp1p/1n6/p1N3p1/8/2P4P/PP1PPPP1/1RQBKRBN w FBfb - 0 9", - {29, 502, 14569, 287739, 8652810, 191762235}}, // 333 - {"1rqkrbbn/ppnpp1pp/8/2p5/6p1/3P4/PPP1PPPP/NRK1RBBN w eb - 0 9", - {19, 531, 10812, 300384, 6506674, 184309316}}, // 334 - {"nrqkrnbb/p1pp2pp/5p2/4P3/2p5/4N3/PP1PP1PP/NRQKR1BB w EBeb - 0 9", - {26, 800, 23256, 756695, 23952941, 809841274}}, // 335 - {"bbnrkqrn/pp3pp1/4p2p/2pp4/4P1P1/1PB5/P1PP1P1P/1BNRKQRN w GDgd - 0 9", - {33, 915, 30536, 878648, 29602610, 881898159}}, // 336 - {"bnrbkqr1/1p2pppp/6n1/p1pp4/7P/P3P3/1PPPKPP1/BNRB1QRN w gc - 0 9", - {19, 457, 9332, 238944, 5356253, 144653627}}, // 337 - {"b1rkqbrn/pp1p2pp/2n1p3/2p2p2/3P2PP/8/PPP1PP2/BNKRQBRN w gc - 0 9", - {30, 985, 30831, 1011700, 32684185, 1080607773}}, // 338 - {"b1rkqrnb/2ppppp1/np6/p6p/1P6/P2P3P/2P1PPP1/BNRKQRNB w FCfc - 0 9", - {26, 692, 18732, 517703, 14561181, 413226841}}, // 339 - {"nbbrkqrn/1ppp1p2/p6p/4p1p1/5P2/1P5P/P1PPPNP1/NBBRKQR1 w GDgd - 0 9", - {22, 561, 13222, 367487, 9307003, 273928315}}, // 340 - {"nrbbkqrn/p1pppppp/8/1p6/4P3/7Q/PPPP1PPP/NRBBK1RN w GBgb - 0 9", - {38, 769, 28418, 632310, 23091070, 560139600}}, // 341 - {"nrbkqbrn/1pppp2p/8/p4pp1/P4PQ1/8/1PPPP1PP/NRBK1BRN w GBgb - 0 9", - {23, 507, 13067, 321423, 8887567, 237475184}}, // 342 - {"nr1kqr1b/pp2pppp/5n2/2pp4/P5b1/5P2/1PPPPRPP/NRBK1QNB w Bfb - 2 9", - {18, 626, 12386, 434138, 9465555, 335004239}}, // 343 - {"nbkrbqrn/1pppppp1/8/4P2p/pP6/P7/2PP1PPP/NBRKBQRN w GC - 0 9", - {22, 329, 8475, 148351, 4160034, 82875306}}, // 344 - {"nrkb1qrn/pp1pp1pp/8/5p1b/P1p4P/6N1/1PPPPPP1/NRKBBQR1 w GBgb - 2 9", - {16, 479, 9037, 275354, 5862341, 184959796}}, // 345 - {"1rkq1brn/ppppp1pp/1n6/3b1p2/3N3P/5P2/PPPPP1P1/1RKQBBRN w GBgb - 3 9", - {23, 614, 15324, 418395, 11090645, 313526088}}, // 346 - {"nrk1brnb/pp1ppppp/2p5/3q4/5P2/PP6/1KPPP1PP/NR1QBRNB w fb - 1 9", - {25, 942, 21765, 792179, 19318837, 685549171}}, // 347 - {"nbrkqr1n/1pppp2p/p4pp1/2Bb4/5P2/6P1/PPPPP2P/NBRKQ1RN w Cfc - 2 9", - {30, 841, 24775, 677876, 20145765, 557578726}}, // 348 - {"n1kbqrbn/2p1pppp/1r6/pp1p4/P7/3P4/1PP1PPPP/NRKBQRBN w FBf - 2 9", - {21, 591, 14101, 394289, 10295086, 
292131422}}, // 349 - {"nrkqrbb1/ppp1pppp/3p4/8/4P3/2Pn1P2/PP4PP/NRKQRBBN w EBeb - 0 9", - {4, 88, 3090, 73414, 2640555, 66958031}}, // 350 - {"nrkqrnbb/ppppp1p1/7p/1P3p2/3P4/2P5/P3PPPP/NRKQRNBB w EBeb - 0 9", - {29, 689, 21091, 508789, 16226660, 408570219}}, // 351 - {"bbnr1rqn/pp2pkpp/2pp1p2/8/4P1P1/8/PPPP1P1P/BBNRKRQN w FD - 0 9", - {21, 463, 11135, 256244, 6826249, 165025370}}, // 352 - {"bnrbk1qn/1pppprpp/8/p4p1P/6P1/3P4/PPP1PP2/BNRBKRQN w FCc - 0 9", - {22, 459, 11447, 268157, 7371098, 190583454}}, // 353 - {"1nrkrbqn/p1pp1ppp/4p3/1p6/1PP5/6PB/P2PPPbP/BNRKR1QN w ECec - 0 9", - {30, 931, 29012, 887414, 28412902, 869228014}}, // 354 - {"b1rkr1nb/pppppqp1/n4B2/7p/8/1P4P1/P1PPPP1P/1NKRRQNB w ec - 1 9", - {36, 934, 31790, 930926, 30392925, 952871799}}, // 355 - {"nbbrkrqn/p1ppp1p1/8/1p3p1p/2P3PP/8/PP1PPPQ1/NBBRKR1N w FDfd - 0 9", - {34, 938, 31848, 921716, 31185844, 944483246}}, // 356 - {"1rbbkrqn/ppp1pp2/1n1p2p1/7p/P3P1P1/3P4/1PP2P1P/NRBBKRQN w FBfb - 0 9", - {26, 646, 18083, 472744, 14006203, 384101783}}, // 357 - {"nrbkrbq1/Qpppp1pp/2n5/5p2/P4P2/6N1/1PPPP1PP/NRBKRB2 w EBeb - 1 9", - {27, 619, 16713, 421845, 11718463, 313794027}}, // 358 - {"1rbkr1nb/pppp1qpp/1n6/4pp2/1PP1P3/8/PB1P1PPP/NR1KRQNB w EBeb - 1 9", - {32, 1029, 32970, 1080977, 35483796, 1181835398}}, // 359 - {"nbrk1rqn/p1ppp2p/1p6/5ppb/8/1N2P2P/PPPP1PP1/1BKRBRQN w fc - 0 9", - {18, 594, 12350, 408544, 9329122, 315021712}}, // 360 - {"nrkbbrqn/3pppp1/7p/ppp5/P7/1N5P/1PPPPPP1/1RKBBRQN w FBfb - 0 9", - {19, 417, 9026, 218513, 5236331, 137024458}}, // 361 - {"nrkr1bqn/ppp1pppp/3p4/1b6/7P/P7/1PPPPPP1/NRKRBBQN w DBdb - 1 9", - {17, 457, 9083, 243872, 5503579, 150091997}}, // 362 - {"nrkrbqnb/p4ppp/1p2p3/2pp4/6P1/2P2N2/PPNPPP1P/1RKRBQ1B w DBdb - 0 9", - {27, 755, 21012, 620093, 17883987, 547233320}}, // 363 - {"nbkrr1bn/ppB2ppp/4p3/2qp4/4P3/5P2/PPPP2PP/NBRKRQ1N w EC - 1 9", - {37, 1473, 51939, 1956521, 68070015, 2490912491}}, // 364 - {"n1kbrqbn/p1pp1pp1/4p2p/2B5/1r3P2/8/PPPPP1PP/NRKBRQ1N w EBe - 2 9", - {30, 1029, 30874, 1053163, 32318550, 1106487743}}, // 365 - {"nrkrqbbn/2pppp1p/8/pp6/1P1P2p1/P5P1/2P1PP1P/NRKRQBBN w DBdb - 0 9", - {22, 421, 10034, 221927, 5754555, 141245633}}, // 366 - {"nrkr1nbb/1ppp2pp/p3q3/4pp2/2P5/P3P3/1PKP1PPP/NR1RQNBB w db - 0 9", - {22, 619, 13953, 411392, 9905109, 301403003}}, // 367 - {"bbnrkrnq/1pp1p2p/6p1/p2p1p2/8/1P2P3/P1PP1PPP/BBNRKRNQ w FDfd - 0 9", - {27, 805, 21915, 688224, 19133881, 620749189}}, // 368 - {"bnrbkrn1/pp1ppp2/2p3pp/8/2Pq4/P4PP1/1P1PP2P/BNRBKRNQ w FCfc - 1 9", - {20, 770, 16593, 577980, 13581691, 456736500}}, // 369 - {"b1rkrbnq/1pp1pppp/2np4/p5N1/8/1P2P3/P1PP1PPP/BNRKRB1Q w ECec - 0 9", - {37, 740, 27073, 581744, 21156664, 485803600}}, // 370 - {"b1krrnqb/pp1ppp1p/n1p3p1/2N5/6P1/8/PPPPPP1P/B1RKRNQB w EC - 0 9", - {34, 850, 28494, 752350, 25360295, 698159474}}, // 371 - {"1bbr1rnq/ppppkppp/8/3np3/4P3/3P4/PPP1KPPP/NBBRR1NQ w - - 1 9", - {27, 704, 18290, 480474, 12817011, 341026662}}, // 372 - {"nrbbk1nq/p1p1prpp/1p6/N2p1p2/P7/8/1PPPPPPP/R1BBKRNQ w Fb - 2 9", - {23, 552, 13710, 348593, 9236564, 248469879}}, // 373 - {"1rbkrb1q/1pppp1pp/1n5n/p4p2/P3P3/1P6/2PPNPPP/NRBKRB1Q w EBeb - 1 9", - {22, 415, 10198, 217224, 5735644, 135295774}}, // 374 - {"nrbkr1qb/1pp1pppp/6n1/p2p4/2P1P3/1N4N1/PP1P1PPP/1RBKR1QB w EBeb - 0 9", - {27, 709, 19126, 506214, 14192779, 380516508}}, // 375 - {"nbrkbrnq/p3p1pp/1pp2p2/3p4/1PP5/4P3/P1KP1PPP/NBR1BRNQ w fc - 0 9", - {24, 715, 18009, 535054, 14322279, 427269976}}, // 376 - {"nrk1brnq/pp1p1pp1/7p/b1p1p3/1P6/6P1/P1PPPPQP/NRKBBRN1 w FBfb - 2 9", - {29, 675, 
20352, 492124, 15316285, 389051744}}, // 377 - {"nrkr1bnq/1p2pppp/p2p4/1bp5/PP6/1R5N/2PPPPPP/N1KRBB1Q w Ddb - 2 9", - {27, 744, 20494, 571209, 16188945, 458900901}}, // 378 - {"nrk1b1qb/pppn1ppp/3rp3/3p4/2P3P1/3P4/PPN1PP1P/1RKRBNQB w DBb - 3 9", - {35, 941, 33203, 935791, 33150360, 968024386}}, // 379 - {"nb1rrnbq/ppkp1ppp/8/2p1p3/P7/1N2P3/1PPP1PPP/1BKRRNBQ w - - 1 9", - {19, 451, 9655, 235472, 5506897, 139436165}}, // 380 - {"nrkbrnbq/4pppp/1ppp4/p7/2P1P3/3P2N1/PP3PPP/NRKBR1BQ w EBeb - 0 9", - {29, 591, 17132, 384358, 11245508, 270967202}}, // 381 - {"nrkrnbbq/3p1ppp/1p6/p1p1p3/3P2P1/P4Q2/1PP1PP1P/NRKRNBB1 w DBdb - 0 9", - {38, 792, 28597, 640961, 22654797, 540864616}}, // 382 - {"nr1rnqbb/ppp1pp1p/3k2p1/3p4/1P5P/3P1N2/P1P1PPP1/NRKR1QBB w DB - 1 9", - {25, 758, 18547, 543643, 13890077, 402109399}}, // 383 - {"bbqrnnkr/1ppp1p1p/5p2/p5p1/P7/1P4P1/2PPPP1P/1BQRNNKR w HDhd - 0 9", - {20, 322, 7224, 145818, 3588435, 82754650}}, // 384 - {"bqrb2k1/pppppppr/5nnp/8/3P1P2/4P1N1/PPP3PP/BQRBN1KR w HCc - 1 9", - {25, 597, 15872, 397970, 11162476, 295682250}}, // 385 - {"bqrnn1kr/1pppbppp/8/4p3/1p6/2P1N2P/P2PPPP1/BQR1NBKR w HChc - 1 9", - {34, 921, 31695, 864023, 30126510, 850296236}}, // 386 - {"bqr1nkr1/pppppp2/2n3p1/7p/1P1b1P2/8/PQP1P1PP/B1RNNKRB w GCgc - 0 9", - {23, 788, 21539, 686795, 20849374, 645694580}}, // 387 - {"qbbrnn1r/1pppp1pk/p7/5p1p/P2P3P/3N4/1PP1PPP1/QBBR1NKR w HD - 0 9", - {34, 713, 24475, 562189, 19494094, 482645160}}, // 388 - {"qrbb2kr/p1pppppp/1p1n4/8/1P3n2/P7/Q1PPP1PP/1RBBNNKR w HBhb - 0 9", - {28, 977, 26955, 949925, 27802999, 992109168}}, // 389 - {"qrb2bkr/1pp1pppp/2np1n2/pN6/3P4/4B3/PPP1PPPP/QR2NBKR w HBhb - 0 9", - {27, 730, 20534, 585091, 17005916, 507008968}}, // 390 - {"qrbnnkrb/pp2pp1p/8/2pp2p1/7P/P1P5/QP1PPPP1/1RBNNKRB w GBgb - 0 9", - {24, 813, 21142, 707925, 19615756, 655850285}}, // 391 - {"1brnb1kr/p1pppppp/1p6/8/4q2n/1P2P1P1/PNPP1P1P/QBR1BNKR w HChc - 3 9", - {17, 734, 13462, 530809, 11032633, 416356876}}, // 392 - {"1rnbbnkr/1pp1pppp/1q1p4/p7/4P3/5PN1/PPPP1BPP/QRNB2KR w HBhb - 1 9", - {26, 809, 21764, 706677, 20292750, 675408811}}, // 393 - {"qrnnbb1Q/ppp1pk1p/3p2p1/5p2/PP6/5P2/2PPP1PP/1RNNBBKR w HB - 0 9", - {37, 751, 27902, 603931, 22443036, 515122176}}, // 394 - {"qrnnbkrb/p3p1pp/3p1p2/1pp5/PP2P3/8/2PP1PPP/QRNNBRKB w gb - 0 9", - {30, 906, 27955, 872526, 27658191, 890966633}}, // 395 + // {"1bnrqkrn/2ppppp1/p7/1p1b3p/3PP1P1/8/PPPQ1P1P/BBNR1KRN w GDgd - 1 9", + // {35, 925, 32238, 857060, 30458921, 824344087}}, // 320 + // {"bnrbqkr1/ppp2pp1/6n1/3pp2p/1P6/2N3N1/P1PPPPPP/B1RBQRK1 w gc - 0 9", + // {23, 704, 17345, 539587, 14154852, 450893738}}, // 321 + // {"1nrqkbrn/p1pppppp/8/1p1b4/P6P/5P2/1PPPP1P1/BNRQKBRN w GCgc - 1 9", {19, + // 505, 10619, 281422, 6450025, 175593967}}, // 322 + // {"b1rqkrnb/ppppppp1/8/6p1/3n4/NP6/P1PPPP1P/B1RQKRNB w FCfc - 0 9", {25, + // 614, 15578, 377660, 10391021, 259629603}}, // 323 + // {"nbbrqkrn/ppp3p1/3pp3/5p1p/1P2P3/P7/2PPQPPP/NBBR1KRN w GDgd - 0 9", {30, + // 833, 25719, 717713, 22873901, 649556666}}, // 324 + // {"nr1bqrk1/ppp1pppp/6n1/3pP3/8/5PQb/PPPP2PP/NRBB1KRN w GB - 3 9", {26, + // 734, 20161, 582591, 17199594, 512134836}}, // 325 + // {"1rbqkbr1/ppppp1pp/1n6/4np2/3P1P2/6P1/PPPQP2P/NRB1KBRN w GBgb - 1 9", + // {27, 662, 17897, 447464, 13038519, 338365642}}, // 326 + // {"nr1qkr1b/ppp1pp1p/4bn2/3p2p1/4P3/1Q6/PPPP1PPP/NRB1KRNB w FBfb - 4 9", + // {33, 939, 30923, 942138, 30995969, 991509814}}, // 327 + // {"nb1qbkrn/pprp1pp1/7p/2p1pB2/Q1PP4/8/PP2PPPP/N1R1BKRN w GCg - 2 9", {47, + // 1128, 50723, 1306753, 56747878, 
1560584212}}, // 328 + // {"nrqb1rkn/pp2pppp/2bp4/2p5/6P1/2P3N1/PP1PPP1P/NRQBBRK1 w - - 3 9", {24, + // 828, 21148, 723705, 19506135, 668969549}}, // 329 + // {"nrq1bbrn/ppkpp2p/2p3p1/P4p2/8/4P1N1/1PPP1PPP/NRQKBBR1 w GB - 0 9", {25, + // 525, 13533, 309994, 8250997, 201795680}}, // 330 + // {"Br1kbrn1/pqpppp2/8/6pp/3b2P1/1N6/PPPPPP1P/1RQKBRN1 w FBfb - 3 9", {20, + // 790, 18175, 695905, 17735648, 669854148}}, // 331 + // {"nbrqkrbn/2p1p1pp/p7/1p1p1p2/4P1P1/5P2/PPPP3P/NBRQKRBN w FCfc - 0 9", + // {29, 771, 22489, 647106, 19192982, 591335970}}, // 332 + // {"1rqbkrbn/1ppppp1p/1n6/p1N3p1/8/2P4P/PP1PPPP1/1RQBKRBN w FBfb - 0 9", + // {29, 502, 14569, 287739, 8652810, 191762235}}, // 333 + // {"1rqkrbbn/ppnpp1pp/8/2p5/6p1/3P4/PPP1PPPP/NRK1RBBN w eb - 0 9", {19, + // 531, 10812, 300384, 6506674, 184309316}}, // 334 + // {"nrqkrnbb/p1pp2pp/5p2/4P3/2p5/4N3/PP1PP1PP/NRQKR1BB w EBeb - 0 9", {26, + // 800, 23256, 756695, 23952941, 809841274}}, // 335 + // {"bbnrkqrn/pp3pp1/4p2p/2pp4/4P1P1/1PB5/P1PP1P1P/1BNRKQRN w GDgd - 0 9", + // {33, 915, 30536, 878648, 29602610, 881898159}}, // 336 + // {"bnrbkqr1/1p2pppp/6n1/p1pp4/7P/P3P3/1PPPKPP1/BNRB1QRN w gc - 0 9", {19, + // 457, 9332, 238944, 5356253, 144653627}}, // 337 + // {"b1rkqbrn/pp1p2pp/2n1p3/2p2p2/3P2PP/8/PPP1PP2/BNKRQBRN w gc - 0 9", {30, + // 985, 30831, 1011700, 32684185, 1080607773}}, // 338 + // {"b1rkqrnb/2ppppp1/np6/p6p/1P6/P2P3P/2P1PPP1/BNRKQRNB w FCfc - 0 9", {26, + // 692, 18732, 517703, 14561181, 413226841}}, // 339 + // {"nbbrkqrn/1ppp1p2/p6p/4p1p1/5P2/1P5P/P1PPPNP1/NBBRKQR1 w GDgd - 0 9", + // {22, 561, 13222, 367487, 9307003, 273928315}}, // 340 + // {"nrbbkqrn/p1pppppp/8/1p6/4P3/7Q/PPPP1PPP/NRBBK1RN w GBgb - 0 9", {38, + // 769, 28418, 632310, 23091070, 560139600}}, // 341 + // {"nrbkqbrn/1pppp2p/8/p4pp1/P4PQ1/8/1PPPP1PP/NRBK1BRN w GBgb - 0 9", {23, + // 507, 13067, 321423, 8887567, 237475184}}, // 342 + // {"nr1kqr1b/pp2pppp/5n2/2pp4/P5b1/5P2/1PPPPRPP/NRBK1QNB w Bfb - 2 9", {18, + // 626, 12386, 434138, 9465555, 335004239}}, // 343 + // {"nbkrbqrn/1pppppp1/8/4P2p/pP6/P7/2PP1PPP/NBRKBQRN w GC - 0 9", {22, 329, + // 8475, 148351, 4160034, 82875306}}, // 344 + // {"nrkb1qrn/pp1pp1pp/8/5p1b/P1p4P/6N1/1PPPPPP1/NRKBBQR1 w GBgb - 2 9", + // {16, 479, 9037, 275354, 5862341, 184959796}}, // 345 + // {"1rkq1brn/ppppp1pp/1n6/3b1p2/3N3P/5P2/PPPPP1P1/1RKQBBRN w GBgb - 3 9", + // {23, 614, 15324, 418395, 11090645, 313526088}}, // 346 + // {"nrk1brnb/pp1ppppp/2p5/3q4/5P2/PP6/1KPPP1PP/NR1QBRNB w fb - 1 9", {25, + // 942, 21765, 792179, 19318837, 685549171}}, // 347 + // {"nbrkqr1n/1pppp2p/p4pp1/2Bb4/5P2/6P1/PPPPP2P/NBRKQ1RN w Cfc - 2 9", {30, + // 841, 24775, 677876, 20145765, 557578726}}, // 348 + // {"n1kbqrbn/2p1pppp/1r6/pp1p4/P7/3P4/1PP1PPPP/NRKBQRBN w FBf - 2 9", {21, + // 591, 14101, 394289, 10295086, 292131422}}, // 349 + // {"nrkqrbb1/ppp1pppp/3p4/8/4P3/2Pn1P2/PP4PP/NRKQRBBN w EBeb - 0 9", {4, + // 88, 3090, 73414, 2640555, 66958031}}, // 350 + // {"nrkqrnbb/ppppp1p1/7p/1P3p2/3P4/2P5/P3PPPP/NRKQRNBB w EBeb - 0 9", {29, + // 689, 21091, 508789, 16226660, 408570219}}, // 351 + // {"bbnr1rqn/pp2pkpp/2pp1p2/8/4P1P1/8/PPPP1P1P/BBNRKRQN w FD - 0 9", {21, + // 463, 11135, 256244, 6826249, 165025370}}, // 352 + // {"bnrbk1qn/1pppprpp/8/p4p1P/6P1/3P4/PPP1PP2/BNRBKRQN w FCc - 0 9", {22, + // 459, 11447, 268157, 7371098, 190583454}}, // 353 + // {"1nrkrbqn/p1pp1ppp/4p3/1p6/1PP5/6PB/P2PPPbP/BNRKR1QN w ECec - 0 9", {30, + // 931, 29012, 887414, 28412902, 869228014}}, // 354 + // {"b1rkr1nb/pppppqp1/n4B2/7p/8/1P4P1/P1PPPP1P/1NKRRQNB w ec - 1 9", {36, + // 
934, 31790, 930926, 30392925, 952871799}}, // 355 + // {"nbbrkrqn/p1ppp1p1/8/1p3p1p/2P3PP/8/PP1PPPQ1/NBBRKR1N w FDfd - 0 9", + // {34, 938, 31848, 921716, 31185844, 944483246}}, // 356 + // {"1rbbkrqn/ppp1pp2/1n1p2p1/7p/P3P1P1/3P4/1PP2P1P/NRBBKRQN w FBfb - 0 9", + // {26, 646, 18083, 472744, 14006203, 384101783}}, // 357 + // {"nrbkrbq1/Qpppp1pp/2n5/5p2/P4P2/6N1/1PPPP1PP/NRBKRB2 w EBeb - 1 9", {27, + // 619, 16713, 421845, 11718463, 313794027}}, // 358 + // {"1rbkr1nb/pppp1qpp/1n6/4pp2/1PP1P3/8/PB1P1PPP/NR1KRQNB w EBeb - 1 9", + // {32, 1029, 32970, 1080977, 35483796, 1181835398}}, // 359 + // {"nbrk1rqn/p1ppp2p/1p6/5ppb/8/1N2P2P/PPPP1PP1/1BKRBRQN w fc - 0 9", {18, + // 594, 12350, 408544, 9329122, 315021712}}, // 360 + // {"nrkbbrqn/3pppp1/7p/ppp5/P7/1N5P/1PPPPPP1/1RKBBRQN w FBfb - 0 9", {19, + // 417, 9026, 218513, 5236331, 137024458}}, // 361 + // {"nrkr1bqn/ppp1pppp/3p4/1b6/7P/P7/1PPPPPP1/NRKRBBQN w DBdb - 1 9", {17, + // 457, 9083, 243872, 5503579, 150091997}}, // 362 + // {"nrkrbqnb/p4ppp/1p2p3/2pp4/6P1/2P2N2/PPNPPP1P/1RKRBQ1B w DBdb - 0 9", + // {27, 755, 21012, 620093, 17883987, 547233320}}, // 363 + // {"nbkrr1bn/ppB2ppp/4p3/2qp4/4P3/5P2/PPPP2PP/NBRKRQ1N w EC - 1 9", {37, + // 1473, 51939, 1956521, 68070015, 2490912491}}, // 364 + // {"n1kbrqbn/p1pp1pp1/4p2p/2B5/1r3P2/8/PPPPP1PP/NRKBRQ1N w EBe - 2 9", {30, + // 1029, 30874, 1053163, 32318550, 1106487743}}, // 365 + // {"nrkrqbbn/2pppp1p/8/pp6/1P1P2p1/P5P1/2P1PP1P/NRKRQBBN w DBdb - 0 9", + // {22, 421, 10034, 221927, 5754555, 141245633}}, // 366 + // {"nrkr1nbb/1ppp2pp/p3q3/4pp2/2P5/P3P3/1PKP1PPP/NR1RQNBB w db - 0 9", {22, + // 619, 13953, 411392, 9905109, 301403003}}, // 367 + // {"bbnrkrnq/1pp1p2p/6p1/p2p1p2/8/1P2P3/P1PP1PPP/BBNRKRNQ w FDfd - 0 9", + // {27, 805, 21915, 688224, 19133881, 620749189}}, // 368 + // {"bnrbkrn1/pp1ppp2/2p3pp/8/2Pq4/P4PP1/1P1PP2P/BNRBKRNQ w FCfc - 1 9", + // {20, 770, 16593, 577980, 13581691, 456736500}}, // 369 + // {"b1rkrbnq/1pp1pppp/2np4/p5N1/8/1P2P3/P1PP1PPP/BNRKRB1Q w ECec - 0 9", + // {37, 740, 27073, 581744, 21156664, 485803600}}, // 370 + // {"b1krrnqb/pp1ppp1p/n1p3p1/2N5/6P1/8/PPPPPP1P/B1RKRNQB w EC - 0 9", {34, + // 850, 28494, 752350, 25360295, 698159474}}, // 371 + // {"1bbr1rnq/ppppkppp/8/3np3/4P3/3P4/PPP1KPPP/NBBRR1NQ w - - 1 9", {27, + // 704, 18290, 480474, 12817011, 341026662}}, // 372 + // {"nrbbk1nq/p1p1prpp/1p6/N2p1p2/P7/8/1PPPPPPP/R1BBKRNQ w Fb - 2 9", {23, + // 552, 13710, 348593, 9236564, 248469879}}, // 373 + // {"1rbkrb1q/1pppp1pp/1n5n/p4p2/P3P3/1P6/2PPNPPP/NRBKRB1Q w EBeb - 1 9", + // {22, 415, 10198, 217224, 5735644, 135295774}}, // 374 + // {"nrbkr1qb/1pp1pppp/6n1/p2p4/2P1P3/1N4N1/PP1P1PPP/1RBKR1QB w EBeb - 0 9", + // {27, 709, 19126, 506214, 14192779, 380516508}}, // 375 + // {"nbrkbrnq/p3p1pp/1pp2p2/3p4/1PP5/4P3/P1KP1PPP/NBR1BRNQ w fc - 0 9", {24, + // 715, 18009, 535054, 14322279, 427269976}}, // 376 + // {"nrk1brnq/pp1p1pp1/7p/b1p1p3/1P6/6P1/P1PPPPQP/NRKBBRN1 w FBfb - 2 9", + // {29, 675, 20352, 492124, 15316285, 389051744}}, // 377 + // {"nrkr1bnq/1p2pppp/p2p4/1bp5/PP6/1R5N/2PPPPPP/N1KRBB1Q w Ddb - 2 9", {27, + // 744, 20494, 571209, 16188945, 458900901}}, // 378 + // {"nrk1b1qb/pppn1ppp/3rp3/3p4/2P3P1/3P4/PPN1PP1P/1RKRBNQB w DBb - 3 9", + // {35, 941, 33203, 935791, 33150360, 968024386}}, // 379 + // {"nb1rrnbq/ppkp1ppp/8/2p1p3/P7/1N2P3/1PPP1PPP/1BKRRNBQ w - - 1 9", {19, + // 451, 9655, 235472, 5506897, 139436165}}, // 380 + // {"nrkbrnbq/4pppp/1ppp4/p7/2P1P3/3P2N1/PP3PPP/NRKBR1BQ w EBeb - 0 9", {29, + // 591, 17132, 384358, 11245508, 270967202}}, // 381 + // 
{"nrkrnbbq/3p1ppp/1p6/p1p1p3/3P2P1/P4Q2/1PP1PP1P/NRKRNBB1 w DBdb - 0 9", + // {38, 792, 28597, 640961, 22654797, 540864616}}, // 382 + // {"nr1rnqbb/ppp1pp1p/3k2p1/3p4/1P5P/3P1N2/P1P1PPP1/NRKR1QBB w DB - 1 9", + // {25, 758, 18547, 543643, 13890077, 402109399}}, // 383 + // {"bbqrnnkr/1ppp1p1p/5p2/p5p1/P7/1P4P1/2PPPP1P/1BQRNNKR w HDhd - 0 9", + // {20, 322, 7224, 145818, 3588435, 82754650}}, // 384 + // {"bqrb2k1/pppppppr/5nnp/8/3P1P2/4P1N1/PPP3PP/BQRBN1KR w HCc - 1 9", {25, + // 597, 15872, 397970, 11162476, 295682250}}, // 385 + // {"bqrnn1kr/1pppbppp/8/4p3/1p6/2P1N2P/P2PPPP1/BQR1NBKR w HChc - 1 9", {34, + // 921, 31695, 864023, 30126510, 850296236}}, // 386 + // {"bqr1nkr1/pppppp2/2n3p1/7p/1P1b1P2/8/PQP1P1PP/B1RNNKRB w GCgc - 0 9", + // {23, 788, 21539, 686795, 20849374, 645694580}}, // 387 + // {"qbbrnn1r/1pppp1pk/p7/5p1p/P2P3P/3N4/1PP1PPP1/QBBR1NKR w HD - 0 9", {34, + // 713, 24475, 562189, 19494094, 482645160}}, // 388 + // {"qrbb2kr/p1pppppp/1p1n4/8/1P3n2/P7/Q1PPP1PP/1RBBNNKR w HBhb - 0 9", {28, + // 977, 26955, 949925, 27802999, 992109168}}, // 389 + // {"qrb2bkr/1pp1pppp/2np1n2/pN6/3P4/4B3/PPP1PPPP/QR2NBKR w HBhb - 0 9", + // {27, 730, 20534, 585091, 17005916, 507008968}}, // 390 + // {"qrbnnkrb/pp2pp1p/8/2pp2p1/7P/P1P5/QP1PPPP1/1RBNNKRB w GBgb - 0 9", {24, + // 813, 21142, 707925, 19615756, 655850285}}, // 391 + // {"1brnb1kr/p1pppppp/1p6/8/4q2n/1P2P1P1/PNPP1P1P/QBR1BNKR w HChc - 3 9", + // {17, 734, 13462, 530809, 11032633, 416356876}}, // 392 + // {"1rnbbnkr/1pp1pppp/1q1p4/p7/4P3/5PN1/PPPP1BPP/QRNB2KR w HBhb - 1 9", + // {26, 809, 21764, 706677, 20292750, 675408811}}, // 393 + // {"qrnnbb1Q/ppp1pk1p/3p2p1/5p2/PP6/5P2/2PPP1PP/1RNNBBKR w HB - 0 9", {37, + // 751, 27902, 603931, 22443036, 515122176}}, // 394 + // {"qrnnbkrb/p3p1pp/3p1p2/1pp5/PP2P3/8/2PP1PPP/QRNNBRKB w gb - 0 9", {30, + // 906, 27955, 872526, 27658191, 890966633}}, // 395 {"qbrnnkbr/1p2pp1p/p1p3p1/3p4/6P1/P1N4P/1PPPPP2/QBR1NKBR w HChc - 0 9", {26, 701, 18930, 521377, 14733245, 416881799}}, // 396 - {"qr1b1kbr/1p1ppppp/1n1n4/p1p5/4P3/5NPP/PPPP1P2/QRNB1KBR w HBhb - 1 9", - {26, 649, 17235, 451997, 12367604, 342165821}}, // 397 - {"qrnnkb1r/1pppppp1/7p/p4b2/4P3/5P1P/PPPP2PR/QRNNKBB1 w Bhb - 1 9", - {34, 941, 31720, 901240, 30307554, 888709821}}, // 398 - {"qr1nkrbb/p2ppppp/1pp5/8/3Pn3/1NP3P1/PP2PP1P/QR1NKRBB w FBfb - 1 9", - {19, 505, 11107, 294251, 7046501, 190414579}}, // 399 - {"bbrqn1kr/1pppp1pp/4n3/5p2/p5P1/3P4/PPP1PPKP/BBRQNN1R w hc - 0 9", - {24, 573, 12963, 335845, 8191054, 227555387}}, // 400 - {"brqb1nkr/pppppp1p/8/4N1pn/5P2/6P1/PPPPP2P/BRQB1NKR w HBhb - 0 9", - {26, 550, 14338, 331666, 8903754, 223437427}}, // 401 - {"brqnn1kr/pp3ppp/2pbp3/3p4/8/2NPP3/PPP1BPPP/BRQ1N1KR w HBhb - 0 9", - {27, 780, 20760, 589328, 16243731, 463883447}}, // 402 - {"brq1nkrb/ppp2ppp/8/n2pp2P/P7/4P3/1PPP1PP1/BRQNNKRB w GBgb - 1 9", - {17, 426, 8295, 235162, 5048497, 153986034}}, // 403 - {"rbbqn1kr/pp2p1pp/6n1/2pp1p2/2P4P/P7/BP1PPPP1/R1BQNNKR w HAha - 0 9", - {27, 916, 25798, 890435, 26302461, 924181432}}, // 404 - {"1qbbn1kr/1ppppppp/r3n3/8/p1P5/P7/1P1PPPPP/RQBBNNKR w HAh - 1 9", - {29, 817, 24530, 720277, 22147642, 670707652}}, // 405 - {"rqbnnbkr/ppp1ppp1/7p/3p4/PP6/7P/1NPPPPP1/RQB1NBKR w HAa - 1 9", - {23, 572, 14509, 381474, 10416981, 288064942}}, // 406 - {"r1bnnkrb/q1ppp1pp/p7/1p3pB1/2P1P3/3P4/PP3PPP/RQ1NNKRB w GAga - 2 9", - {31, 925, 27776, 860969, 26316355, 843078864}}, // 407 - {"rbqnb1kr/ppppp1pp/5p2/5N2/7P/1n3P2/PPPPP1P1/RBQNB1KR w HAha - 1 9", - {32, 864, 27633, 766551, 24738875, 707188107}}, // 408 - 
{"rqnbbn1r/ppppppp1/6k1/8/6Pp/2PN4/PP1PPPKP/RQ1BBN1R w - - 0 9", - {27, 566, 15367, 347059, 9714509, 234622128}}, // 409 - {"rqnnbbkr/p1p2pp1/1p1p3p/4p3/4NP2/6P1/PPPPP2P/RQN1BBKR w HAha - 0 9", - {27, 631, 17923, 452734, 13307890, 356279813}}, // 410 - {"1qnnbrkb/rppp1ppp/p3p3/8/4P3/2PP1P2/PP4PP/RQNNBKRB w GA - 1 9", - {24, 479, 12135, 271469, 7204345, 175460841}}, // 411 - {"rbqnn1br/p1pppk1p/1p4p1/5p2/8/P1P2P2/1PBPP1PP/R1QNNKBR w HA - 0 9", - {31, 756, 23877, 625194, 20036784, 554292502}}, // 412 - {"rqnbnkbr/1ppppp2/p5p1/8/1P4p1/4PP2/P1PP3P/RQNBNKBR w HAha - 0 9", - {24, 715, 18536, 575589, 16013189, 515078271}}, // 413 - {"rq1nkbbr/1p2pppp/p2n4/2pp4/1P4P1/P2N4/2PPPP1P/RQ1NKBBR w HAha - 1 9", - {27, 694, 19840, 552904, 16685687, 494574415}}, // 414 - {"r1nnkrbb/pp1pppp1/2p3q1/7p/8/1PPP3P/P3PPP1/RQNNKRBB w FAfa - 1 9", - {18, 520, 10808, 329085, 7508201, 235103697}}, // 415 - {"bbrnqk1r/pppp3p/6p1/4pp2/3P2P1/8/PPP1PP1P/BBRN1NKR w HC - 0 9", - {22, 566, 12965, 362624, 8721079, 259069471}}, // 416 - {"brnb1nkr/pppqpp2/3p2pp/8/3PP3/1P6/PBP2PPP/1RNBQNKR w HBhb - 0 9", - {32, 859, 28517, 817464, 27734108, 829785474}}, // 417 - {"brnq1b1r/ppp1ppkp/3p1np1/8/8/5P1P/PPPPPKPR/BRNQNB2 w - - 0 9", - {21, 511, 10951, 273756, 6372681, 167139732}}, // 418 - {"brnq1rkb/1pppppp1/3n3p/p7/8/P4NP1/1PPPPPRP/BRNQ1K1B w B - 0 9", - {25, 548, 14049, 341208, 9015901, 235249649}}, // 419 - {"rbb1qnkr/p1ppp1pp/1p3p2/6n1/8/1PN1P2P/P1PP1PP1/RBB1QNKR w HAha - 0 9", - {25, 673, 16412, 467660, 12099119, 361714466}}, // 420 - {"rnbb1nkr/1ppp1ppp/4p3/p5q1/6P1/1PP5/PB1PPP1P/RN1BQNKR w HAha - 1 9", - {19, 663, 14149, 489653, 11491355, 399135495}}, // 421 - {"rnbqnbkr/1pp1p2p/3p1p2/p5p1/5PP1/2P5/PPNPP2P/RNBQ1BKR w HAha - 0 9", - {24, 647, 16679, 461931, 12649636, 361157611}}, // 422 - {"rnb2krb/pppqppnp/8/3p2p1/1P4P1/7P/P1PPPPB1/RNBQNKR1 w GAga - 1 9", - {24, 722, 18749, 605229, 16609220, 563558512}}, // 423 - {"rbnqb1kr/pppn1pp1/3p3p/4p3/1P6/P7/R1PPPPPP/1BNQBNKR w Hha - 1 9", - {20, 538, 12277, 345704, 8687621, 255304141}}, // 424 - {"rnqb1nkr/p1pbp1pp/8/1pPp1p2/P2P4/8/1P2PPPP/RNQBBNKR w HAha - 1 9", - {35, 764, 26952, 632796, 22592380, 564255328}}, // 425 - {"rnq1bbkr/1p1ppp1p/4n3/p1p3p1/P1PP4/8/RP2PPPP/1NQNBBKR w Hha - 0 9", - {29, 709, 21296, 570580, 17597398, 506140370}}, // 426 - {"1nqnbkrb/1pppp2p/r7/p4pp1/3P4/8/PPPBPPPP/RNQNK1RB w g - 0 9", - {27, 1028, 28534, 1050834, 30251988, 1096869832}}, // 427 - {"rbnqnkbr/p1pp1p1p/8/1p2p3/3P2pP/2P5/PP2PPP1/RBNQNKBR w HAha - 0 9", - {32, 832, 27120, 750336, 24945574, 724171581}}, // 428 - {"rnq1nkbr/1p1p1ppp/2p1pb2/p7/7P/2P5/PPNPPPPB/RNQB1K1R w HAha - 2 9", - {31, 779, 24010, 638640, 19919434, 551494771}}, // 429 - {"rnqnk1br/p1ppp1bp/1p3p2/6p1/4N3/P5P1/1PPPPP1P/R1QNKBBR w HAha - 2 9", - {25, 717, 19396, 576577, 16525239, 507175842}}, // 430 - {"rnq1krbb/p1p1pppp/8/1p1p4/1n5B/2N2P2/PPPPP1PP/RNQ1KR1B w FAfa - 0 9", - {28, 867, 24029, 735686, 21112751, 654808184}}, // 431 - {"bbrnnqkr/1pp1pppp/3p4/p7/P3P3/7P/1PPP1PP1/BBRNNQKR w HChc - 0 9", - {24, 405, 11025, 210557, 6196438, 131401224}}, // 432 - {"brnbnqkr/p1ppp3/1p5p/5Pp1/5P2/3N4/PPPPP2P/BRNB1QKR w HBhb g6 0 9", - {25, 785, 21402, 698331, 20687969, 695850727}}, // 433 - {"br1nqbkr/1ppppp2/pn6/6pp/2PP4/1N4P1/PP2PP1P/BR1NQBKR w HBhb - 0 9", - {25, 596, 16220, 421882, 12185361, 337805606}}, // 434 - {"1rnnqkrb/p2ppp1p/1pp5/2N3p1/8/1P6/P1PPPPKP/BR1NQ1RB w gb - 0 9", - {38, 960, 34831, 913665, 32490040, 880403591}}, // 435 - {"rbbnnqkr/pp3pp1/2p1p3/3p3p/3P3P/1PP5/P3PPP1/RBBNNQKR w HAha - 0 9", - {30, 785, 23079, 656618, 19885037, 
599219582}}, // 436 - {"rn1bnqkr/p1ppppp1/8/1p5p/P4P1P/3N4/1PPPP1b1/RNBB1QKR w HAha - 0 9", - {27, 752, 21735, 613194, 18862234, 547415271}}, // 437 - {"1nbnqbkr/1p1p1ppp/r3p3/p1p5/P3P3/3Q4/1PPP1PPP/RNBN1BKR w HAh - 2 9", - {33, 721, 24278, 572535, 19648535, 496023732}}, // 438 - {"rnbnqkrb/2pppppp/1p6/p7/1PP5/4N2P/P2PPPP1/RNB1QKRB w GAg - 0 9", - {23, 570, 14225, 374196, 10022614, 279545007}}, // 439 - {"rbnnbq1r/ppppppkp/6p1/N7/4P3/P7/1PPP1PPP/RB1NBQKR w HA - 5 9", - {27, 620, 18371, 440594, 13909432, 349478320}}, // 440 - {"r1nbbqkr/pppppp1p/8/8/1n3Pp1/3N1QP1/PPPPP2P/RN1BB1KR w HAha - 0 9", - {31, 791, 25431, 682579, 22408813, 636779732}}, // 441 - {"rnq1bbkr/pp1p1ppp/2pnp3/8/7P/1QP5/PP1PPPPR/RNN1BBK1 w Aha - 2 9", - {28, 559, 16838, 390887, 12242780, 315431511}}, // 442 - {"rnnqbrkb/2ppppp1/1p1N4/p6p/4P3/8/PPPP1PPP/R1NQBKRB w GA - 0 9", - {32, 638, 20591, 438792, 14395828, 331782223}}, // 443 - {"rbnnq1br/pppp1kp1/4pp2/7p/PP6/2PP4/4PPPP/RBNNQKBR w HA - 0 9", - {21, 521, 12201, 320429, 8239159, 227346638}}, // 444 - {"rnnbqkbr/p2ppp2/7p/1pp3p1/2P2N2/8/PP1PPPPP/RN1BQKBR w HAha - 0 9", - {25, 528, 13896, 326094, 9079829, 232750602}}, // 445 - {"rnn1kbbr/ppppqp2/6p1/2N1p2p/P7/2P5/1P1PPPPP/RN1QKBBR w HAha - 2 9", - {27, 801, 22088, 707078, 20334071, 682580976}}, // 446 - {"rnnqkrbb/p1p1p1pp/1p3p2/8/3p2Q1/P1P1P3/1P1P1PPP/RNN1KRBB w FAfa - 0 9", - {37, 1014, 34735, 998999, 32921537, 988770109}}, // 447 - {"bbrnk1qr/1pppppp1/p4n1p/8/P2P2N1/8/1PP1PPPP/BBR1NKQR w HC - 1 9", - {21, 481, 11213, 279993, 7015419, 187564853}}, // 448 - {"brnbnkqr/1pp1p1p1/p2p1p2/7p/1P4PP/8/PBPPPP2/1RNBNKQR w HBhb - 0 9", - {31, 743, 24260, 660177, 22391185, 653721389}}, // 449 - {"br2kbqr/ppppp1pp/3n1p2/3P4/3n3P/3N4/PPP1PPP1/BR1NKBQR w HBhb - 3 9", - {25, 872, 22039, 748726, 20281962, 685749952}}, // 450 - {"br1nkqrb/ppppppp1/8/7p/4P3/n1P2PP1/PP1P3P/BRNNKQRB w GBgb - 0 9", - {28, 607, 16934, 396483, 11607818, 294181806}}, // 451 - {"rbbn1kqr/pp1pp1p1/2pn3p/5p2/5P2/1P1N4/PNPPP1PP/RBB2KQR w HAha - 1 9", - {27, 725, 21543, 616082, 19239812, 581716972}}, // 452 - {"rnbbnk1r/pp1ppp1p/6q1/2p5/PP4p1/4P3/2PP1PPP/RNBBNKQR w HAha - 1 9", - {25, 1072, 26898, 1088978, 28469879, 1122703887}}, // 453 - {"rnbnkbqr/1pp3pp/3p4/p3pp2/3P2P1/2N1N3/PPP1PP1P/R1B1KBQR w HAha - 0 9", - {31, 1028, 32907, 1095472, 36025223, 1211187800}}, // 454 - {"r1bnkqrb/1ppppppp/p3n3/8/6P1/4N3/PPPPPPRP/RNB1KQ1B w Aga - 1 9", - {23, 457, 11416, 250551, 6666787, 159759052}}, // 455 - {"rbn1bkqr/p1pp1pp1/1pn5/4p2p/7P/1PBP4/P1P1PPP1/RBNN1KQR w HAha - 0 9", - {23, 470, 11649, 264274, 6963287, 172833738}}, // 456 - {"rnnbbkqr/3ppppp/p7/1pp5/P6P/6P1/1PPPPP2/RNNBBKQR w HAha - 0 9", - {26, 569, 15733, 375556, 11008114, 284485303}}, // 457 - {"r1nk1bqr/1pppp1pp/2n5/p4p1b/5P2/1N4B1/PPPPP1PP/RN1K1BQR w HAha - 2 9", - {25, 824, 21983, 738366, 20904119, 716170771}}, // 458 - {"r1nkbqrb/p2pppp1/npp4p/8/4PP2/2N4P/PPPP2P1/R1NKBQRB w GAga - 0 9", - {31, 548, 17480, 349633, 11469548, 255067638}}, // 459 - {"rbnnkqbr/ppppp2p/5p2/6p1/2P1B3/P6P/1P1PPPP1/R1NNKQBR w HAha - 1 9", - {31, 809, 24956, 680747, 21247414, 606221516}}, // 460 - {"1r1bkqbr/pppp1ppp/2nnp3/8/2P5/N4P2/PP1PP1PP/1RNBKQBR w Hh - 0 9", - {28, 810, 22844, 694599, 20188622, 636748147}}, // 461 - {"rn1kqbbr/p1pppp1p/1p4p1/1n6/1P2P3/4Q2P/P1PP1PP1/RNNK1BBR w HAha - 1 9", - {39, 848, 30100, 724426, 25594662, 659615710}}, // 462 - {"rn1kqrbb/pppppppp/8/8/2nP2P1/1P2P3/P1P2P1P/RNNKQRBB w FAfa - 1 9", - {29, 766, 21701, 567971, 16944425, 456898648}}, // 463 - {"b1rnnkrq/bpppppp1/7p/8/1p6/2B5/PNPPPPPP/1BR1NKRQ w GCgc - 2 
9", - {25, 667, 17253, 472678, 12865247, 365621294}}, // 464 - {"brnb1krq/pppppppp/8/5P2/2P1n2P/8/PP1PP1P1/BRNBNKRQ w GBgb - 1 9", - {23, 620, 14882, 402561, 10776855, 300125003}}, // 465 - {"b1nnkbrq/pr1pppp1/1p5p/2p5/P2N1P2/8/1PPPP1PP/BR1NKBRQ w GBg - 0 9", - {24, 472, 12181, 267398, 7370758, 178605165}}, // 466 - {"br1nkrqb/p1p1p1pp/3n4/1p1p1p2/5N1P/4P3/PPPP1PP1/BR1NKRQB w FBfb - 0 9", - {24, 775, 19398, 624309, 16429837, 539767605}}, // 467 - {"rbbnnkrq/p2pp1pp/2p5/5p2/1pPP1B2/P7/1P2PPPP/RB1NNKRQ w GAga - 0 9", - {34, 921, 30474, 849933, 28095833, 806446436}}, // 468 - {"rnbbnkr1/1p1ppp1p/2p3p1/p7/2Pq4/1P1P4/P2BPPPP/RN1BNKRQ w GAga - 2 9", - {26, 1139, 29847, 1204863, 32825932, 1281760240}}, // 469 - {"1rbnkbrq/pppppp2/n5pp/2P5/P7/4N3/1P1PPPPP/RNB1KBRQ w GAg - 2 9", - {23, 574, 14146, 391413, 10203438, 301874034}}, // 470 - {"1nbnkr1b/rppppppq/p7/7p/1P5P/3P2P1/P1P1PP2/RNBNKRQB w FAf - 1 9", - {33, 823, 26696, 724828, 23266182, 672294132}}, // 471 - {"rbn1bkrq/ppppp3/4n2p/5pp1/1PN5/2P5/P2PPPPP/RBN1BKRQ w GAga - 0 9", - {27, 859, 24090, 796482, 23075785, 789152120}}, // 472 + // {"qr1b1kbr/1p1ppppp/1n1n4/p1p5/4P3/5NPP/PPPP1P2/QRNB1KBR w HBhb - 1 9", + // {26, 649, 17235, 451997, 12367604, 342165821}}, // 397 + // {"qrnnkb1r/1pppppp1/7p/p4b2/4P3/5P1P/PPPP2PR/QRNNKBB1 w Bhb - 1 9", {34, + // 941, 31720, 901240, 30307554, 888709821}}, // 398 + // {"qr1nkrbb/p2ppppp/1pp5/8/3Pn3/1NP3P1/PP2PP1P/QR1NKRBB w FBfb - 1 9", + // {19, 505, 11107, 294251, 7046501, 190414579}}, // 399 + // {"bbrqn1kr/1pppp1pp/4n3/5p2/p5P1/3P4/PPP1PPKP/BBRQNN1R w hc - 0 9", {24, + // 573, 12963, 335845, 8191054, 227555387}}, // 400 + // {"brqb1nkr/pppppp1p/8/4N1pn/5P2/6P1/PPPPP2P/BRQB1NKR w HBhb - 0 9", {26, + // 550, 14338, 331666, 8903754, 223437427}}, // 401 + // {"brqnn1kr/pp3ppp/2pbp3/3p4/8/2NPP3/PPP1BPPP/BRQ1N1KR w HBhb - 0 9", {27, + // 780, 20760, 589328, 16243731, 463883447}}, // 402 + // {"brq1nkrb/ppp2ppp/8/n2pp2P/P7/4P3/1PPP1PP1/BRQNNKRB w GBgb - 1 9", {17, + // 426, 8295, 235162, 5048497, 153986034}}, // 403 + // {"rbbqn1kr/pp2p1pp/6n1/2pp1p2/2P4P/P7/BP1PPPP1/R1BQNNKR w HAha - 0 9", + // {27, 916, 25798, 890435, 26302461, 924181432}}, // 404 + // {"1qbbn1kr/1ppppppp/r3n3/8/p1P5/P7/1P1PPPPP/RQBBNNKR w HAh - 1 9", {29, + // 817, 24530, 720277, 22147642, 670707652}}, // 405 + // {"rqbnnbkr/ppp1ppp1/7p/3p4/PP6/7P/1NPPPPP1/RQB1NBKR w HAa - 1 9", {23, + // 572, 14509, 381474, 10416981, 288064942}}, // 406 + // {"r1bnnkrb/q1ppp1pp/p7/1p3pB1/2P1P3/3P4/PP3PPP/RQ1NNKRB w GAga - 2 9", + // {31, 925, 27776, 860969, 26316355, 843078864}}, // 407 + // {"rbqnb1kr/ppppp1pp/5p2/5N2/7P/1n3P2/PPPPP1P1/RBQNB1KR w HAha - 1 9", + // {32, 864, 27633, 766551, 24738875, 707188107}}, // 408 + // {"rqnbbn1r/ppppppp1/6k1/8/6Pp/2PN4/PP1PPPKP/RQ1BBN1R w - - 0 9", {27, + // 566, 15367, 347059, 9714509, 234622128}}, // 409 + // {"rqnnbbkr/p1p2pp1/1p1p3p/4p3/4NP2/6P1/PPPPP2P/RQN1BBKR w HAha - 0 9", + // {27, 631, 17923, 452734, 13307890, 356279813}}, // 410 + // {"1qnnbrkb/rppp1ppp/p3p3/8/4P3/2PP1P2/PP4PP/RQNNBKRB w GA - 1 9", {24, + // 479, 12135, 271469, 7204345, 175460841}}, // 411 + // {"rbqnn1br/p1pppk1p/1p4p1/5p2/8/P1P2P2/1PBPP1PP/R1QNNKBR w HA - 0 9", + // {31, 756, 23877, 625194, 20036784, 554292502}}, // 412 + // {"rqnbnkbr/1ppppp2/p5p1/8/1P4p1/4PP2/P1PP3P/RQNBNKBR w HAha - 0 9", {24, + // 715, 18536, 575589, 16013189, 515078271}}, // 413 + // {"rq1nkbbr/1p2pppp/p2n4/2pp4/1P4P1/P2N4/2PPPP1P/RQ1NKBBR w HAha - 1 9", + // {27, 694, 19840, 552904, 16685687, 494574415}}, // 414 + // {"r1nnkrbb/pp1pppp1/2p3q1/7p/8/1PPP3P/P3PPP1/RQNNKRBB 
w FAfa - 1 9", {18, + // 520, 10808, 329085, 7508201, 235103697}}, // 415 + // {"bbrnqk1r/pppp3p/6p1/4pp2/3P2P1/8/PPP1PP1P/BBRN1NKR w HC - 0 9", {22, + // 566, 12965, 362624, 8721079, 259069471}}, // 416 + // {"brnb1nkr/pppqpp2/3p2pp/8/3PP3/1P6/PBP2PPP/1RNBQNKR w HBhb - 0 9", {32, + // 859, 28517, 817464, 27734108, 829785474}}, // 417 + // {"brnq1b1r/ppp1ppkp/3p1np1/8/8/5P1P/PPPPPKPR/BRNQNB2 w - - 0 9", {21, + // 511, 10951, 273756, 6372681, 167139732}}, // 418 + // {"brnq1rkb/1pppppp1/3n3p/p7/8/P4NP1/1PPPPPRP/BRNQ1K1B w B - 0 9", {25, + // 548, 14049, 341208, 9015901, 235249649}}, // 419 + // {"rbb1qnkr/p1ppp1pp/1p3p2/6n1/8/1PN1P2P/P1PP1PP1/RBB1QNKR w HAha - 0 9", + // {25, 673, 16412, 467660, 12099119, 361714466}}, // 420 + // {"rnbb1nkr/1ppp1ppp/4p3/p5q1/6P1/1PP5/PB1PPP1P/RN1BQNKR w HAha - 1 9", + // {19, 663, 14149, 489653, 11491355, 399135495}}, // 421 + // {"rnbqnbkr/1pp1p2p/3p1p2/p5p1/5PP1/2P5/PPNPP2P/RNBQ1BKR w HAha - 0 9", + // {24, 647, 16679, 461931, 12649636, 361157611}}, // 422 + // {"rnb2krb/pppqppnp/8/3p2p1/1P4P1/7P/P1PPPPB1/RNBQNKR1 w GAga - 1 9", {24, + // 722, 18749, 605229, 16609220, 563558512}}, // 423 + // {"rbnqb1kr/pppn1pp1/3p3p/4p3/1P6/P7/R1PPPPPP/1BNQBNKR w Hha - 1 9", {20, + // 538, 12277, 345704, 8687621, 255304141}}, // 424 + // {"rnqb1nkr/p1pbp1pp/8/1pPp1p2/P2P4/8/1P2PPPP/RNQBBNKR w HAha - 1 9", {35, + // 764, 26952, 632796, 22592380, 564255328}}, // 425 + // {"rnq1bbkr/1p1ppp1p/4n3/p1p3p1/P1PP4/8/RP2PPPP/1NQNBBKR w Hha - 0 9", + // {29, 709, 21296, 570580, 17597398, 506140370}}, // 426 + // {"1nqnbkrb/1pppp2p/r7/p4pp1/3P4/8/PPPBPPPP/RNQNK1RB w g - 0 9", {27, + // 1028, 28534, 1050834, 30251988, 1096869832}}, // 427 + // {"rbnqnkbr/p1pp1p1p/8/1p2p3/3P2pP/2P5/PP2PPP1/RBNQNKBR w HAha - 0 9", + // {32, 832, 27120, 750336, 24945574, 724171581}}, // 428 + // {"rnq1nkbr/1p1p1ppp/2p1pb2/p7/7P/2P5/PPNPPPPB/RNQB1K1R w HAha - 2 9", + // {31, 779, 24010, 638640, 19919434, 551494771}}, // 429 + // {"rnqnk1br/p1ppp1bp/1p3p2/6p1/4N3/P5P1/1PPPPP1P/R1QNKBBR w HAha - 2 9", + // {25, 717, 19396, 576577, 16525239, 507175842}}, // 430 + // {"rnq1krbb/p1p1pppp/8/1p1p4/1n5B/2N2P2/PPPPP1PP/RNQ1KR1B w FAfa - 0 9", + // {28, 867, 24029, 735686, 21112751, 654808184}}, // 431 + // {"bbrnnqkr/1pp1pppp/3p4/p7/P3P3/7P/1PPP1PP1/BBRNNQKR w HChc - 0 9", {24, + // 405, 11025, 210557, 6196438, 131401224}}, // 432 + // {"brnbnqkr/p1ppp3/1p5p/5Pp1/5P2/3N4/PPPPP2P/BRNB1QKR w HBhb g6 0 9", {25, + // 785, 21402, 698331, 20687969, 695850727}}, // 433 + // {"br1nqbkr/1ppppp2/pn6/6pp/2PP4/1N4P1/PP2PP1P/BR1NQBKR w HBhb - 0 9", + // {25, 596, 16220, 421882, 12185361, 337805606}}, // 434 + // {"1rnnqkrb/p2ppp1p/1pp5/2N3p1/8/1P6/P1PPPPKP/BR1NQ1RB w gb - 0 9", {38, + // 960, 34831, 913665, 32490040, 880403591}}, // 435 + // {"rbbnnqkr/pp3pp1/2p1p3/3p3p/3P3P/1PP5/P3PPP1/RBBNNQKR w HAha - 0 9", + // {30, 785, 23079, 656618, 19885037, 599219582}}, // 436 + // {"rn1bnqkr/p1ppppp1/8/1p5p/P4P1P/3N4/1PPPP1b1/RNBB1QKR w HAha - 0 9", + // {27, 752, 21735, 613194, 18862234, 547415271}}, // 437 + // {"1nbnqbkr/1p1p1ppp/r3p3/p1p5/P3P3/3Q4/1PPP1PPP/RNBN1BKR w HAh - 2 9", + // {33, 721, 24278, 572535, 19648535, 496023732}}, // 438 + // {"rnbnqkrb/2pppppp/1p6/p7/1PP5/4N2P/P2PPPP1/RNB1QKRB w GAg - 0 9", {23, + // 570, 14225, 374196, 10022614, 279545007}}, // 439 + // {"rbnnbq1r/ppppppkp/6p1/N7/4P3/P7/1PPP1PPP/RB1NBQKR w HA - 5 9", {27, + // 620, 18371, 440594, 13909432, 349478320}}, // 440 + // {"r1nbbqkr/pppppp1p/8/8/1n3Pp1/3N1QP1/PPPPP2P/RN1BB1KR w HAha - 0 9", + // {31, 791, 25431, 682579, 22408813, 636779732}}, // 441 + 
// {"rnq1bbkr/pp1p1ppp/2pnp3/8/7P/1QP5/PP1PPPPR/RNN1BBK1 w Aha - 2 9", {28, + // 559, 16838, 390887, 12242780, 315431511}}, // 442 + // {"rnnqbrkb/2ppppp1/1p1N4/p6p/4P3/8/PPPP1PPP/R1NQBKRB w GA - 0 9", {32, + // 638, 20591, 438792, 14395828, 331782223}}, // 443 + // {"rbnnq1br/pppp1kp1/4pp2/7p/PP6/2PP4/4PPPP/RBNNQKBR w HA - 0 9", {21, + // 521, 12201, 320429, 8239159, 227346638}}, // 444 + // {"rnnbqkbr/p2ppp2/7p/1pp3p1/2P2N2/8/PP1PPPPP/RN1BQKBR w HAha - 0 9", {25, + // 528, 13896, 326094, 9079829, 232750602}}, // 445 + // {"rnn1kbbr/ppppqp2/6p1/2N1p2p/P7/2P5/1P1PPPPP/RN1QKBBR w HAha - 2 9", + // {27, 801, 22088, 707078, 20334071, 682580976}}, // 446 + // {"rnnqkrbb/p1p1p1pp/1p3p2/8/3p2Q1/P1P1P3/1P1P1PPP/RNN1KRBB w FAfa - 0 9", + // {37, 1014, 34735, 998999, 32921537, 988770109}}, // 447 + // {"bbrnk1qr/1pppppp1/p4n1p/8/P2P2N1/8/1PP1PPPP/BBR1NKQR w HC - 1 9", {21, + // 481, 11213, 279993, 7015419, 187564853}}, // 448 + // {"brnbnkqr/1pp1p1p1/p2p1p2/7p/1P4PP/8/PBPPPP2/1RNBNKQR w HBhb - 0 9", + // {31, 743, 24260, 660177, 22391185, 653721389}}, // 449 + // {"br2kbqr/ppppp1pp/3n1p2/3P4/3n3P/3N4/PPP1PPP1/BR1NKBQR w HBhb - 3 9", + // {25, 872, 22039, 748726, 20281962, 685749952}}, // 450 + // {"br1nkqrb/ppppppp1/8/7p/4P3/n1P2PP1/PP1P3P/BRNNKQRB w GBgb - 0 9", {28, + // 607, 16934, 396483, 11607818, 294181806}}, // 451 + // {"rbbn1kqr/pp1pp1p1/2pn3p/5p2/5P2/1P1N4/PNPPP1PP/RBB2KQR w HAha - 1 9", + // {27, 725, 21543, 616082, 19239812, 581716972}}, // 452 + // {"rnbbnk1r/pp1ppp1p/6q1/2p5/PP4p1/4P3/2PP1PPP/RNBBNKQR w HAha - 1 9", + // {25, 1072, 26898, 1088978, 28469879, 1122703887}}, // 453 + // {"rnbnkbqr/1pp3pp/3p4/p3pp2/3P2P1/2N1N3/PPP1PP1P/R1B1KBQR w HAha - 0 9", + // {31, 1028, 32907, 1095472, 36025223, 1211187800}}, // 454 + // {"r1bnkqrb/1ppppppp/p3n3/8/6P1/4N3/PPPPPPRP/RNB1KQ1B w Aga - 1 9", {23, + // 457, 11416, 250551, 6666787, 159759052}}, // 455 + // {"rbn1bkqr/p1pp1pp1/1pn5/4p2p/7P/1PBP4/P1P1PPP1/RBNN1KQR w HAha - 0 9", + // {23, 470, 11649, 264274, 6963287, 172833738}}, // 456 + // {"rnnbbkqr/3ppppp/p7/1pp5/P6P/6P1/1PPPPP2/RNNBBKQR w HAha - 0 9", {26, + // 569, 15733, 375556, 11008114, 284485303}}, // 457 + // {"r1nk1bqr/1pppp1pp/2n5/p4p1b/5P2/1N4B1/PPPPP1PP/RN1K1BQR w HAha - 2 9", + // {25, 824, 21983, 738366, 20904119, 716170771}}, // 458 + // {"r1nkbqrb/p2pppp1/npp4p/8/4PP2/2N4P/PPPP2P1/R1NKBQRB w GAga - 0 9", {31, + // 548, 17480, 349633, 11469548, 255067638}}, // 459 + // {"rbnnkqbr/ppppp2p/5p2/6p1/2P1B3/P6P/1P1PPPP1/R1NNKQBR w HAha - 1 9", + // {31, 809, 24956, 680747, 21247414, 606221516}}, // 460 + // {"1r1bkqbr/pppp1ppp/2nnp3/8/2P5/N4P2/PP1PP1PP/1RNBKQBR w Hh - 0 9", {28, + // 810, 22844, 694599, 20188622, 636748147}}, // 461 + // {"rn1kqbbr/p1pppp1p/1p4p1/1n6/1P2P3/4Q2P/P1PP1PP1/RNNK1BBR w HAha - 1 9", + // {39, 848, 30100, 724426, 25594662, 659615710}}, // 462 + // {"rn1kqrbb/pppppppp/8/8/2nP2P1/1P2P3/P1P2P1P/RNNKQRBB w FAfa - 1 9", {29, + // 766, 21701, 567971, 16944425, 456898648}}, // 463 + // {"b1rnnkrq/bpppppp1/7p/8/1p6/2B5/PNPPPPPP/1BR1NKRQ w GCgc - 2 9", {25, + // 667, 17253, 472678, 12865247, 365621294}}, // 464 + // {"brnb1krq/pppppppp/8/5P2/2P1n2P/8/PP1PP1P1/BRNBNKRQ w GBgb - 1 9", {23, + // 620, 14882, 402561, 10776855, 300125003}}, // 465 + // {"b1nnkbrq/pr1pppp1/1p5p/2p5/P2N1P2/8/1PPPP1PP/BR1NKBRQ w GBg - 0 9", + // {24, 472, 12181, 267398, 7370758, 178605165}}, // 466 + // {"br1nkrqb/p1p1p1pp/3n4/1p1p1p2/5N1P/4P3/PPPP1PP1/BR1NKRQB w FBfb - 0 9", + // {24, 775, 19398, 624309, 16429837, 539767605}}, // 467 + // 
{"rbbnnkrq/p2pp1pp/2p5/5p2/1pPP1B2/P7/1P2PPPP/RB1NNKRQ w GAga - 0 9", + // {34, 921, 30474, 849933, 28095833, 806446436}}, // 468 + // {"rnbbnkr1/1p1ppp1p/2p3p1/p7/2Pq4/1P1P4/P2BPPPP/RN1BNKRQ w GAga - 2 9", + // {26, 1139, 29847, 1204863, 32825932, 1281760240}}, // 469 + // {"1rbnkbrq/pppppp2/n5pp/2P5/P7/4N3/1P1PPPPP/RNB1KBRQ w GAg - 2 9", {23, + // 574, 14146, 391413, 10203438, 301874034}}, // 470 + // {"1nbnkr1b/rppppppq/p7/7p/1P5P/3P2P1/P1P1PP2/RNBNKRQB w FAf - 1 9", {33, + // 823, 26696, 724828, 23266182, 672294132}}, // 471 + // {"rbn1bkrq/ppppp3/4n2p/5pp1/1PN5/2P5/P2PPPPP/RBN1BKRQ w GAga - 0 9", {27, + // 859, 24090, 796482, 23075785, 789152120}}, // 472 {"r1nbbkrq/1ppp2pp/2n2p2/p3p3/5P2/1N4BP/PPPPP1P1/RN1B1KRQ w GAga - 0 9", {25, 774, 20141, 618805, 16718577, 515864053}}, // 473 - {"rnnkbbrq/1pppp1p1/5p2/7p/p6P/3N1P2/PPPPP1PQ/RN1KBBR1 w GAga - 0 9", - {29, 673, 20098, 504715, 15545590, 416359581}}, // 474 - {"r1nkbrqb/pppp1p2/n3p1p1/7p/2P2P2/1P6/P2PPQPP/RNNKBR1B w FAfa - 0 9", - {27, 722, 21397, 593762, 18742426, 537750982}}, // 475 - {"rbnnkr1q/1ppp2pp/p4p2/P2bp3/4P2P/8/1PPP1PP1/RBNNKRBQ w FAfa - 1 9", - {26, 848, 23387, 741674, 21591790, 675163653}}, // 476 - {"rn1bkrb1/1ppppp1p/pn4p1/8/P2q3P/3P4/NPP1PPP1/RN1BKRBQ w FAfa - 1 9", - {22, 803, 18322, 632920, 15847763, 536419559}}, // 477 - {"rn1krbbq/pppp1npp/4pp2/8/4P2P/3P2P1/PPP2P2/RNNKRBBQ w EAea - 1 9", - {29, 810, 23968, 670500, 20361517, 575069358}}, // 478 - {"rnn1rqbb/ppkp1pp1/2p1p2p/2P5/8/3P1P2/PP2P1PP/RNNKRQBB w EA - 0 9", - {22, 506, 11973, 292344, 7287368, 189865944}}, // 479 - {"bbqr1knr/pppppp1p/8/4n1p1/2P1P3/6P1/PPQP1P1P/BB1RNKNR w HDhd - 0 9", - {26, 650, 18253, 481200, 14301029, 394943978}}, // 480 - {"bq1bnknr/pprppp1p/8/2p3p1/4PPP1/8/PPPP3P/BQRBNKNR w HCh - 0 9", - {24, 548, 14021, 347611, 9374021, 250988458}}, // 481 - {"bqrnkb1r/1p2pppp/p1pp3n/5Q2/2P4P/5N2/PP1PPPP1/B1RNKB1R w HChc - 0 9", - {46, 823, 33347, 673905, 26130444, 582880996}}, // 482 - {"bq1rknrb/pppppp1p/4n3/6p1/4P1P1/3P1P2/PPP4P/BQRNKNRB w GCg - 0 9", - {23, 618, 14815, 419474, 10606831, 315124518}}, // 483 - {"q1brnknr/pp1pp1p1/8/2p2p1p/5b2/P4N2/1PPPP1PP/QBBRK1NR w hd - 0 9", - {22, 675, 15778, 473994, 12077228, 368479752}}, // 484 - {"qrbbnknr/1p1ppp1p/p1p5/8/1P2P1p1/3P1B2/P1P2PPP/QRB1NKNR w HBhb - 0 9", - {32, 722, 24049, 569905, 19584539, 484814878}}, // 485 - {"qrb1kbnr/p3pppp/2n5/1ppp4/7P/3P1P2/PPP1P1PR/QRBNKBN1 w Bhb - 0 9", - {26, 831, 22606, 724505, 20500804, 662608969}}, // 486 - {"qrbnknrb/ppp1pp2/6p1/7p/PPNp4/8/2PPPPPP/QRB1KNRB w GBgb - 0 9", - {31, 840, 26762, 742772, 24422614, 701363800}}, // 487 - {"qbrnbknr/pp1pp1pp/8/2p2p2/3Q4/PP6/2PPPPPP/1BRNBKNR w HChc - 0 9", - {38, 1121, 39472, 1198438, 41108769, 1285503872}}, // 488 - {"qr1bbk1r/pppppp1p/1n6/5np1/4B3/1PP5/P2PPPPP/QRN1BKNR w HBhb - 0 9", - {25, 694, 16938, 472950, 12164609, 345122090}}, // 489 - {"qrnkbbnr/1p1pp2p/p7/2p1Npp1/6P1/7P/PPPPPP2/QR1KBBNR w HBhb - 0 9", - {27, 586, 16348, 393391, 11409633, 298054792}}, // 490 - {"qrnkbnrb/pp1p1p2/2p1p1pp/4N3/P4P2/8/1PPPP1PP/QR1KBNRB w GBgb - 0 9", - {32, 645, 20737, 460319, 15037464, 358531599}}, // 491 - {"qbrnknbr/1pppppp1/p6p/8/1P6/3PP3/PQP2PPP/1BRNKNBR w HChc - 3 9", - {26, 595, 16755, 415022, 12214768, 323518628}}, // 492 - {"qrnbk1br/1ppppp1p/p5p1/8/4Pn2/4K1P1/PPPP1P1P/QRNB1NBR w hb - 0 9", - {24, 609, 13776, 359415, 8538539, 230364479}}, // 493 - {"qrnk1bbr/1pnp1ppp/p1p1p3/8/3Q4/1P1N3P/P1PPPPP1/1RNK1BBR w HBhb - 0 9", - {43, 1106, 42898, 1123080, 41695761, 1113836402}}, // 494 - {"qrnknrb1/pppppp2/8/6pp/4P2P/3P1P2/PbP3P1/QRNKNRBB 
w FBfb - 0 9", - {24, 658, 17965, 488373, 14457245, 400971226}}, // 495 - {"bbrqnrk1/ppp2ppp/7n/3pp3/8/P4N1N/1PPPPPPP/BBRQ1RK1 w - - 1 9", - {22, 503, 12078, 310760, 8080951, 224960353}}, // 496 - {"brqbnk1r/1ppp1ppp/8/p3pn2/8/2PP1P2/PP2PKPP/BRQBN1NR w hb - 1 9", - {25, 745, 19387, 570459, 15520298, 460840861}}, // 497 - {"brqnkbnr/pp2pp1p/3p4/2p5/5p2/3P3P/PPP1PPP1/B1RNKBNR w Hhb - 0 9", - {19, 516, 10755, 312996, 6995034, 214340699}}, // 498 - {"brq1kn1b/1ppppprp/2n3p1/p7/P1N5/6P1/1PPPPP1P/BRQNK1RB w GBb - 2 9", - {29, 557, 16739, 352277, 10840256, 249999654}}, // 499 - {"rbbq1k1r/ppp1pppp/7n/1n1p4/5P2/P2P4/1PPBP1PP/RB1QNKNR w HAha - 1 9", - {25, 769, 20110, 638340, 17438715, 570893953}}, // 500 - {"r1bbnk1r/qpp1pppp/p6n/3p4/1P6/5N1P/P1PPPPP1/RQBBK1NR w ha - 0 9", - {23, 728, 18209, 587364, 16053564, 529082811}}, // 501 - {"rqbnkbnr/1pp2p1p/3p4/p3p1p1/8/2P2P2/PP1PPNPP/RQBNKB1R w HAha - 0 9", - {26, 772, 21903, 653704, 19571559, 593915677}}, // 502 - {"r1bnknrb/pqppp1p1/1p5p/5p2/7P/3P2N1/PPP1PPP1/RQBNK1RB w GAga - 2 9", - {27, 748, 20291, 597105, 16324542, 506453626}}, // 503 - {"rbqnbknr/pp1pppp1/8/2p5/3P3p/5N1P/PPP1PPPR/RBQNBK2 w Aha - 0 9", - {30, 859, 26785, 819631, 26363334, 842796987}}, // 504 - {"rqnbbrk1/ppppppp1/8/5n1p/3P3P/2B3P1/PPP1PP2/RQNB1KNR w HA - 0 9", - {22, 505, 11452, 283464, 7055215, 186760784}}, // 505 - {"rqnkbbnr/pp2p1p1/8/2pp1p1p/3PPP2/8/PPP1N1PP/RQNKBB1R w HAha - 0 9", - {28, 832, 23142, 722857, 20429246, 663183060}}, // 506 - {"rqnkbnr1/pppp2bp/6p1/4pp2/1P2P3/3NN3/P1PP1PPP/RQ1KB1RB w GAga - 0 9", - {28, 641, 18835, 459993, 14038570, 364210162}}, // 507 - {"rbq2kbr/pppppppp/2n5/P7/3P1n2/2P5/1P2PPPP/RBQNKNBR w HA - 1 9", - {31, 889, 27028, 766181, 24299415, 692180754}}, // 508 - {"rq1bkn1r/ppppp2p/3n4/5pp1/2b3P1/1N1P1P2/PPP1P2P/RQ1BKNBR w HAha - 1 9", - {28, 810, 22667, 657520, 18719949, 556282676}}, // 509 - {"r1nknbbr/p2ppp1p/1pp3p1/8/1P6/4P3/P1PPNPPq/R1QKNBBR w HAha - 0 9", - {24, 797, 22144, 719069, 21862776, 716521139}}, // 510 - {"rqnknrbb/ppp1p3/5ppp/2Np4/2P5/4P3/PP1P1PPP/RQNK1RBB w FAfa - 0 9", - {34, 686, 23277, 515541, 17664543, 423574794}}, // 511 - {"1brnqknr/2p1pppp/p2p4/1P6/6P1/4Nb2/PP1PPP1P/BBR1QKNR w HChc - 1 9", - {34, 1019, 32982, 1003103, 33322477, 1043293394}}, // 512 - {"brn1qknr/1p1pppp1/pb5p/Q1p5/3P3P/8/PPP1PPPR/BRNB1KN1 w Bhb - 2 9", - {32, 642, 20952, 464895, 15454749, 371861782}}, // 513 - {"brnqkbnr/pppppp2/8/6pp/6P1/P2P1P2/1PP1P2P/BRNQKBNR w HBhb - 0 9", - {20, 441, 9782, 240220, 5770284, 153051835}}, // 514 - {"2nqknrb/1rpppppp/5B2/pp6/1PP1b3/3P4/P3PPPP/1RNQKNRB w GBg - 1 9", - {35, 1042, 36238, 1101159, 38505058, 1202668717}}, // 515 - {"rb1nqknr/1pp1pppp/8/3p4/p2P4/6PN/PPPQPP1P/RBBN1K1R w HAha - 0 9", - {29, 692, 21237, 555018, 17820605, 497251206}}, // 516 - {"rnbbqknr/pppp4/5p2/4p1pp/P7/2N2PP1/1PPPP2P/R1BBQKNR w HAha - 0 9", - {23, 595, 14651, 415772, 10881112, 329010121}}, // 517 - {"rn1qkbnr/p1p1pp1p/bp4p1/3p4/1P6/4P3/P1PP1PPP/RNBQKBNR w HAha - 0 9", - {30, 794, 24319, 690811, 21657601, 647745807}}, // 518 - {"r1bqk1rb/pppnpppp/5n2/3p4/2P3PP/2N5/PP1PPP2/R1BQKNRB w GAga - 1 9", - {32, 821, 27121, 733155, 24923473, 710765657}}, // 519 - {"rbnqbknr/1p1ppp1p/6p1/p1p5/7P/3P4/PPP1PPP1/RBNQBKNR w HAha - 0 9", - {24, 720, 18842, 575027, 15992882, 501093456}}, // 520 - {"r1qbbk1r/pp1ppppp/n1p5/5n2/B1P3P1/8/PP1PPP1P/RNQ1BKNR w HAha - 0 9", - {27, 831, 22293, 698986, 19948650, 637973209}}, // 521 - {"rnqkbb1r/p1pppppp/8/8/1p4n1/PP4PP/2PPPP2/RNQKBBNR w HAha - 0 9", - {18, 463, 9519, 256152, 6065231, 172734380}}, // 522 - 
{"rnqk1nrb/pppbpp2/7p/3p2p1/4B3/2N1N1P1/PPPPPP1P/R1QKB1R1 w GAga - 0 9", - {34, 1171, 38128, 1318217, 42109356, 1465473753}}, // 523 - {"rbnqknbr/1pp1ppp1/3p4/7p/p2P2PP/2P5/PP2PP2/RBNQKNBR w HAha - 0 9", - {32, 867, 28342, 798722, 26632459, 781067145}}, // 524 - {"rn1bknbr/pq2pppp/1p6/2pp4/P7/1P1P4/2PNPPPP/RNQBK1BR w HAha - 0 9", - {24, 627, 16652, 462942, 13200921, 385193532}}, // 525 - {"r1qk1bbr/ppp1pp1p/2np1n2/6p1/2PP4/3BP3/PP3PPP/RNQKN1BR w HAha - 2 9", - {31, 992, 30213, 986631, 30397368, 1011631987}}, // 526 - {"r1qknrbb/pppp1p2/2n3p1/4p2p/8/QPP5/P1NPPPPP/RN1K1RBB w FAfa - 2 9", - {30, 702, 21563, 532939, 16813114, 438096194}}, // 527 - {"bbkr1qnr/2pppppp/2n5/pp6/8/PPN5/1BPPPPPP/1BR1KQNR w HC - 2 9", - {25, 573, 15183, 380910, 10554668, 283975400}}, // 528 - {"1rnbkqnr/1bpppppp/1p6/7P/p2P4/5P2/PPP1P1P1/BRNBKQNR w HBhb - 0 9", - {21, 503, 11790, 301084, 7679979, 207799378}}, // 529 - {"brnkqbnr/2p1pppp/1p6/3p4/1pP5/P6P/3PPPP1/BRNKQBNR w HBhb - 0 9", - {28, 743, 21054, 587192, 17354516, 507176753}}, // 530 - {"br1kqnrb/npp1pppp/8/3p4/p4N2/PP6/2PPPPPP/BR1KQNRB w GBgb - 0 9", - {31, 808, 25585, 698475, 22376575, 640362920}}, // 531 - {"rbbnkq1r/pppppp1p/7n/6p1/P5P1/2P2N2/1P1PPP1P/RBBNKQ1R w HAha - 1 9", - {29, 580, 17585, 404831, 12730970, 325226128}}, // 532 - {"rnbbk1nr/pp2qppp/2ppp3/8/3P4/P1N4N/1PP1PPPP/R1BBKQ1R w HAha - 0 9", - {29, 838, 24197, 721884, 21100580, 646624429}}, // 533 - {"rnbk1b1r/ppppn1pp/4pp2/7q/7P/P5PB/1PPPPP2/RNBKQ1NR w HAha - 3 9", - {20, 729, 16633, 576199, 14507076, 498621813}}, // 534 - {"r2kqnrb/pbppppp1/np5p/8/4Q1P1/3P4/PPP1PP1P/RNBK1NRB w GAga - 2 9", - {47, 1219, 55009, 1486353, 65239153, 1834391369}}, // 535 - {"rbnkbq1r/p1p2ppp/1p2pn2/3p4/P3P3/3P4/1PP1KPPP/RBN1BQNR w ha - 2 9", - {29, 923, 27179, 883866, 26202752, 868565895}}, // 536 - {"rk1bb1nr/ppppqppp/n7/1N2p3/6P1/7N/PPPPPP1P/R1KBBQ1R w HA - 6 9", - {27, 703, 19478, 559525, 16049807, 492966455}}, // 537 - {"rnkqbbnr/p1ppp2p/1p4p1/8/1B3p1P/2NP4/PPP1PPP1/R1KQ1BNR w HAha - 0 9", - {29, 610, 18855, 438277, 14020041, 355083962}}, // 538 - {"rnkqb1rb/pp1p1ppp/4p3/2P3n1/8/1PP5/P3PPPP/RNKQBNRB w GAga - 0 9", - {29, 675, 20699, 535821, 17000613, 476598337}}, // 539 - {"rb1kqnbr/pp1pp1p1/1np2p2/7p/P1P3PP/8/1P1PPP2/RBNKQNBR w HAha - 0 9", - {31, 1077, 33661, 1183381, 37415304, 1328374620}}, // 540 - {"rnkbq1br/ppp2ppp/3p4/Q3p1n1/5P2/3P2P1/PPP1P2P/RNKB1NBR w HAha - 0 9", - {41, 1201, 46472, 1420367, 52991625, 1675608008}}, // 541 - {"rn1qnbbr/pp2pppp/2ppk3/8/2PP4/3Q1N2/PP2PPPP/RNK2BBR w HA - 1 9", - {34, 666, 22474, 472299, 15860369, 353831792}}, // 542 - {"rnkqnr1b/ppppp1pp/5p2/8/Q1P2P2/8/PP1P2PP/RbK1NRBB w FAfa - 0 9", - {36, 876, 31987, 788580, 29022529, 736717252}}, // 543 - {"bbrn1nqr/ppp1k1pp/5p2/3pp3/7P/3PN3/PPP1PPP1/BBRK1NQR w - - 1 9", - {24, 583, 15063, 383532, 10522064, 280707118}}, // 544 - {"brnbkn1r/1pppp1p1/4q3/p4p1p/7P/1N3P2/PPPPP1PQ/BR1BKN1R w HBhb - 2 9", - {27, 935, 26120, 885699, 26000648, 873063158}}, // 545 - {"br1knbqr/pp2p1pp/1n6/2pp1p2/6P1/2P4B/PP1PPPQP/BRNKN2R w HBhb - 0 9", - {27, 681, 19202, 510687, 14954779, 415624943}}, // 546 - {"brnk1qrb/p1ppppp1/1p5p/8/P3n3/1N4P1/1PPPPPRP/BR1KNQ1B w Bgb - 0 9", - {22, 638, 13991, 412346, 9760752, 293499724}}, // 547 - {"rbbnknqr/pppp3p/5pp1/8/1P1pP3/7P/P1P2PP1/RBBNKNQR w HAha - 0 9", - {29, 756, 21616, 614074, 17602252, 528140595}}, // 548 - {"1nbbknqr/rpp1ppp1/1Q1p3p/p7/2P2PP1/8/PP1PP2P/RNBBKN1R w HAh - 2 9", - {37, 977, 34977, 944867, 33695089, 940198007}}, // 549 + // {"rnnkbbrq/1pppp1p1/5p2/7p/p6P/3N1P2/PPPPP1PQ/RN1KBBR1 w GAga - 0 9", + // 
{29, 673, 20098, 504715, 15545590, 416359581}}, // 474 + // {"r1nkbrqb/pppp1p2/n3p1p1/7p/2P2P2/1P6/P2PPQPP/RNNKBR1B w FAfa - 0 9", + // {27, 722, 21397, 593762, 18742426, 537750982}}, // 475 + // {"rbnnkr1q/1ppp2pp/p4p2/P2bp3/4P2P/8/1PPP1PP1/RBNNKRBQ w FAfa - 1 9", + // {26, 848, 23387, 741674, 21591790, 675163653}}, // 476 + // {"rn1bkrb1/1ppppp1p/pn4p1/8/P2q3P/3P4/NPP1PPP1/RN1BKRBQ w FAfa - 1 9", + // {22, 803, 18322, 632920, 15847763, 536419559}}, // 477 + // {"rn1krbbq/pppp1npp/4pp2/8/4P2P/3P2P1/PPP2P2/RNNKRBBQ w EAea - 1 9", {29, + // 810, 23968, 670500, 20361517, 575069358}}, // 478 + // {"rnn1rqbb/ppkp1pp1/2p1p2p/2P5/8/3P1P2/PP2P1PP/RNNKRQBB w EA - 0 9", {22, + // 506, 11973, 292344, 7287368, 189865944}}, // 479 + // {"bbqr1knr/pppppp1p/8/4n1p1/2P1P3/6P1/PPQP1P1P/BB1RNKNR w HDhd - 0 9", + // {26, 650, 18253, 481200, 14301029, 394943978}}, // 480 + // {"bq1bnknr/pprppp1p/8/2p3p1/4PPP1/8/PPPP3P/BQRBNKNR w HCh - 0 9", {24, + // 548, 14021, 347611, 9374021, 250988458}}, // 481 + // {"bqrnkb1r/1p2pppp/p1pp3n/5Q2/2P4P/5N2/PP1PPPP1/B1RNKB1R w HChc - 0 9", + // {46, 823, 33347, 673905, 26130444, 582880996}}, // 482 + // {"bq1rknrb/pppppp1p/4n3/6p1/4P1P1/3P1P2/PPP4P/BQRNKNRB w GCg - 0 9", {23, + // 618, 14815, 419474, 10606831, 315124518}}, // 483 + // {"q1brnknr/pp1pp1p1/8/2p2p1p/5b2/P4N2/1PPPP1PP/QBBRK1NR w hd - 0 9", {22, + // 675, 15778, 473994, 12077228, 368479752}}, // 484 + // {"qrbbnknr/1p1ppp1p/p1p5/8/1P2P1p1/3P1B2/P1P2PPP/QRB1NKNR w HBhb - 0 9", + // {32, 722, 24049, 569905, 19584539, 484814878}}, // 485 + // {"qrb1kbnr/p3pppp/2n5/1ppp4/7P/3P1P2/PPP1P1PR/QRBNKBN1 w Bhb - 0 9", {26, + // 831, 22606, 724505, 20500804, 662608969}}, // 486 + // {"qrbnknrb/ppp1pp2/6p1/7p/PPNp4/8/2PPPPPP/QRB1KNRB w GBgb - 0 9", {31, + // 840, 26762, 742772, 24422614, 701363800}}, // 487 + // {"qbrnbknr/pp1pp1pp/8/2p2p2/3Q4/PP6/2PPPPPP/1BRNBKNR w HChc - 0 9", {38, + // 1121, 39472, 1198438, 41108769, 1285503872}}, // 488 + // {"qr1bbk1r/pppppp1p/1n6/5np1/4B3/1PP5/P2PPPPP/QRN1BKNR w HBhb - 0 9", + // {25, 694, 16938, 472950, 12164609, 345122090}}, // 489 + // {"qrnkbbnr/1p1pp2p/p7/2p1Npp1/6P1/7P/PPPPPP2/QR1KBBNR w HBhb - 0 9", {27, + // 586, 16348, 393391, 11409633, 298054792}}, // 490 + // {"qrnkbnrb/pp1p1p2/2p1p1pp/4N3/P4P2/8/1PPPP1PP/QR1KBNRB w GBgb - 0 9", + // {32, 645, 20737, 460319, 15037464, 358531599}}, // 491 + // {"qbrnknbr/1pppppp1/p6p/8/1P6/3PP3/PQP2PPP/1BRNKNBR w HChc - 3 9", {26, + // 595, 16755, 415022, 12214768, 323518628}}, // 492 + // {"qrnbk1br/1ppppp1p/p5p1/8/4Pn2/4K1P1/PPPP1P1P/QRNB1NBR w hb - 0 9", {24, + // 609, 13776, 359415, 8538539, 230364479}}, // 493 + // {"qrnk1bbr/1pnp1ppp/p1p1p3/8/3Q4/1P1N3P/P1PPPPP1/1RNK1BBR w HBhb - 0 9", + // {43, 1106, 42898, 1123080, 41695761, 1113836402}}, // 494 + // {"qrnknrb1/pppppp2/8/6pp/4P2P/3P1P2/PbP3P1/QRNKNRBB w FBfb - 0 9", {24, + // 658, 17965, 488373, 14457245, 400971226}}, // 495 + // {"bbrqnrk1/ppp2ppp/7n/3pp3/8/P4N1N/1PPPPPPP/BBRQ1RK1 w - - 1 9", {22, + // 503, 12078, 310760, 8080951, 224960353}}, // 496 + // {"brqbnk1r/1ppp1ppp/8/p3pn2/8/2PP1P2/PP2PKPP/BRQBN1NR w hb - 1 9", {25, + // 745, 19387, 570459, 15520298, 460840861}}, // 497 + // {"brqnkbnr/pp2pp1p/3p4/2p5/5p2/3P3P/PPP1PPP1/B1RNKBNR w Hhb - 0 9", {19, + // 516, 10755, 312996, 6995034, 214340699}}, // 498 + // {"brq1kn1b/1ppppprp/2n3p1/p7/P1N5/6P1/1PPPPP1P/BRQNK1RB w GBb - 2 9", + // {29, 557, 16739, 352277, 10840256, 249999654}}, // 499 + // {"rbbq1k1r/ppp1pppp/7n/1n1p4/5P2/P2P4/1PPBP1PP/RB1QNKNR w HAha - 1 9", + // {25, 769, 20110, 638340, 17438715, 570893953}}, // 500 + // 
{"r1bbnk1r/qpp1pppp/p6n/3p4/1P6/5N1P/P1PPPPP1/RQBBK1NR w ha - 0 9", {23, + // 728, 18209, 587364, 16053564, 529082811}}, // 501 + // {"rqbnkbnr/1pp2p1p/3p4/p3p1p1/8/2P2P2/PP1PPNPP/RQBNKB1R w HAha - 0 9", + // {26, 772, 21903, 653704, 19571559, 593915677}}, // 502 + // {"r1bnknrb/pqppp1p1/1p5p/5p2/7P/3P2N1/PPP1PPP1/RQBNK1RB w GAga - 2 9", + // {27, 748, 20291, 597105, 16324542, 506453626}}, // 503 + // {"rbqnbknr/pp1pppp1/8/2p5/3P3p/5N1P/PPP1PPPR/RBQNBK2 w Aha - 0 9", {30, + // 859, 26785, 819631, 26363334, 842796987}}, // 504 + // {"rqnbbrk1/ppppppp1/8/5n1p/3P3P/2B3P1/PPP1PP2/RQNB1KNR w HA - 0 9", {22, + // 505, 11452, 283464, 7055215, 186760784}}, // 505 + // {"rqnkbbnr/pp2p1p1/8/2pp1p1p/3PPP2/8/PPP1N1PP/RQNKBB1R w HAha - 0 9", + // {28, 832, 23142, 722857, 20429246, 663183060}}, // 506 + // {"rqnkbnr1/pppp2bp/6p1/4pp2/1P2P3/3NN3/P1PP1PPP/RQ1KB1RB w GAga - 0 9", + // {28, 641, 18835, 459993, 14038570, 364210162}}, // 507 + // {"rbq2kbr/pppppppp/2n5/P7/3P1n2/2P5/1P2PPPP/RBQNKNBR w HA - 1 9", {31, + // 889, 27028, 766181, 24299415, 692180754}}, // 508 + // {"rq1bkn1r/ppppp2p/3n4/5pp1/2b3P1/1N1P1P2/PPP1P2P/RQ1BKNBR w HAha - 1 9", + // {28, 810, 22667, 657520, 18719949, 556282676}}, // 509 + // {"r1nknbbr/p2ppp1p/1pp3p1/8/1P6/4P3/P1PPNPPq/R1QKNBBR w HAha - 0 9", {24, + // 797, 22144, 719069, 21862776, 716521139}}, // 510 + // {"rqnknrbb/ppp1p3/5ppp/2Np4/2P5/4P3/PP1P1PPP/RQNK1RBB w FAfa - 0 9", {34, + // 686, 23277, 515541, 17664543, 423574794}}, // 511 + // {"1brnqknr/2p1pppp/p2p4/1P6/6P1/4Nb2/PP1PPP1P/BBR1QKNR w HChc - 1 9", + // {34, 1019, 32982, 1003103, 33322477, 1043293394}}, // 512 + // {"brn1qknr/1p1pppp1/pb5p/Q1p5/3P3P/8/PPP1PPPR/BRNB1KN1 w Bhb - 2 9", {32, + // 642, 20952, 464895, 15454749, 371861782}}, // 513 + // {"brnqkbnr/pppppp2/8/6pp/6P1/P2P1P2/1PP1P2P/BRNQKBNR w HBhb - 0 9", {20, + // 441, 9782, 240220, 5770284, 153051835}}, // 514 + // {"2nqknrb/1rpppppp/5B2/pp6/1PP1b3/3P4/P3PPPP/1RNQKNRB w GBg - 1 9", {35, + // 1042, 36238, 1101159, 38505058, 1202668717}}, // 515 + // {"rb1nqknr/1pp1pppp/8/3p4/p2P4/6PN/PPPQPP1P/RBBN1K1R w HAha - 0 9", {29, + // 692, 21237, 555018, 17820605, 497251206}}, // 516 + // {"rnbbqknr/pppp4/5p2/4p1pp/P7/2N2PP1/1PPPP2P/R1BBQKNR w HAha - 0 9", {23, + // 595, 14651, 415772, 10881112, 329010121}}, // 517 + // {"rn1qkbnr/p1p1pp1p/bp4p1/3p4/1P6/4P3/P1PP1PPP/RNBQKBNR w HAha - 0 9", + // {30, 794, 24319, 690811, 21657601, 647745807}}, // 518 + // {"r1bqk1rb/pppnpppp/5n2/3p4/2P3PP/2N5/PP1PPP2/R1BQKNRB w GAga - 1 9", + // {32, 821, 27121, 733155, 24923473, 710765657}}, // 519 + // {"rbnqbknr/1p1ppp1p/6p1/p1p5/7P/3P4/PPP1PPP1/RBNQBKNR w HAha - 0 9", {24, + // 720, 18842, 575027, 15992882, 501093456}}, // 520 + // {"r1qbbk1r/pp1ppppp/n1p5/5n2/B1P3P1/8/PP1PPP1P/RNQ1BKNR w HAha - 0 9", + // {27, 831, 22293, 698986, 19948650, 637973209}}, // 521 + // {"rnqkbb1r/p1pppppp/8/8/1p4n1/PP4PP/2PPPP2/RNQKBBNR w HAha - 0 9", {18, + // 463, 9519, 256152, 6065231, 172734380}}, // 522 + // {"rnqk1nrb/pppbpp2/7p/3p2p1/4B3/2N1N1P1/PPPPPP1P/R1QKB1R1 w GAga - 0 9", + // {34, 1171, 38128, 1318217, 42109356, 1465473753}}, // 523 + // {"rbnqknbr/1pp1ppp1/3p4/7p/p2P2PP/2P5/PP2PP2/RBNQKNBR w HAha - 0 9", {32, + // 867, 28342, 798722, 26632459, 781067145}}, // 524 + // {"rn1bknbr/pq2pppp/1p6/2pp4/P7/1P1P4/2PNPPPP/RNQBK1BR w HAha - 0 9", {24, + // 627, 16652, 462942, 13200921, 385193532}}, // 525 + // {"r1qk1bbr/ppp1pp1p/2np1n2/6p1/2PP4/3BP3/PP3PPP/RNQKN1BR w HAha - 2 9", + // {31, 992, 30213, 986631, 30397368, 1011631987}}, // 526 + // 
{"r1qknrbb/pppp1p2/2n3p1/4p2p/8/QPP5/P1NPPPPP/RN1K1RBB w FAfa - 2 9", + // {30, 702, 21563, 532939, 16813114, 438096194}}, // 527 + // {"bbkr1qnr/2pppppp/2n5/pp6/8/PPN5/1BPPPPPP/1BR1KQNR w HC - 2 9", {25, + // 573, 15183, 380910, 10554668, 283975400}}, // 528 + // {"1rnbkqnr/1bpppppp/1p6/7P/p2P4/5P2/PPP1P1P1/BRNBKQNR w HBhb - 0 9", {21, + // 503, 11790, 301084, 7679979, 207799378}}, // 529 + // {"brnkqbnr/2p1pppp/1p6/3p4/1pP5/P6P/3PPPP1/BRNKQBNR w HBhb - 0 9", {28, + // 743, 21054, 587192, 17354516, 507176753}}, // 530 + // {"br1kqnrb/npp1pppp/8/3p4/p4N2/PP6/2PPPPPP/BR1KQNRB w GBgb - 0 9", {31, + // 808, 25585, 698475, 22376575, 640362920}}, // 531 + // {"rbbnkq1r/pppppp1p/7n/6p1/P5P1/2P2N2/1P1PPP1P/RBBNKQ1R w HAha - 1 9", + // {29, 580, 17585, 404831, 12730970, 325226128}}, // 532 + // {"rnbbk1nr/pp2qppp/2ppp3/8/3P4/P1N4N/1PP1PPPP/R1BBKQ1R w HAha - 0 9", + // {29, 838, 24197, 721884, 21100580, 646624429}}, // 533 + // {"rnbk1b1r/ppppn1pp/4pp2/7q/7P/P5PB/1PPPPP2/RNBKQ1NR w HAha - 3 9", {20, + // 729, 16633, 576199, 14507076, 498621813}}, // 534 + // {"r2kqnrb/pbppppp1/np5p/8/4Q1P1/3P4/PPP1PP1P/RNBK1NRB w GAga - 2 9", {47, + // 1219, 55009, 1486353, 65239153, 1834391369}}, // 535 + // {"rbnkbq1r/p1p2ppp/1p2pn2/3p4/P3P3/3P4/1PP1KPPP/RBN1BQNR w ha - 2 9", + // {29, 923, 27179, 883866, 26202752, 868565895}}, // 536 + // {"rk1bb1nr/ppppqppp/n7/1N2p3/6P1/7N/PPPPPP1P/R1KBBQ1R w HA - 6 9", {27, + // 703, 19478, 559525, 16049807, 492966455}}, // 537 + // {"rnkqbbnr/p1ppp2p/1p4p1/8/1B3p1P/2NP4/PPP1PPP1/R1KQ1BNR w HAha - 0 9", + // {29, 610, 18855, 438277, 14020041, 355083962}}, // 538 + // {"rnkqb1rb/pp1p1ppp/4p3/2P3n1/8/1PP5/P3PPPP/RNKQBNRB w GAga - 0 9", {29, + // 675, 20699, 535821, 17000613, 476598337}}, // 539 + // {"rb1kqnbr/pp1pp1p1/1np2p2/7p/P1P3PP/8/1P1PPP2/RBNKQNBR w HAha - 0 9", + // {31, 1077, 33661, 1183381, 37415304, 1328374620}}, // 540 + // {"rnkbq1br/ppp2ppp/3p4/Q3p1n1/5P2/3P2P1/PPP1P2P/RNKB1NBR w HAha - 0 9", + // {41, 1201, 46472, 1420367, 52991625, 1675608008}}, // 541 + // {"rn1qnbbr/pp2pppp/2ppk3/8/2PP4/3Q1N2/PP2PPPP/RNK2BBR w HA - 1 9", {34, + // 666, 22474, 472299, 15860369, 353831792}}, // 542 + // {"rnkqnr1b/ppppp1pp/5p2/8/Q1P2P2/8/PP1P2PP/RbK1NRBB w FAfa - 0 9", {36, + // 876, 31987, 788580, 29022529, 736717252}}, // 543 + // {"bbrn1nqr/ppp1k1pp/5p2/3pp3/7P/3PN3/PPP1PPP1/BBRK1NQR w - - 1 9", {24, + // 583, 15063, 383532, 10522064, 280707118}}, // 544 + // {"brnbkn1r/1pppp1p1/4q3/p4p1p/7P/1N3P2/PPPPP1PQ/BR1BKN1R w HBhb - 2 9", + // {27, 935, 26120, 885699, 26000648, 873063158}}, // 545 + // {"br1knbqr/pp2p1pp/1n6/2pp1p2/6P1/2P4B/PP1PPPQP/BRNKN2R w HBhb - 0 9", + // {27, 681, 19202, 510687, 14954779, 415624943}}, // 546 + // {"brnk1qrb/p1ppppp1/1p5p/8/P3n3/1N4P1/1PPPPPRP/BR1KNQ1B w Bgb - 0 9", + // {22, 638, 13991, 412346, 9760752, 293499724}}, // 547 + // {"rbbnknqr/pppp3p/5pp1/8/1P1pP3/7P/P1P2PP1/RBBNKNQR w HAha - 0 9", {29, + // 756, 21616, 614074, 17602252, 528140595}}, // 548 + // {"1nbbknqr/rpp1ppp1/1Q1p3p/p7/2P2PP1/8/PP1PP2P/RNBBKN1R w HAh - 2 9", + // {37, 977, 34977, 944867, 33695089, 940198007}}, // 549 {"rnb2bqr/ppkpppp1/3n3p/2p5/6PP/2N2P2/PPPPP3/R1BKNBQR w HA - 2 9", {30, 647, 20365, 467780, 15115531, 369257622}}, // 550 - {"rn1k1qrb/p1pppppp/bp6/8/4n3/P4BPP/1PPPPP2/RNBKNQR1 w GAga - 2 9", - {22, 670, 14998, 451517, 11199653, 339919682}}, // 551 - {"rb2bnqr/nppkpppp/3p4/p7/1P6/P2N2P1/2PPPP1P/RB1KBNQR w HA - 3 9", - {22, 479, 11475, 264739, 6831555, 167329117}}, // 552 - {"r1kbb1qr/2pppppp/np2n3/p7/2P3P1/8/PP1PPPQP/RNKBBN1R w HAha - 1 9", - {32, 723, 23953, 
581832, 19472074, 504622114}}, // 553 - {"rnknbb1r/p1ppp1pp/8/1p1P1p1q/8/P1P5/1P2PPPP/RNKNBBQR w HAha - 1 9", - {19, 607, 12733, 417451, 9753617, 325177085}}, // 554 - {"rnkn1qrb/pp1bp1pp/2p5/1N1p1p2/8/2P5/PPKPPPPP/R2NBQRB w ga - 2 9", - {27, 533, 14549, 330747, 9206957, 232664675}}, // 555 - {"r1nknqbr/pp2p1pp/2p2p2/3p4/6P1/PP1P4/2P1PP1b/RBNKNQBR w HAha - 0 9", - {20, 582, 13777, 409166, 10708639, 326565393}}, // 556 - {"rnkb1qbr/p1pp1p1p/1p2pn2/1Q4p1/4P3/N4P2/PPPP2PP/R1KBN1BR w HAha - 0 9", - {40, 1038, 39356, 1051441, 39145902, 1079612614}}, // 557 - {"rn2qbbr/1pkppp1p/p3n1p1/8/8/2P2P2/PP1PP1PP/RNKN1BBR w HA - 0 9", - {24, 605, 14888, 385964, 9687507, 260874068}}, // 558 - {"rn1nqrbb/p1kppp1p/8/1pp3p1/1P6/2N1P3/P1PP1PPP/RK1NQRBB w - - 0 9", - {21, 540, 12489, 337997, 8436136, 237525904}}, // 559 - {"bbrnknrq/1pp3pp/p2p1p2/4p3/P7/1P2N3/2PPPPPP/BBRN1RKQ w gc - 0 9", - {24, 527, 13900, 326175, 9139962, 226253685}}, // 560 - {"brnb1nrq/pppp1kpp/4p3/8/5p1P/P1P3P1/1P1PPP2/BRNBKNRQ w GB - 1 9", - {29, 773, 23904, 638768, 20503775, 560338709}}, // 561 - {"br1k1brq/ppppp2p/1n1n1pp1/8/P1P5/3P2P1/1P2PP1P/BRNKNBRQ w GBgb - 0 9", - {28, 811, 23550, 664880, 19913758, 565143976}}, // 562 - {"1r1knrqb/n1pppppp/p1b5/1p6/8/3N1P2/PPPPP1PP/BRNK1RQB w fb - 3 9", - {29, 753, 23210, 620019, 20044474, 558383603}}, // 563 - {"rbbnk1rq/pppppppp/8/3Pn3/8/4P1P1/PPP2P1P/RBBNKNRQ w GAga - 1 9", - {22, 551, 12619, 324608, 8204171, 217689974}}, // 564 - {"rnbbk1rq/2pppp1p/p3n1p1/1p6/P3N3/8/1PPPPPPP/RNBB1KRQ w ga - 0 9", - {26, 742, 20061, 599527, 16787080, 525678162}}, // 565 - {"rnbkn1rq/ppppppb1/6p1/7p/2B2P2/1P2P3/P1PP2PP/RNBKN1RQ w GAga - 1 9", - {28, 799, 23210, 689436, 20755098, 639632905}}, // 566 - {"rn1knrqb/p2pppp1/b1p5/1p5p/2P2P2/1P6/P2PP1PP/RNBKNRQB w FAfa - 1 9", - {30, 579, 18481, 397545, 13257198, 311282465}}, // 567 - {"rbnkbnrq/pp2p1Np/2p2p2/8/3p4/8/PPPPPPPP/RBNKBR1Q w Aga - 0 9", - {23, 670, 16435, 501883, 13012378, 411860744}}, // 568 - {"rk1bbnrq/ppp1pppp/n7/3p4/5P2/3P2NP/PPP1P1P1/RNKBB1RQ w GA - 0 9", - {26, 597, 16238, 402506, 11269462, 296701249}}, // 569 - {"r1knbbrq/pppp2p1/2n1p2p/5p2/4P3/P1PP4/1P3PPP/RNKNBBRQ w GAga - 1 9", - {20, 596, 13091, 399069, 9416862, 293659781}}, // 570 - {"rnknbrqb/p1p1pp1p/3p4/1p1N2p1/8/N7/PPPPPPPP/1RK1BRQB w Ffa - 0 9", - {26, 724, 18942, 552040, 15257204, 461293885}}, // 571 - {"rbnknrb1/1p1ppp1p/p1p3p1/8/1P3P2/1R6/PqPPP1PP/RBNKN1BQ w Afa - 0 9", - {31, 1183, 34723, 1289502, 38722152, 1421492227}}, // 572 - {"rnkbnrbq/2p1ppp1/p7/1p1p3p/3P4/1P4P1/P1P1PP1P/RNKBNRBQ w FAfa - 0 9", - {24, 506, 12748, 301464, 8086100, 207129256}}, // 573 - {"r1knrbbq/pp1ppppp/2p1n3/8/2P3P1/P7/1PKPPP1P/RN1NRBBQ w ea - 0 9", - {28, 570, 16037, 352471, 10278695, 242592363}}, // 574 - {"rnknrq1b/ppp1p1p1/4b3/3p1p1p/6P1/P4P2/1PPPPQ1P/RNKNR1BB w EAea - 2 9", - {30, 739, 23124, 594962, 19252739, 521629794}}, // 575 - {"bbqr1krn/pppp1p1p/5n2/4p1p1/3P4/P3QP2/1PP1P1PP/BB1RNKRN w GDgd - 0 9", - {31, 799, 25627, 674913, 22172123, 609277274}}, // 576 - {"bq1b1krn/pp1ppppp/3n4/2r5/3p3N/6N1/PPP1PPPP/BQRB1KR1 w GCg - 2 9", - {21, 798, 18571, 688429, 17546069, 647165916}}, // 577 - {"bqrnkbrn/2pp1pp1/p7/1p2p2p/1P6/4N3/P1PPPPPP/BQR1KBRN w GCgc - 0 9", - {27, 783, 22327, 670798, 20059741, 624462073}}, // 578 - {"bqr1krnb/1np1pppp/8/pp1p4/8/2P2N2/PP1PPPPP/BQRNKR1B w FCfc - 0 9", - {28, 636, 18874, 461104, 14237097, 372181570}}, // 579 - {"qbb1rkrn/1ppppppp/p7/7n/8/P2P4/1PP1PPPP/QBBRNKRN w Gg - 0 9", - {25, 547, 13837, 332918, 8849383, 229112926}}, // 580 - 
{"1rbbnkrn/p1p1pp1p/2q5/1p1p2p1/8/2P3P1/PP1PPP1P/QRBBNKRN w GBgb - 2 9", - {24, 1010, 24370, 983770, 24328258, 961371180}}, // 581 - {"qrb1kbrn/ppp1p2p/4npp1/3p4/8/1PP4P/PR1PPPP1/Q1BNKBRN w Ggb - 1 9", - {18, 451, 9291, 247310, 5568106, 155744022}}, // 582 - {"qr2krnb/p1p1pppp/b1np4/1p6/3NP3/7P/PPPP1PP1/QRBNKR1B w FBfb - 2 9", - {25, 667, 17081, 476030, 12458875, 361495148}}, // 583 - {"qbrnbkrn/ppp3pp/3p4/5p2/2P1pP2/6PP/PP1PP3/QBRNBKRN w GCgc - 0 9", - {24, 650, 16835, 445263, 12187382, 326834539}}, // 584 - {"qrnb1krn/ppp1p1pp/5p2/2Np4/b2P4/2P5/PP2PPPP/QR1BBKRN w GBgb - 0 9", - {27, 641, 17490, 432041, 12103076, 310695797}}, // 585 - {"qrnkbbrn/pp2pp2/8/2pp2pp/6PP/3P4/PPPKPP2/QRN1BBRN w gb - 0 9", - {22, 554, 13116, 357404, 9014737, 258925091}}, // 586 - {"qrnkbrnb/p1p1ppp1/1p6/3p4/3P3p/5N1P/PPP1PPP1/QRNKBR1B w FBfb - 0 9", - {24, 529, 13205, 318722, 8295874, 213856651}}, // 587 - {"qbr1krbn/1pppp1pp/p7/5pn1/2PP4/8/PPB1PPPP/Q1RNKRBN w FCfc - 0 9", - {26, 831, 21651, 696830, 18961456, 621884383}}, // 588 - {"1rnbkrbn/1qp1pppp/3p4/pp6/4P3/1NP4P/PP1P1PP1/QR1BKRBN w FBfb - 0 9", - {24, 597, 15089, 404761, 10832084, 307793179}}, // 589 - {"q1rkrbbn/ppp1pppp/8/3p4/1PnP4/P7/1RP1PPPP/Q1NKRBBN w Ee - 1 9", - {20, 520, 10769, 278067, 6452205, 170268300}}, // 590 - {"qrnkrn1b/ppppp1pp/4b3/7P/6p1/P7/1PPPPP2/QRNKRNBB w EBeb - 0 9", - {26, 566, 15623, 381312, 10940750, 287987207}}, // 591 - {"bbr1nkrn/ppp1pppp/3q4/3p4/8/P7/1PPPPPPP/BBRQNRKN w gc - 5 9", - {19, 661, 13895, 460396, 10870247, 356399665}}, // 592 - {"brqbnkrn/pp1pp2p/5pp1/2p5/4P3/P2P1N2/1PP2PPP/BRQB1KRN w GBgb - 0 9", - {27, 679, 19916, 527306, 16391730, 455940859}}, // 593 - {"2qnkbrn/p1pppppp/8/1r6/1p2bP2/7N/PPPPP1PP/BR1QKBRN w GBg - 4 9", - {18, 774, 15713, 635461, 14371755, 559579332}}, // 594 - {"r1qnkr1b/p1pppppp/7n/1p6/8/1P3b1N/PRPPPPPP/B1QNK1RB w f - 5 9", - {21, 677, 15437, 501520, 12463801, 410795298}}, // 595 - {"rbbqn1rn/pppp1pp1/3k4/4p2Q/2PPP3/8/PP3PPP/RBB1NKRN w GA - 1 9", - {40, 742, 28757, 579833, 21852196, 471452088}}, // 596 - {"rqbbnkrn/3pppp1/p1p4p/1p6/5P2/P2N4/1PPPP1PP/RQBBK1RN w ga - 0 9", - {23, 665, 16400, 492544, 12794736, 396640086}}, // 597 - {"r2nkbrn/pp2pppp/8/2ppqb2/2P3P1/5P2/PP1PPN1P/RQB1KBRN w GAga - 3 9", - {28, 1108, 31164, 1194581, 34780853, 1292405738}}, // 598 - {"rqbnk1nb/p1pppr1p/5p2/1p4p1/1PP1P3/8/P2P1PPP/RQBNKRNB w FAa - 1 9", - {26, 650, 18208, 491403, 14565370, 416833400}}, // 599 - {"rbqnb1rn/p1pp1kpp/1p2pp2/8/4P2P/P5P1/1PPP1P2/RBQNBKRN w GA - 0 9", - {20, 437, 9423, 222154, 5282124, 132309824}}, // 600 - {"rqnbbkrn/p1p1pppp/3p4/1p5B/8/1P1NP3/P1PP1PPP/RQ2BKRN w GAga - 0 9", - {30, 606, 18382, 422491, 12989786, 326601372}}, // 601 - {"rqnkbbr1/ppppp1pp/5p2/7n/8/2PNP2P/PP1P1PP1/RQ1KBBRN w GAga - 1 9", - {23, 482, 12506, 297869, 8430874, 217797292}}, // 602 - {"r1nkbrnb/2ppppp1/1q6/pp5p/1P6/P3P3/2PPKPPP/RQN1BRNB w fa - 2 9", - {25, 827, 21518, 701071, 19290675, 632892337}}, // 603 - {"rbqnkrbn/p1ppppp1/7p/1p6/7P/2N1P3/PPPP1PPB/RBQ1KR1N w FAfa - 1 9", - {30, 627, 18566, 440217, 12976682, 337377291}}, // 604 - {"r1nbkrbn/p1qp1ppp/8/1pp1p3/2P1P3/6P1/PP1PBP1P/RQN1KRBN w FAfa - 2 9", - {22, 616, 14503, 431199, 10850952, 335943324}}, // 605 - {"rqnkr1bn/ppp1ppb1/3p2pp/8/P7/2P2P2/1PKPP1PP/RQN1RBBN w ea - 1 9", - {31, 679, 21365, 493500, 15661072, 379844460}}, // 606 - {"r2krnbb/qppp1ppp/1n6/p3p3/PP6/4N3/N1PPPPPP/RQ1KR1BB w EAea - 4 9", - {24, 645, 17054, 487028, 13837270, 416239106}}, // 607 - {"bbr1qk1n/1ppppp1p/2n5/p7/P7/1P2P3/2PP1PrP/1BRNQKRN w GCc - 0 9", - {18, 520, 10680, 304462, 7215306, 
207612575}}, // 608 - {"brnbq1rn/2ppppkp/p5p1/1p6/8/1BP3P1/PP1PPP1P/BRN1QRKN w - - 0 9", - {21, 625, 13989, 419667, 9929336, 300902534}}, // 609 - {"brn1kbrn/pp2p1pp/3p4/q1p2p2/2P4P/6P1/PP1PPP2/BRNQKBRN w GBgb - 1 9", - {18, 477, 10205, 273925, 6720181, 187205941}}, // 610 - {"brn1krnb/p3pppp/1qpp4/1p6/2P3P1/1P6/P2PPP1P/BRNQKRNB w FBfb - 1 9", - {30, 835, 24761, 716151, 21806428, 654487872}}, // 611 - {"r1b1qkrn/1p1ppppp/p1p1n3/8/4P3/1PN5/P1PPQPPb/RBB2KRN w GAga - 0 9", - {28, 825, 24536, 716585, 22079005, 647939781}}, // 612 - {"r1bbqk1n/p1pppprp/n7/1p4p1/5P2/2N3N1/PPPPP1PP/1RBBQKR1 w Ga - 4 9", - {25, 545, 14657, 358854, 10271111, 273864588}}, // 613 - {"rnbqkbrn/p1pp1pp1/4p3/7p/2p4P/2P5/PP1PPPP1/R1BQKBRN w GAga - 0 9", - {17, 445, 9076, 255098, 5918310, 174733195}}, // 614 - {"rnbqkrnb/1p1pp1p1/2p4p/p4p2/3P2P1/7N/PPPBPP1P/RN1QKR1B w FAfa - 0 9", - {34, 746, 25319, 623133, 21285553, 569141201}}, // 615 - {"rbnqbkr1/1ppppp2/p5n1/6pp/4P3/1N6/PPPP1PPP/RBQ1BRKN w ga - 2 9", - {18, 466, 9683, 260864, 6051500, 170135726}}, // 616 - {"rnqb1krn/ppppp1p1/7p/7b/P1P2pPP/8/1P1PPP2/RNQBBKRN w GAga - 0 9", - {24, 575, 15400, 385825, 11039042, 291243811}}, // 617 - {"rnqkbbr1/p1pp1ppp/4p3/1p6/P3P2n/5P2/1PPP1NPP/RNQKBBR1 w GAga - 2 9", - {27, 803, 22883, 694449, 20666099, 638696065}}, // 618 - {"rn1kbrnb/1qppp1pp/1p6/p4p2/1B1P4/1P5N/P1P1PPPP/RNQK1R1B w FAfa - 0 9", - {37, 1209, 43015, 1425600, 49748034, 1671593862}}, // 619 - {"rbnqkrbn/Bppp1p2/p5pp/4p3/5P2/6PP/PPPPP3/RBNQKR1N w FAfa - 0 9", - {29, 720, 20434, 534148, 15384362, 421343249}}, // 620 - {"rnqbkr1n/1p1ppbpp/3p1p2/p7/8/1P6/P1PPPPPP/R1QBKRBN w FAfa - 0 9", - {20, 657, 14424, 492678, 11843134, 413965054}}, // 621 - {"rnqkrb1n/ppppp3/6p1/5p1p/2b2P2/P1N5/1PPPP1PP/RQ1KRBBN w EAea - 1 9", - {28, 749, 20684, 543151, 15379233, 417191461}}, // 622 - {"rnqk1nbb/1pp2ppp/3pr3/p3p3/3P1P2/2N3N1/PPP1P1PP/R1QKR1BB w EAa - 1 9", - {29, 883, 26412, 815098, 25144295, 789705382}}, // 623 - {"bbr1kqrn/p1p1ppp1/1p2n2p/3p4/1P1P4/2N5/P1P1PPPP/BBR1KQRN w GCgc - 0 9", - {22, 485, 11475, 271271, 6825123, 171793012}}, // 624 - {"brnbkq1n/ppp1ppr1/7p/3p2p1/2P3PP/8/PPBPPP2/BRN1KQRN w GBb - 2 9", - {30, 634, 19017, 442537, 13674310, 345386924}}, // 625 - {"brnkqbr1/1pppp1pp/5p2/p7/P1P1P2n/8/1P1P1PP1/BRNKQBRN w GBgb - 0 9", - {21, 504, 11672, 305184, 7778289, 217596497}}, // 626 + // {"rn1k1qrb/p1pppppp/bp6/8/4n3/P4BPP/1PPPPP2/RNBKNQR1 w GAga - 2 9", {22, + // 670, 14998, 451517, 11199653, 339919682}}, // 551 + // {"rb2bnqr/nppkpppp/3p4/p7/1P6/P2N2P1/2PPPP1P/RB1KBNQR w HA - 3 9", {22, + // 479, 11475, 264739, 6831555, 167329117}}, // 552 + // {"r1kbb1qr/2pppppp/np2n3/p7/2P3P1/8/PP1PPPQP/RNKBBN1R w HAha - 1 9", {32, + // 723, 23953, 581832, 19472074, 504622114}}, // 553 + // {"rnknbb1r/p1ppp1pp/8/1p1P1p1q/8/P1P5/1P2PPPP/RNKNBBQR w HAha - 1 9", + // {19, 607, 12733, 417451, 9753617, 325177085}}, // 554 + // {"rnkn1qrb/pp1bp1pp/2p5/1N1p1p2/8/2P5/PPKPPPPP/R2NBQRB w ga - 2 9", {27, + // 533, 14549, 330747, 9206957, 232664675}}, // 555 + // {"r1nknqbr/pp2p1pp/2p2p2/3p4/6P1/PP1P4/2P1PP1b/RBNKNQBR w HAha - 0 9", + // {20, 582, 13777, 409166, 10708639, 326565393}}, // 556 + // {"rnkb1qbr/p1pp1p1p/1p2pn2/1Q4p1/4P3/N4P2/PPPP2PP/R1KBN1BR w HAha - 0 9", + // {40, 1038, 39356, 1051441, 39145902, 1079612614}}, // 557 + // {"rn2qbbr/1pkppp1p/p3n1p1/8/8/2P2P2/PP1PP1PP/RNKN1BBR w HA - 0 9", {24, + // 605, 14888, 385964, 9687507, 260874068}}, // 558 + // {"rn1nqrbb/p1kppp1p/8/1pp3p1/1P6/2N1P3/P1PP1PPP/RK1NQRBB w - - 0 9", {21, + // 540, 12489, 337997, 8436136, 237525904}}, // 559 + // 
{"bbrnknrq/1pp3pp/p2p1p2/4p3/P7/1P2N3/2PPPPPP/BBRN1RKQ w gc - 0 9", {24, + // 527, 13900, 326175, 9139962, 226253685}}, // 560 + // {"brnb1nrq/pppp1kpp/4p3/8/5p1P/P1P3P1/1P1PPP2/BRNBKNRQ w GB - 1 9", {29, + // 773, 23904, 638768, 20503775, 560338709}}, // 561 + // {"br1k1brq/ppppp2p/1n1n1pp1/8/P1P5/3P2P1/1P2PP1P/BRNKNBRQ w GBgb - 0 9", + // {28, 811, 23550, 664880, 19913758, 565143976}}, // 562 + // {"1r1knrqb/n1pppppp/p1b5/1p6/8/3N1P2/PPPPP1PP/BRNK1RQB w fb - 3 9", {29, + // 753, 23210, 620019, 20044474, 558383603}}, // 563 + // {"rbbnk1rq/pppppppp/8/3Pn3/8/4P1P1/PPP2P1P/RBBNKNRQ w GAga - 1 9", {22, + // 551, 12619, 324608, 8204171, 217689974}}, // 564 + // {"rnbbk1rq/2pppp1p/p3n1p1/1p6/P3N3/8/1PPPPPPP/RNBB1KRQ w ga - 0 9", {26, + // 742, 20061, 599527, 16787080, 525678162}}, // 565 + // {"rnbkn1rq/ppppppb1/6p1/7p/2B2P2/1P2P3/P1PP2PP/RNBKN1RQ w GAga - 1 9", + // {28, 799, 23210, 689436, 20755098, 639632905}}, // 566 + // {"rn1knrqb/p2pppp1/b1p5/1p5p/2P2P2/1P6/P2PP1PP/RNBKNRQB w FAfa - 1 9", + // {30, 579, 18481, 397545, 13257198, 311282465}}, // 567 + // {"rbnkbnrq/pp2p1Np/2p2p2/8/3p4/8/PPPPPPPP/RBNKBR1Q w Aga - 0 9", {23, + // 670, 16435, 501883, 13012378, 411860744}}, // 568 + // {"rk1bbnrq/ppp1pppp/n7/3p4/5P2/3P2NP/PPP1P1P1/RNKBB1RQ w GA - 0 9", {26, + // 597, 16238, 402506, 11269462, 296701249}}, // 569 + // {"r1knbbrq/pppp2p1/2n1p2p/5p2/4P3/P1PP4/1P3PPP/RNKNBBRQ w GAga - 1 9", + // {20, 596, 13091, 399069, 9416862, 293659781}}, // 570 + // {"rnknbrqb/p1p1pp1p/3p4/1p1N2p1/8/N7/PPPPPPPP/1RK1BRQB w Ffa - 0 9", {26, + // 724, 18942, 552040, 15257204, 461293885}}, // 571 + // {"rbnknrb1/1p1ppp1p/p1p3p1/8/1P3P2/1R6/PqPPP1PP/RBNKN1BQ w Afa - 0 9", + // {31, 1183, 34723, 1289502, 38722152, 1421492227}}, // 572 + // {"rnkbnrbq/2p1ppp1/p7/1p1p3p/3P4/1P4P1/P1P1PP1P/RNKBNRBQ w FAfa - 0 9", + // {24, 506, 12748, 301464, 8086100, 207129256}}, // 573 + // {"r1knrbbq/pp1ppppp/2p1n3/8/2P3P1/P7/1PKPPP1P/RN1NRBBQ w ea - 0 9", {28, + // 570, 16037, 352471, 10278695, 242592363}}, // 574 + // {"rnknrq1b/ppp1p1p1/4b3/3p1p1p/6P1/P4P2/1PPPPQ1P/RNKNR1BB w EAea - 2 9", + // {30, 739, 23124, 594962, 19252739, 521629794}}, // 575 + // {"bbqr1krn/pppp1p1p/5n2/4p1p1/3P4/P3QP2/1PP1P1PP/BB1RNKRN w GDgd - 0 9", + // {31, 799, 25627, 674913, 22172123, 609277274}}, // 576 + // {"bq1b1krn/pp1ppppp/3n4/2r5/3p3N/6N1/PPP1PPPP/BQRB1KR1 w GCg - 2 9", {21, + // 798, 18571, 688429, 17546069, 647165916}}, // 577 + // {"bqrnkbrn/2pp1pp1/p7/1p2p2p/1P6/4N3/P1PPPPPP/BQR1KBRN w GCgc - 0 9", + // {27, 783, 22327, 670798, 20059741, 624462073}}, // 578 + // {"bqr1krnb/1np1pppp/8/pp1p4/8/2P2N2/PP1PPPPP/BQRNKR1B w FCfc - 0 9", {28, + // 636, 18874, 461104, 14237097, 372181570}}, // 579 + // {"qbb1rkrn/1ppppppp/p7/7n/8/P2P4/1PP1PPPP/QBBRNKRN w Gg - 0 9", {25, 547, + // 13837, 332918, 8849383, 229112926}}, // 580 + // {"1rbbnkrn/p1p1pp1p/2q5/1p1p2p1/8/2P3P1/PP1PPP1P/QRBBNKRN w GBgb - 2 9", + // {24, 1010, 24370, 983770, 24328258, 961371180}}, // 581 + // {"qrb1kbrn/ppp1p2p/4npp1/3p4/8/1PP4P/PR1PPPP1/Q1BNKBRN w Ggb - 1 9", {18, + // 451, 9291, 247310, 5568106, 155744022}}, // 582 + // {"qr2krnb/p1p1pppp/b1np4/1p6/3NP3/7P/PPPP1PP1/QRBNKR1B w FBfb - 2 9", + // {25, 667, 17081, 476030, 12458875, 361495148}}, // 583 + // {"qbrnbkrn/ppp3pp/3p4/5p2/2P1pP2/6PP/PP1PP3/QBRNBKRN w GCgc - 0 9", {24, + // 650, 16835, 445263, 12187382, 326834539}}, // 584 + // {"qrnb1krn/ppp1p1pp/5p2/2Np4/b2P4/2P5/PP2PPPP/QR1BBKRN w GBgb - 0 9", + // {27, 641, 17490, 432041, 12103076, 310695797}}, // 585 + // {"qrnkbbrn/pp2pp2/8/2pp2pp/6PP/3P4/PPPKPP2/QRN1BBRN w gb - 0 
9", {22, + // 554, 13116, 357404, 9014737, 258925091}}, // 586 + // {"qrnkbrnb/p1p1ppp1/1p6/3p4/3P3p/5N1P/PPP1PPP1/QRNKBR1B w FBfb - 0 9", + // {24, 529, 13205, 318722, 8295874, 213856651}}, // 587 + // {"qbr1krbn/1pppp1pp/p7/5pn1/2PP4/8/PPB1PPPP/Q1RNKRBN w FCfc - 0 9", {26, + // 831, 21651, 696830, 18961456, 621884383}}, // 588 + // {"1rnbkrbn/1qp1pppp/3p4/pp6/4P3/1NP4P/PP1P1PP1/QR1BKRBN w FBfb - 0 9", + // {24, 597, 15089, 404761, 10832084, 307793179}}, // 589 + // {"q1rkrbbn/ppp1pppp/8/3p4/1PnP4/P7/1RP1PPPP/Q1NKRBBN w Ee - 1 9", {20, + // 520, 10769, 278067, 6452205, 170268300}}, // 590 + // {"qrnkrn1b/ppppp1pp/4b3/7P/6p1/P7/1PPPPP2/QRNKRNBB w EBeb - 0 9", {26, + // 566, 15623, 381312, 10940750, 287987207}}, // 591 + // {"bbr1nkrn/ppp1pppp/3q4/3p4/8/P7/1PPPPPPP/BBRQNRKN w gc - 5 9", {19, 661, + // 13895, 460396, 10870247, 356399665}}, // 592 + // {"brqbnkrn/pp1pp2p/5pp1/2p5/4P3/P2P1N2/1PP2PPP/BRQB1KRN w GBgb - 0 9", + // {27, 679, 19916, 527306, 16391730, 455940859}}, // 593 + // {"2qnkbrn/p1pppppp/8/1r6/1p2bP2/7N/PPPPP1PP/BR1QKBRN w GBg - 4 9", {18, + // 774, 15713, 635461, 14371755, 559579332}}, // 594 + // {"r1qnkr1b/p1pppppp/7n/1p6/8/1P3b1N/PRPPPPPP/B1QNK1RB w f - 5 9", {21, + // 677, 15437, 501520, 12463801, 410795298}}, // 595 + // {"rbbqn1rn/pppp1pp1/3k4/4p2Q/2PPP3/8/PP3PPP/RBB1NKRN w GA - 1 9", {40, + // 742, 28757, 579833, 21852196, 471452088}}, // 596 + // {"rqbbnkrn/3pppp1/p1p4p/1p6/5P2/P2N4/1PPPP1PP/RQBBK1RN w ga - 0 9", {23, + // 665, 16400, 492544, 12794736, 396640086}}, // 597 + // {"r2nkbrn/pp2pppp/8/2ppqb2/2P3P1/5P2/PP1PPN1P/RQB1KBRN w GAga - 3 9", + // {28, 1108, 31164, 1194581, 34780853, 1292405738}}, // 598 + // {"rqbnk1nb/p1pppr1p/5p2/1p4p1/1PP1P3/8/P2P1PPP/RQBNKRNB w FAa - 1 9", + // {26, 650, 18208, 491403, 14565370, 416833400}}, // 599 + // {"rbqnb1rn/p1pp1kpp/1p2pp2/8/4P2P/P5P1/1PPP1P2/RBQNBKRN w GA - 0 9", {20, + // 437, 9423, 222154, 5282124, 132309824}}, // 600 + // {"rqnbbkrn/p1p1pppp/3p4/1p5B/8/1P1NP3/P1PP1PPP/RQ2BKRN w GAga - 0 9", + // {30, 606, 18382, 422491, 12989786, 326601372}}, // 601 + // {"rqnkbbr1/ppppp1pp/5p2/7n/8/2PNP2P/PP1P1PP1/RQ1KBBRN w GAga - 1 9", {23, + // 482, 12506, 297869, 8430874, 217797292}}, // 602 + // {"r1nkbrnb/2ppppp1/1q6/pp5p/1P6/P3P3/2PPKPPP/RQN1BRNB w fa - 2 9", {25, + // 827, 21518, 701071, 19290675, 632892337}}, // 603 + // {"rbqnkrbn/p1ppppp1/7p/1p6/7P/2N1P3/PPPP1PPB/RBQ1KR1N w FAfa - 1 9", {30, + // 627, 18566, 440217, 12976682, 337377291}}, // 604 + // {"r1nbkrbn/p1qp1ppp/8/1pp1p3/2P1P3/6P1/PP1PBP1P/RQN1KRBN w FAfa - 2 9", + // {22, 616, 14503, 431199, 10850952, 335943324}}, // 605 + // {"rqnkr1bn/ppp1ppb1/3p2pp/8/P7/2P2P2/1PKPP1PP/RQN1RBBN w ea - 1 9", {31, + // 679, 21365, 493500, 15661072, 379844460}}, // 606 + // {"r2krnbb/qppp1ppp/1n6/p3p3/PP6/4N3/N1PPPPPP/RQ1KR1BB w EAea - 4 9", {24, + // 645, 17054, 487028, 13837270, 416239106}}, // 607 + // {"bbr1qk1n/1ppppp1p/2n5/p7/P7/1P2P3/2PP1PrP/1BRNQKRN w GCc - 0 9", {18, + // 520, 10680, 304462, 7215306, 207612575}}, // 608 + // {"brnbq1rn/2ppppkp/p5p1/1p6/8/1BP3P1/PP1PPP1P/BRN1QRKN w - - 0 9", {21, + // 625, 13989, 419667, 9929336, 300902534}}, // 609 + // {"brn1kbrn/pp2p1pp/3p4/q1p2p2/2P4P/6P1/PP1PPP2/BRNQKBRN w GBgb - 1 9", + // {18, 477, 10205, 273925, 6720181, 187205941}}, // 610 + // {"brn1krnb/p3pppp/1qpp4/1p6/2P3P1/1P6/P2PPP1P/BRNQKRNB w FBfb - 1 9", + // {30, 835, 24761, 716151, 21806428, 654487872}}, // 611 + // {"r1b1qkrn/1p1ppppp/p1p1n3/8/4P3/1PN5/P1PPQPPb/RBB2KRN w GAga - 0 9", + // {28, 825, 24536, 716585, 22079005, 647939781}}, // 612 + // 
{"r1bbqk1n/p1pppprp/n7/1p4p1/5P2/2N3N1/PPPPP1PP/1RBBQKR1 w Ga - 4 9", + // {25, 545, 14657, 358854, 10271111, 273864588}}, // 613 + // {"rnbqkbrn/p1pp1pp1/4p3/7p/2p4P/2P5/PP1PPPP1/R1BQKBRN w GAga - 0 9", {17, + // 445, 9076, 255098, 5918310, 174733195}}, // 614 + // {"rnbqkrnb/1p1pp1p1/2p4p/p4p2/3P2P1/7N/PPPBPP1P/RN1QKR1B w FAfa - 0 9", + // {34, 746, 25319, 623133, 21285553, 569141201}}, // 615 + // {"rbnqbkr1/1ppppp2/p5n1/6pp/4P3/1N6/PPPP1PPP/RBQ1BRKN w ga - 2 9", {18, + // 466, 9683, 260864, 6051500, 170135726}}, // 616 + // {"rnqb1krn/ppppp1p1/7p/7b/P1P2pPP/8/1P1PPP2/RNQBBKRN w GAga - 0 9", {24, + // 575, 15400, 385825, 11039042, 291243811}}, // 617 + // {"rnqkbbr1/p1pp1ppp/4p3/1p6/P3P2n/5P2/1PPP1NPP/RNQKBBR1 w GAga - 2 9", + // {27, 803, 22883, 694449, 20666099, 638696065}}, // 618 + // {"rn1kbrnb/1qppp1pp/1p6/p4p2/1B1P4/1P5N/P1P1PPPP/RNQK1R1B w FAfa - 0 9", + // {37, 1209, 43015, 1425600, 49748034, 1671593862}}, // 619 + // {"rbnqkrbn/Bppp1p2/p5pp/4p3/5P2/6PP/PPPPP3/RBNQKR1N w FAfa - 0 9", {29, + // 720, 20434, 534148, 15384362, 421343249}}, // 620 + // {"rnqbkr1n/1p1ppbpp/3p1p2/p7/8/1P6/P1PPPPPP/R1QBKRBN w FAfa - 0 9", {20, + // 657, 14424, 492678, 11843134, 413965054}}, // 621 + // {"rnqkrb1n/ppppp3/6p1/5p1p/2b2P2/P1N5/1PPPP1PP/RQ1KRBBN w EAea - 1 9", + // {28, 749, 20684, 543151, 15379233, 417191461}}, // 622 + // {"rnqk1nbb/1pp2ppp/3pr3/p3p3/3P1P2/2N3N1/PPP1P1PP/R1QKR1BB w EAa - 1 9", + // {29, 883, 26412, 815098, 25144295, 789705382}}, // 623 + // {"bbr1kqrn/p1p1ppp1/1p2n2p/3p4/1P1P4/2N5/P1P1PPPP/BBR1KQRN w GCgc - 0 9", + // {22, 485, 11475, 271271, 6825123, 171793012}}, // 624 + // {"brnbkq1n/ppp1ppr1/7p/3p2p1/2P3PP/8/PPBPPP2/BRN1KQRN w GBb - 2 9", {30, + // 634, 19017, 442537, 13674310, 345386924}}, // 625 + // {"brnkqbr1/1pppp1pp/5p2/p7/P1P1P2n/8/1P1P1PP1/BRNKQBRN w GBgb - 0 9", + // {21, 504, 11672, 305184, 7778289, 217596497}}, // 626 {"b1rkqrnb/p1ppp1pp/1p1n4/5p2/5P2/PN5P/1PPPP1P1/BR1KQRNB w FBf - 0 9", {23, 688, 17259, 531592, 14228372, 451842354}}, // 627 - {"1bbnkqrn/rppppp2/p5p1/7p/7P/P1P1P3/1P1P1PP1/RBBNKQRN w GAg - 1 9", - {25, 450, 12391, 263946, 7752404, 185393913}}, // 628 - {"rnbbkqr1/1pppppp1/7p/p3n3/PP5P/8/1BPPPPP1/RN1BKQRN w GAga - 0 9", - {23, 543, 12224, 305812, 7549008, 199883770}}, // 629 - {"r1bkqbrn/ppppp1pp/8/5p2/3nPP2/1P4N1/P1PP2PP/RNBKQBR1 w GAga - 1 9", - {27, 751, 21158, 600417, 17989920, 527273615}}, // 630 - {"rnbkqr1b/1p1pp1pp/p4p1n/2p5/1P5P/N4P2/P1PPP1P1/R1BKQRNB w FAfa - 0 9", - {21, 498, 11738, 302278, 7808375, 216224115}}, // 631 - {"rbnkbqrn/p1p3pp/1p1p4/B3pp2/3P2P1/6N1/PPP1PP1P/RBNK1QR1 w GAga - 0 9", - {34, 977, 33464, 961128, 33318567, 978991050}}, // 632 - {"r1kbbqrn/ppp3pp/2np1p2/1P2p3/3P1P2/8/P1P1P1PP/RNKBBQRN w GAga - 0 9", - {32, 920, 28916, 844881, 26763259, 797524786}}, // 633 - {"rk1qbbrn/p2npppp/1p6/2p4Q/8/4P3/PPPP1PPP/RNK1B1RN w GA - 2 9", - {35, 657, 22359, 495406, 16662477, 419496845}}, // 634 - {"rnk1brnb/pp1p1pp1/8/q1p1p2p/5P2/NP6/P1PPP1PP/R1KQBRNB w FAfa - 1 9", - {26, 774, 20215, 610661, 16987110, 523437649}}, // 635 - {"rb1kqrbn/npp1ppp1/p7/3P3p/2PP4/8/PP3PPP/RBNKQRBN w FAfa - 0 9", - {35, 775, 27395, 661118, 23983464, 625669222}}, // 636 - {"rnkb1rbn/pp1p2pp/8/2p1pp1q/P6P/1PN5/2PPPPP1/R1KBQRBN w FAfa - 1 9", - {22, 899, 21188, 850597, 21518343, 857951339}}, // 637 - {"rnkqrbbn/1pppp1p1/8/p2N1p1p/2P4P/8/PP1PPPP1/R1KQRBBN w EAea - 0 9", - {29, 585, 17571, 393221, 12238776, 299752383}}, // 638 - {"rnk1r1bb/pp1ppppp/1q4n1/2p5/5P1P/3PP3/PPP3P1/RNKQRNBB w EAea - 1 9", - {27, 884, 24613, 811915, 23698701, 790239502}}, // 639 - 
{"bbrnkrqn/1ppp1p2/6pp/p3p3/5PP1/2PB4/PP1PP2P/B1RNKRQN w FCfc - 0 9", - {37, 693, 25425, 550527, 20138432, 481498664}}, // 640 - {"b1rbkrqn/ppp2ppp/1n2p3/3p4/6P1/2PP4/PP2PP1P/BRNBKRQN w FBf - 1 9", - {21, 463, 10610, 253204, 6307276, 159025909}}, // 641 - {"brnkrb1n/1pp1p1pp/3p4/p1Nq1p2/2P5/8/PP1PPPPP/BRK1RBQN w eb - 2 9", - {27, 725, 17842, 496072, 12604078, 362747791}}, // 642 - {"brn1r1nb/ppppkppp/4p3/8/2PP1P2/8/PP1KP1PP/BRN1RQNB w - - 1 9", - {25, 623, 16874, 426659, 12290985, 317097424}}, // 643 - {"rbb1krqn/1pp1pp1p/p3n1p1/3pP3/8/1PN5/P1PP1PPP/RBB1KRQN w FAfa d6 0 9", - {23, 529, 12641, 310277, 7861413, 202594556}}, // 644 - {"r1bbkrqn/p1pppppp/8/4n3/1p5P/P2P2P1/1PP1PP2/RNBBKRQN w FAfa - 0 9", - {23, 571, 13133, 346793, 8699448, 243460643}}, // 645 - {"rnbkrbqn/p1pp1ppp/4p3/1p6/8/BPN3P1/P1PPPP1P/R2KRBQN w EAea - 2 9", - {29, 692, 20014, 500375, 14904192, 386694739}}, // 646 - {"rnbkrqn1/pppppp2/8/1Q2b1pp/P3P3/5P2/1PPP2PP/RNBKR1NB w EAea - 0 9", - {37, 1001, 36440, 987842, 35626426, 993747544}}, // 647 - {"rbnkbrqn/p1pppp2/7p/1p4pP/3P1P2/8/PPP1P1P1/RBNKBRQN w FAfa - 0 9", - {30, 564, 17143, 381364, 11859538, 293703269}}, // 648 - {"1nkbbrqn/3ppppp/r1p5/pp6/8/4PP2/PPPPN1PP/RNKBBRQ1 w FAf - 2 9", - {26, 546, 14641, 344592, 9556962, 245137199}}, // 649 - {"rnkrbbq1/pppppnp1/7p/8/1B1Q1p2/3P1P2/PPP1P1PP/RNKR1B1N w DAda - 2 9", - {43, 887, 36240, 846858, 33185346, 851927292}}, // 650 - {"1rkrbqnb/pppppp2/2n3p1/7p/3P3P/P4N2/1PP1PPP1/RNKRBQ1B w DAd - 0 9", - {26, 622, 16049, 403921, 10786140, 285233838}}, // 651 - {"rbnkr1bn/pp1pqp1p/2p1p3/6p1/3P4/7P/PPP1PPP1/RBNKRQBN w EAea - 0 9", - {19, 566, 12257, 381197, 9107175, 293397389}}, // 652 - {"r1kbrqb1/pppp2pp/2n1p1n1/5p1B/4PP2/P7/1PPP2PP/RNK1RQBN w EAea - 2 9", - {39, 1359, 53626, 1876028, 73871486, 2633945690}}, // 653 - {"rnkrqbbn/p1p3pp/1p1ppp2/8/1P6/3P2P1/PKP1PP1P/RN1RQBBN w da - 0 9", - {26, 776, 20735, 611907, 16884013, 503561996}}, // 654 - {"rnkrqnbb/ppp2p1p/3p4/4p1p1/3P3P/N1Q5/PPP1PPP1/R1KR1NBB w DAda - 0 9", - {40, 1175, 45637, 1375884, 52620163, 1633655838}}, // 655 - {"bbrnkrn1/p1pppp2/1p6/6pp/3q4/1P3QP1/P1PPPP1P/BBRNKRN1 w FCfc - 0 9", - {34, 1398, 45749, 1712950, 57268492, 2059942014}}, // 656 - {"br1bkrnq/1p2pppp/pnp5/3p4/P1P5/5P2/1P1PPKPP/BRNB1RNQ w fb - 2 9", - {24, 501, 12237, 284936, 7049659, 177940764}}, // 657 - {"brnkrbn1/pppppp1q/B6p/6p1/8/1P2PP2/P1PP2PP/BRNKR1NQ w EBeb - 0 9", - {34, 815, 25868, 700970, 22006883, 639803952}}, // 658 - {"br1krnqb/pppppp1p/1n4p1/8/8/P2NN3/2PPPPPP/BR1K1RQB w Beb - 2 9", - {37, 1029, 36748, 1025712, 36214583, 1026195877}}, // 659 - {"rbbnkr1q/p1p2ppp/1p1ppn2/8/1PP4P/8/P2PPPP1/RBBNKRNQ w FAfa - 0 9", - {28, 755, 22623, 605106, 18972778, 513486101}}, // 660 - {"r1b1krnq/pp2pppp/1bn5/2pp4/4N3/5P2/PPPPPRPP/R1BBK1NQ w Afa - 0 9", - {24, 705, 17427, 532521, 13532966, 426443376}}, // 661 - {"1nbkrbn1/rpppppqp/p7/6p1/4P3/3P2P1/PPP1KP1P/RNB1RBNQ w e - 1 9", - {31, 800, 24748, 693366, 21193292, 625757852}}, // 662 - {"r1bkrnqb/pp3ppp/n1ppp3/8/1P5P/P7/R1PPPPP1/1NBKRNQB w Eea - 0 9", - {21, 482, 11417, 275339, 7112890, 180378139}}, // 663 - {"rbnkbrnq/ppp1p2p/5p2/3p2p1/1B1P4/1N4P1/PPP1PP1P/RB1K1RNQ w FAfa - 0 9", - {33, 780, 25532, 628945, 20756770, 535497008}}, // 664 - {"rnk1brnq/pp1ppppp/2p5/b7/8/1P2P2P/P1PP1PPQ/RNKBBRN1 w FAfa - 3 9", - {29, 648, 19043, 449637, 13722785, 341389148}}, // 665 - {"rnkrbbnq/p1p3pp/5p2/1p1pp3/P7/1PN2P2/2PPP1PP/R1KRBBNQ w DAda - 0 9", - {26, 827, 21865, 683167, 18916370, 589161126}}, // 666 - {"r1krbnqb/p1pp1ppp/2n1p3/8/1p4P1/PPP5/3PPP1P/RNKRBNQB w DAda - 1 9", - {25, 
540, 14709, 331332, 9491817, 225389422}}, // 667 - {"rbnkrnbq/ppp1pp2/3p2p1/2N5/P6p/2P5/1P1PPPPP/RB1KRNBQ w EAea - 0 9", - {32, 790, 25107, 661207, 20906017, 578332225}}, // 668 - {"rnkbrn1q/1ppppppb/8/p4N1p/8/P1N5/1PPPPPPP/R1KBR1BQ w EAea - 0 9", - {31, 691, 20813, 510665, 15308408, 404129987}}, // 669 - {"rnkrnbbq/p1p2ppp/3pp3/1p6/6P1/4PQ1B/PPPP1P1P/RNKRN1B1 w DAda - 0 9", - {29, 558, 16800, 352887, 10825379, 246965507}}, // 670 - {"rnkrnqbb/pp2p1p1/3p3p/2p2p2/5P2/1P1N4/P1PPPQPP/RNKR2BB w DAda - 0 9", - {29, 762, 23210, 644936, 20522675, 596067005}}, // 671 - {"bb1rknnr/ppqppppp/8/2p5/3P1N2/1P6/P1P1PPPP/BBQRKN1R w HDhd - 1 9", - {33, 963, 32279, 1000890, 34552118, 1124738493}}, // 672 - {"bqrbknnr/ppp1p2p/8/3p1p2/5p2/P3N2P/1PPPP1P1/BQRBK1NR w HChc - 0 9", - {20, 398, 9009, 194859, 4834319, 113660536}}, // 673 - {"b1rk1bnr/qpp1pppp/p4n2/3p4/3PPP2/7N/PPP3PP/BQRKNB1R w HChc - 1 9", - {25, 648, 16587, 455720, 12200870, 351766307}}, // 674 - {"bqkrnnrb/pppp2p1/4pp2/4P2p/6P1/7P/PPPP1P2/BQRKNNRB w GC - 1 9", - {30, 493, 15118, 280726, 8786998, 181492621}}, // 675 - {"q1brknnr/1p1ppppp/p7/2p5/8/1PPP4/P2RPPPP/QBB1KNNR w Hhd - 0 9", - {25, 501, 13206, 290463, 7982978, 192717198}}, // 676 - {"qrb1k1nr/ppppb1pp/6n1/4ppN1/3P4/4N3/PPP1PPPP/QRBBK2R w HBhb - 2 9", - {31, 872, 26191, 739276, 22493014, 646855304}}, // 677 - {"1rbknbnr/1ppp1pp1/q6p/p3p3/5P2/2PPB3/PP2P1PP/QR1KNBNR w HBhb - 0 9", - {28, 1020, 28147, 984000, 27484692, 947786800}}, // 678 - {"qrbk2rb/1ppp1ppp/5nn1/p3p3/1N6/P7/1PPPPPPP/QRB1KNRB w gb - 0 9", - {23, 592, 14398, 395716, 10098215, 293988585}}, // 679 - {"qbrk1nnr/1pp1pppp/2b5/p2p4/P2P2P1/8/1PP1PP1P/QBKRBNNR w hc - 1 9", - {26, 654, 18103, 471653, 13740891, 373081138}}, // 680 - {"qrkbbnnr/ppp2p1p/4p3/3p2p1/P7/2PP4/1P2PPPP/QRKBBNNR w HBhb - 0 9", - {25, 626, 16616, 431634, 12079406, 324006164}}, // 681 - {"qr1kbbnr/ppp1pp1p/4n1p1/2Pp4/6P1/4N3/PP1PPP1P/QRK1BBNR w HB d6 0 9", - {26, 699, 18068, 497152, 13353359, 375702908}}, // 682 - {"qrk1b1rb/p1pppppp/3nnQ2/1p6/1P3P2/3P4/P1P1P1PP/1RKNBNRB w GBgb - 3 9", - {43, 1369, 55463, 1831200, 71514365, 2427477375}}, // 683 - {"qbrk1nbr/pppp3p/5n2/4ppp1/3P1P2/4N3/PPP1P1PP/QBKRN1BR w hc - 0 9", - {25, 752, 20165, 615263, 17493373, 543180234}}, // 684 - {"qrkb1nbr/1pppppQp/3n4/p7/5p2/1P1N4/P1PPP1PP/1RKB1NBR w HBhb - 0 9", - {45, 946, 40100, 966903, 39736157, 1051910977}}, // 685 - {"qrk1nbbr/ppp1p1p1/4n2p/3p1p2/1P5P/3P2P1/P1P1PP2/QRKNNBBR w HBhb - 1 9", - {32, 770, 25367, 646977, 21717615, 577979364}}, // 686 - {"qrkn1rbb/pp2pppp/2p5/3p4/P2Qn1P1/1P6/2PPPP1P/1RKNNRBB w FBfb - 0 9", - {38, 943, 35335, 868165, 31909835, 798405123}}, // 687 - {"bbrqknnr/ppp4p/3pp3/5pp1/4PP2/5Q2/PPPP2PP/BBR1KNNR w HChc - 0 9", - {36, 843, 29974, 758528, 26828059, 723306114}}, // 688 - {"1rqbkn1r/p1p1pppp/1p5n/P2p4/3Pb1P1/8/1PP1PP1P/BRQBKNNR w HBhb - 0 9", - {23, 778, 19482, 649789, 17337683, 579112676}}, // 689 - {"br1knbnr/1qp1pppp/pp1p4/8/8/PP6/2PPPPPP/BRQKNBNR w HBhb - 2 9", - {26, 697, 18835, 546622, 15280079, 473071890}}, // 690 - {"brqk2rb/ppppp1pp/4np2/8/2n5/3P1Q2/PP2PPPP/BR1KNNRB w GBgb - 0 9", - {32, 948, 30434, 885713, 29821322, 874251866}}, // 691 - {"r1bqknnr/pp1pp1p1/5p1p/2p1b2N/2P5/8/PPQPPPPP/RBB1K1NR w HAha - 0 9", - {31, 785, 25549, 659952, 22244193, 592797491}}, // 692 - {"rqbbknnr/ppppp2p/5pp1/8/8/1P3PP1/PQPPP2P/R1BBKNNR w HAha - 0 9", - {23, 391, 10163, 198450, 5576671, 121267576}}, // 693 - {"rqbknbnr/1pp1p2p/p7/3p1pp1/7N/1PP5/P2PPPPP/RQBK1BNR w HAha - 0 9", - {27, 676, 19606, 522428, 15955388, 448477218}}, // 694 - 
{"rqb1nnrb/2ppkppp/1p2p3/p7/2PPP3/1P6/P4PPP/RQBKNNRB w GA - 1 9", - {31, 727, 22895, 570647, 18361051, 483248153}}, // 695 - {"rb1kbn1r/p1ppppp1/qp5n/7p/P7/RPP5/3PPPPP/1BQKBNNR w Hha - 2 9", - {29, 837, 23815, 730083, 21279560, 682863811}}, // 696 - {"rqkbb1nr/p1p2ppp/1p1p2n1/3Np3/4P3/5N2/PPPP1PPP/RQKBB2R w HAha - 0 9", - {28, 717, 20663, 550987, 16347343, 453153783}}, // 697 - {"rqknbbr1/p1pppp1p/1p3np1/8/4P3/2P2P1P/PP1P2P1/RQKNBBNR w HAa - 0 9", - {27, 650, 18231, 475303, 13847463, 383256006}}, // 698 - {"r1k1bnrb/1qpppppp/1p2n3/p7/1P5P/6P1/P1PPPP2/RQKNBNR1 w GAga - 1 9", - {24, 806, 20693, 713220, 19382263, 686009788}}, // 699 - {"rb1knnbr/1pp1ppp1/p2p3p/5q2/3B2P1/3P1P2/PPP1P2P/RBQKNN1R w HAha - 0 9", - {34, 1360, 44096, 1605706, 51973672, 1837704407}}, // 700 - {"rqkb1nbr/p1p1ppp1/1p3n1p/2Qp4/8/2P5/PP1PPPPP/R1KBNNBR w HAha - 2 9", - {39, 983, 38218, 940989, 36347815, 918801645}}, // 701 - {"rqknnbbr/2pppp2/pp5p/6p1/1P1P4/4PP2/P1P3PP/RQKNNBBR w HAha - 0 9", - {26, 628, 17638, 464924, 13787303, 386125234}}, // 702 - {"rqkn1rbb/1pp1pppp/p7/3p4/3Pn3/2P1PP2/PP4PP/RQKNNRBB w FAfa - 1 9", - {20, 527, 12216, 321533, 8082183, 219311659}}, // 703 + // {"1bbnkqrn/rppppp2/p5p1/7p/7P/P1P1P3/1P1P1PP1/RBBNKQRN w GAg - 1 9", {25, + // 450, 12391, 263946, 7752404, 185393913}}, // 628 + // {"rnbbkqr1/1pppppp1/7p/p3n3/PP5P/8/1BPPPPP1/RN1BKQRN w GAga - 0 9", {23, + // 543, 12224, 305812, 7549008, 199883770}}, // 629 + // {"r1bkqbrn/ppppp1pp/8/5p2/3nPP2/1P4N1/P1PP2PP/RNBKQBR1 w GAga - 1 9", + // {27, 751, 21158, 600417, 17989920, 527273615}}, // 630 + // {"rnbkqr1b/1p1pp1pp/p4p1n/2p5/1P5P/N4P2/P1PPP1P1/R1BKQRNB w FAfa - 0 9", + // {21, 498, 11738, 302278, 7808375, 216224115}}, // 631 + // {"rbnkbqrn/p1p3pp/1p1p4/B3pp2/3P2P1/6N1/PPP1PP1P/RBNK1QR1 w GAga - 0 9", + // {34, 977, 33464, 961128, 33318567, 978991050}}, // 632 + // {"r1kbbqrn/ppp3pp/2np1p2/1P2p3/3P1P2/8/P1P1P1PP/RNKBBQRN w GAga - 0 9", + // {32, 920, 28916, 844881, 26763259, 797524786}}, // 633 + // {"rk1qbbrn/p2npppp/1p6/2p4Q/8/4P3/PPPP1PPP/RNK1B1RN w GA - 2 9", {35, + // 657, 22359, 495406, 16662477, 419496845}}, // 634 + // {"rnk1brnb/pp1p1pp1/8/q1p1p2p/5P2/NP6/P1PPP1PP/R1KQBRNB w FAfa - 1 9", + // {26, 774, 20215, 610661, 16987110, 523437649}}, // 635 + // {"rb1kqrbn/npp1ppp1/p7/3P3p/2PP4/8/PP3PPP/RBNKQRBN w FAfa - 0 9", {35, + // 775, 27395, 661118, 23983464, 625669222}}, // 636 + // {"rnkb1rbn/pp1p2pp/8/2p1pp1q/P6P/1PN5/2PPPPP1/R1KBQRBN w FAfa - 1 9", + // {22, 899, 21188, 850597, 21518343, 857951339}}, // 637 + // {"rnkqrbbn/1pppp1p1/8/p2N1p1p/2P4P/8/PP1PPPP1/R1KQRBBN w EAea - 0 9", + // {29, 585, 17571, 393221, 12238776, 299752383}}, // 638 + // {"rnk1r1bb/pp1ppppp/1q4n1/2p5/5P1P/3PP3/PPP3P1/RNKQRNBB w EAea - 1 9", + // {27, 884, 24613, 811915, 23698701, 790239502}}, // 639 + // {"bbrnkrqn/1ppp1p2/6pp/p3p3/5PP1/2PB4/PP1PP2P/B1RNKRQN w FCfc - 0 9", + // {37, 693, 25425, 550527, 20138432, 481498664}}, // 640 + // {"b1rbkrqn/ppp2ppp/1n2p3/3p4/6P1/2PP4/PP2PP1P/BRNBKRQN w FBf - 1 9", {21, + // 463, 10610, 253204, 6307276, 159025909}}, // 641 + // {"brnkrb1n/1pp1p1pp/3p4/p1Nq1p2/2P5/8/PP1PPPPP/BRK1RBQN w eb - 2 9", {27, + // 725, 17842, 496072, 12604078, 362747791}}, // 642 + // {"brn1r1nb/ppppkppp/4p3/8/2PP1P2/8/PP1KP1PP/BRN1RQNB w - - 1 9", {25, + // 623, 16874, 426659, 12290985, 317097424}}, // 643 + // {"rbb1krqn/1pp1pp1p/p3n1p1/3pP3/8/1PN5/P1PP1PPP/RBB1KRQN w FAfa d6 0 9", + // {23, 529, 12641, 310277, 7861413, 202594556}}, // 644 + // {"r1bbkrqn/p1pppppp/8/4n3/1p5P/P2P2P1/1PP1PP2/RNBBKRQN w FAfa - 0 9", + // {23, 571, 13133, 346793, 
8699448, 243460643}}, // 645 + // {"rnbkrbqn/p1pp1ppp/4p3/1p6/8/BPN3P1/P1PPPP1P/R2KRBQN w EAea - 2 9", {29, + // 692, 20014, 500375, 14904192, 386694739}}, // 646 + // {"rnbkrqn1/pppppp2/8/1Q2b1pp/P3P3/5P2/1PPP2PP/RNBKR1NB w EAea - 0 9", + // {37, 1001, 36440, 987842, 35626426, 993747544}}, // 647 + // {"rbnkbrqn/p1pppp2/7p/1p4pP/3P1P2/8/PPP1P1P1/RBNKBRQN w FAfa - 0 9", {30, + // 564, 17143, 381364, 11859538, 293703269}}, // 648 + // {"1nkbbrqn/3ppppp/r1p5/pp6/8/4PP2/PPPPN1PP/RNKBBRQ1 w FAf - 2 9", {26, + // 546, 14641, 344592, 9556962, 245137199}}, // 649 + // {"rnkrbbq1/pppppnp1/7p/8/1B1Q1p2/3P1P2/PPP1P1PP/RNKR1B1N w DAda - 2 9", + // {43, 887, 36240, 846858, 33185346, 851927292}}, // 650 + // {"1rkrbqnb/pppppp2/2n3p1/7p/3P3P/P4N2/1PP1PPP1/RNKRBQ1B w DAd - 0 9", + // {26, 622, 16049, 403921, 10786140, 285233838}}, // 651 + // {"rbnkr1bn/pp1pqp1p/2p1p3/6p1/3P4/7P/PPP1PPP1/RBNKRQBN w EAea - 0 9", + // {19, 566, 12257, 381197, 9107175, 293397389}}, // 652 + // {"r1kbrqb1/pppp2pp/2n1p1n1/5p1B/4PP2/P7/1PPP2PP/RNK1RQBN w EAea - 2 9", + // {39, 1359, 53626, 1876028, 73871486, 2633945690}}, // 653 + // {"rnkrqbbn/p1p3pp/1p1ppp2/8/1P6/3P2P1/PKP1PP1P/RN1RQBBN w da - 0 9", {26, + // 776, 20735, 611907, 16884013, 503561996}}, // 654 + // {"rnkrqnbb/ppp2p1p/3p4/4p1p1/3P3P/N1Q5/PPP1PPP1/R1KR1NBB w DAda - 0 9", + // {40, 1175, 45637, 1375884, 52620163, 1633655838}}, // 655 + // {"bbrnkrn1/p1pppp2/1p6/6pp/3q4/1P3QP1/P1PPPP1P/BBRNKRN1 w FCfc - 0 9", + // {34, 1398, 45749, 1712950, 57268492, 2059942014}}, // 656 + // {"br1bkrnq/1p2pppp/pnp5/3p4/P1P5/5P2/1P1PPKPP/BRNB1RNQ w fb - 2 9", {24, + // 501, 12237, 284936, 7049659, 177940764}}, // 657 + // {"brnkrbn1/pppppp1q/B6p/6p1/8/1P2PP2/P1PP2PP/BRNKR1NQ w EBeb - 0 9", {34, + // 815, 25868, 700970, 22006883, 639803952}}, // 658 + // {"br1krnqb/pppppp1p/1n4p1/8/8/P2NN3/2PPPPPP/BR1K1RQB w Beb - 2 9", {37, + // 1029, 36748, 1025712, 36214583, 1026195877}}, // 659 + // {"rbbnkr1q/p1p2ppp/1p1ppn2/8/1PP4P/8/P2PPPP1/RBBNKRNQ w FAfa - 0 9", {28, + // 755, 22623, 605106, 18972778, 513486101}}, // 660 + // {"r1b1krnq/pp2pppp/1bn5/2pp4/4N3/5P2/PPPPPRPP/R1BBK1NQ w Afa - 0 9", {24, + // 705, 17427, 532521, 13532966, 426443376}}, // 661 + // {"1nbkrbn1/rpppppqp/p7/6p1/4P3/3P2P1/PPP1KP1P/RNB1RBNQ w e - 1 9", {31, + // 800, 24748, 693366, 21193292, 625757852}}, // 662 + // {"r1bkrnqb/pp3ppp/n1ppp3/8/1P5P/P7/R1PPPPP1/1NBKRNQB w Eea - 0 9", {21, + // 482, 11417, 275339, 7112890, 180378139}}, // 663 + // {"rbnkbrnq/ppp1p2p/5p2/3p2p1/1B1P4/1N4P1/PPP1PP1P/RB1K1RNQ w FAfa - 0 9", + // {33, 780, 25532, 628945, 20756770, 535497008}}, // 664 + // {"rnk1brnq/pp1ppppp/2p5/b7/8/1P2P2P/P1PP1PPQ/RNKBBRN1 w FAfa - 3 9", {29, + // 648, 19043, 449637, 13722785, 341389148}}, // 665 + // {"rnkrbbnq/p1p3pp/5p2/1p1pp3/P7/1PN2P2/2PPP1PP/R1KRBBNQ w DAda - 0 9", + // {26, 827, 21865, 683167, 18916370, 589161126}}, // 666 + // {"r1krbnqb/p1pp1ppp/2n1p3/8/1p4P1/PPP5/3PPP1P/RNKRBNQB w DAda - 1 9", + // {25, 540, 14709, 331332, 9491817, 225389422}}, // 667 + // {"rbnkrnbq/ppp1pp2/3p2p1/2N5/P6p/2P5/1P1PPPPP/RB1KRNBQ w EAea - 0 9", + // {32, 790, 25107, 661207, 20906017, 578332225}}, // 668 + // {"rnkbrn1q/1ppppppb/8/p4N1p/8/P1N5/1PPPPPPP/R1KBR1BQ w EAea - 0 9", {31, + // 691, 20813, 510665, 15308408, 404129987}}, // 669 + // {"rnkrnbbq/p1p2ppp/3pp3/1p6/6P1/4PQ1B/PPPP1P1P/RNKRN1B1 w DAda - 0 9", + // {29, 558, 16800, 352887, 10825379, 246965507}}, // 670 + // {"rnkrnqbb/pp2p1p1/3p3p/2p2p2/5P2/1P1N4/P1PPPQPP/RNKR2BB w DAda - 0 9", + // {29, 762, 23210, 644936, 20522675, 596067005}}, // 671 + // 
{"bb1rknnr/ppqppppp/8/2p5/3P1N2/1P6/P1P1PPPP/BBQRKN1R w HDhd - 1 9", {33, + // 963, 32279, 1000890, 34552118, 1124738493}}, // 672 + // {"bqrbknnr/ppp1p2p/8/3p1p2/5p2/P3N2P/1PPPP1P1/BQRBK1NR w HChc - 0 9", + // {20, 398, 9009, 194859, 4834319, 113660536}}, // 673 + // {"b1rk1bnr/qpp1pppp/p4n2/3p4/3PPP2/7N/PPP3PP/BQRKNB1R w HChc - 1 9", {25, + // 648, 16587, 455720, 12200870, 351766307}}, // 674 + // {"bqkrnnrb/pppp2p1/4pp2/4P2p/6P1/7P/PPPP1P2/BQRKNNRB w GC - 1 9", {30, + // 493, 15118, 280726, 8786998, 181492621}}, // 675 + // {"q1brknnr/1p1ppppp/p7/2p5/8/1PPP4/P2RPPPP/QBB1KNNR w Hhd - 0 9", {25, + // 501, 13206, 290463, 7982978, 192717198}}, // 676 + // {"qrb1k1nr/ppppb1pp/6n1/4ppN1/3P4/4N3/PPP1PPPP/QRBBK2R w HBhb - 2 9", + // {31, 872, 26191, 739276, 22493014, 646855304}}, // 677 + // {"1rbknbnr/1ppp1pp1/q6p/p3p3/5P2/2PPB3/PP2P1PP/QR1KNBNR w HBhb - 0 9", + // {28, 1020, 28147, 984000, 27484692, 947786800}}, // 678 + // {"qrbk2rb/1ppp1ppp/5nn1/p3p3/1N6/P7/1PPPPPPP/QRB1KNRB w gb - 0 9", {23, + // 592, 14398, 395716, 10098215, 293988585}}, // 679 + // {"qbrk1nnr/1pp1pppp/2b5/p2p4/P2P2P1/8/1PP1PP1P/QBKRBNNR w hc - 1 9", {26, + // 654, 18103, 471653, 13740891, 373081138}}, // 680 + // {"qrkbbnnr/ppp2p1p/4p3/3p2p1/P7/2PP4/1P2PPPP/QRKBBNNR w HBhb - 0 9", {25, + // 626, 16616, 431634, 12079406, 324006164}}, // 681 + // {"qr1kbbnr/ppp1pp1p/4n1p1/2Pp4/6P1/4N3/PP1PPP1P/QRK1BBNR w HB d6 0 9", + // {26, 699, 18068, 497152, 13353359, 375702908}}, // 682 + // {"qrk1b1rb/p1pppppp/3nnQ2/1p6/1P3P2/3P4/P1P1P1PP/1RKNBNRB w GBgb - 3 9", + // {43, 1369, 55463, 1831200, 71514365, 2427477375}}, // 683 + // {"qbrk1nbr/pppp3p/5n2/4ppp1/3P1P2/4N3/PPP1P1PP/QBKRN1BR w hc - 0 9", {25, + // 752, 20165, 615263, 17493373, 543180234}}, // 684 + // {"qrkb1nbr/1pppppQp/3n4/p7/5p2/1P1N4/P1PPP1PP/1RKB1NBR w HBhb - 0 9", + // {45, 946, 40100, 966903, 39736157, 1051910977}}, // 685 + // {"qrk1nbbr/ppp1p1p1/4n2p/3p1p2/1P5P/3P2P1/P1P1PP2/QRKNNBBR w HBhb - 1 9", + // {32, 770, 25367, 646977, 21717615, 577979364}}, // 686 + // {"qrkn1rbb/pp2pppp/2p5/3p4/P2Qn1P1/1P6/2PPPP1P/1RKNNRBB w FBfb - 0 9", + // {38, 943, 35335, 868165, 31909835, 798405123}}, // 687 + // {"bbrqknnr/ppp4p/3pp3/5pp1/4PP2/5Q2/PPPP2PP/BBR1KNNR w HChc - 0 9", {36, + // 843, 29974, 758528, 26828059, 723306114}}, // 688 + // {"1rqbkn1r/p1p1pppp/1p5n/P2p4/3Pb1P1/8/1PP1PP1P/BRQBKNNR w HBhb - 0 9", + // {23, 778, 19482, 649789, 17337683, 579112676}}, // 689 + // {"br1knbnr/1qp1pppp/pp1p4/8/8/PP6/2PPPPPP/BRQKNBNR w HBhb - 2 9", {26, + // 697, 18835, 546622, 15280079, 473071890}}, // 690 + // {"brqk2rb/ppppp1pp/4np2/8/2n5/3P1Q2/PP2PPPP/BR1KNNRB w GBgb - 0 9", {32, + // 948, 30434, 885713, 29821322, 874251866}}, // 691 + // {"r1bqknnr/pp1pp1p1/5p1p/2p1b2N/2P5/8/PPQPPPPP/RBB1K1NR w HAha - 0 9", + // {31, 785, 25549, 659952, 22244193, 592797491}}, // 692 + // {"rqbbknnr/ppppp2p/5pp1/8/8/1P3PP1/PQPPP2P/R1BBKNNR w HAha - 0 9", {23, + // 391, 10163, 198450, 5576671, 121267576}}, // 693 + // {"rqbknbnr/1pp1p2p/p7/3p1pp1/7N/1PP5/P2PPPPP/RQBK1BNR w HAha - 0 9", {27, + // 676, 19606, 522428, 15955388, 448477218}}, // 694 + // {"rqb1nnrb/2ppkppp/1p2p3/p7/2PPP3/1P6/P4PPP/RQBKNNRB w GA - 1 9", {31, + // 727, 22895, 570647, 18361051, 483248153}}, // 695 + // {"rb1kbn1r/p1ppppp1/qp5n/7p/P7/RPP5/3PPPPP/1BQKBNNR w Hha - 2 9", {29, + // 837, 23815, 730083, 21279560, 682863811}}, // 696 + // {"rqkbb1nr/p1p2ppp/1p1p2n1/3Np3/4P3/5N2/PPPP1PPP/RQKBB2R w HAha - 0 9", + // {28, 717, 20663, 550987, 16347343, 453153783}}, // 697 + // {"rqknbbr1/p1pppp1p/1p3np1/8/4P3/2P2P1P/PP1P2P1/RQKNBBNR w HAa 
- 0 9", + // {27, 650, 18231, 475303, 13847463, 383256006}}, // 698 + // {"r1k1bnrb/1qpppppp/1p2n3/p7/1P5P/6P1/P1PPPP2/RQKNBNR1 w GAga - 1 9", + // {24, 806, 20693, 713220, 19382263, 686009788}}, // 699 + // {"rb1knnbr/1pp1ppp1/p2p3p/5q2/3B2P1/3P1P2/PPP1P2P/RBQKNN1R w HAha - 0 9", + // {34, 1360, 44096, 1605706, 51973672, 1837704407}}, // 700 + // {"rqkb1nbr/p1p1ppp1/1p3n1p/2Qp4/8/2P5/PP1PPPPP/R1KBNNBR w HAha - 2 9", + // {39, 983, 38218, 940989, 36347815, 918801645}}, // 701 + // {"rqknnbbr/2pppp2/pp5p/6p1/1P1P4/4PP2/P1P3PP/RQKNNBBR w HAha - 0 9", {26, + // 628, 17638, 464924, 13787303, 386125234}}, // 702 + // {"rqkn1rbb/1pp1pppp/p7/3p4/3Pn3/2P1PP2/PP4PP/RQKNNRBB w FAfa - 1 9", {20, + // 527, 12216, 321533, 8082183, 219311659}}, // 703 {"bbrkqn1r/1pppppp1/5n2/p7/1PP2P1p/7N/P2PP1PP/BBRKQN1R w HChc - 1 9", {36, 963, 35291, 973839, 35907489, 1034223364}}, // 704 - {"brkbqn1r/p2ppppp/7n/1p6/P1p3PP/8/1PPPPP1N/BRKBQ1NR w HBhb - 0 9", - {18, 583, 11790, 394603, 8858385, 304339862}}, // 705 - {"brkq1bnr/pp1ppp1p/8/2p2np1/P7/8/1PPPPPPP/BRKQNBNR w HBhb - 0 9", - {19, 552, 11811, 354260, 8432183, 262293169}}, // 706 - {"brkqnnrb/1ppppppp/8/8/p3P3/5N2/PPPP1PPP/BRKQ1NRB w GBgb - 3 9", - {21, 397, 9653, 204350, 5489836, 128389738}}, // 707 - {"rbbkq1nr/1p2pppp/p1p3nB/3p4/1Q1P4/6N1/PPP1PPPP/RB1K2NR w HAha - 0 9", - {40, 1132, 43404, 1260470, 47425783, 1415578783}}, // 708 - {"rkbbq1nr/1pppp1p1/4np2/p6p/8/PP3P2/1KPPP1PP/R1BBQNNR w ha - 0 9", - {24, 596, 15220, 402121, 10822049, 302056813}}, // 709 - {"r1bqn1nr/pkpppp1p/1p4pb/8/PN6/R7/1PPPPPPP/1KBQ1BNR w H - 2 9", - {33, 794, 25450, 649150, 20919309, 561073410}}, // 710 - {"rkb1nnrb/1pppq1pp/p4p2/4p3/5P2/1P1PB3/P1P1P1PP/RK1QNNRB w GAga - 0 9", - {26, 625, 17050, 442036, 12515042, 342967558}}, // 711 - {"rbkqbn1r/pppp1p1p/2n1p1p1/8/8/1P1PP1N1/P1P2PPP/RBKQB1NR w HAha - 1 9", - {30, 660, 20308, 492714, 15348335, 403323883}}, // 712 - {"rkqbb1n1/pppppppr/8/6np/5P2/8/PPPPP1PP/RKQBBNNR w HAa - 6 9", - {23, 500, 12154, 292936, 7519117, 196524441}}, // 713 - {"rkqnbbnr/ppppppp1/8/7p/3N4/6PP/PPPPPP2/RKQNBB1R w HAa - 0 9", - {24, 484, 12495, 284570, 7775173, 193947530}}, // 714 - {"rkqnb1rb/p1p1pppp/1p1p4/2n5/3P4/2P1N1N1/PP2PPPP/RKQ1B1RB w GAga - 0 9", - {28, 1020, 29124, 1027904, 30515456, 1073711823}}, // 715 - {"rbk1nnbr/1ppq1ppp/p2p4/4p3/P3B2P/2P5/1P1PPPP1/R1KQNNBR w HAha - 2 9", - {38, 998, 37265, 1047592, 38552638, 1139322479}}, // 716 - {"r1qbn1br/k1pppppp/6n1/pp6/5P1P/P7/1PPPP1PB/RKQBNN1R w HA - 1 9", - {22, 549, 12867, 348574, 8725809, 251613569}}, // 717 - {"rkqnn1br/pppp3p/4p1pb/5p2/P2P4/7P/1PP1PPPB/RKQNNB1R w HAha - 1 9", - {32, 659, 21249, 469701, 15434721, 365761521}}, // 718 - {"rk1nnrbb/p1p1pppp/1p6/3p1q2/P3P3/2NN4/1PPP1PPP/RKQ2RBB w FAfa - 3 9", - {29, 989, 29087, 980477, 29643404, 998848556}}, // 719 - {"bbrk1q1r/ppppppp1/3n4/7p/3Pn3/6PN/PPP1PPNP/BBRK1Q1R w HChc - 2 9", - {23, 712, 16551, 516177, 12995202, 411077508}}, // 720 - {"brkbnq1r/p1ppp2p/5ppn/1p6/5P2/1P1P2P1/P1P1P2P/BRKBNQNR w HBhb - 0 9", - {28, 856, 24984, 780503, 23529352, 754501112}}, // 721 - {"br1k1bnr/ppppp1pp/4np2/1B2P2q/3P4/8/PPP2PPP/BRKNQ1NR w HB - 3 9", - {36, 1214, 40615, 1328331, 45096834, 1470987023}}, // 722 - {"brk1qnrb/pnppp1p1/1p6/5p1p/8/5PPP/PPPPP1R1/BRKNQN1B w Bgb - 0 9", - {22, 551, 13111, 353317, 9040545, 259643605}}, // 723 - {"rbbkn1nr/1ppp2pp/p3p3/2q2p2/3P4/6P1/PPPBPP1P/RB1KNQNR w HAha - 0 9", - {31, 1060, 31332, 1015099, 30314172, 976268967}}, // 724 - {"rkbbn1nr/ppppp1pp/8/6N1/5p2/1q6/P1PPPPPP/RKBBN1QR w HAha - 0 9", - {3, 72, 1919, 50827, 1400832, 39654253}}, 
// 725 - {"rkb2bnr/pp2pppp/2p1n3/3p4/q2P4/5NP1/PPP1PP1P/RKBNQBR1 w Aha - 0 9", - {29, 861, 24504, 763454, 22763215, 731511256}}, // 726 - {"rkbq1nrb/ppppppp1/7p/8/1P1n4/P4P1P/2PPP1P1/RKBNQNRB w GAga - 0 9", - {25, 672, 17631, 473864, 12954224, 361237536}}, // 727 - {"rbknb1nr/ppp1qp1p/6p1/3pp3/3P3P/2B1P3/PPP2PP1/RBKN1QNR w HAha - 1 9", - {27, 857, 24688, 792538, 23790033, 768247869}}, // 728 - {"rknbbq1r/p1pppppp/1p2N3/8/3n4/2P5/PP1PPPPP/RK1BBQNR w HAha - 4 9", - {29, 763, 22138, 574054, 16926075, 447896703}}, // 729 - {"r1nqbbnr/1pppp1pp/1k6/p4p2/8/4P3/PPPP1PPP/RKN1BBNR w HA - 0 9", - {26, 658, 17302, 464039, 12380488, 349047256}}, // 730 - {"rkn2qrb/ppp1pppp/6n1/1b1p4/1P6/4PPB1/P1PP2PP/RKNQ1NRB w GAga - 3 9", - {23, 574, 14070, 370324, 9501401, 263870337}}, // 731 - {"rbkn2br/ppppp1p1/4np1p/1P5q/8/2P1N3/P2PPPPP/RBK1QNBR w HAha - 1 9", - {29, 992, 29506, 999564, 30148787, 1045942540}}, // 732 - {"1knbqnbr/1ppppp1p/r5p1/p7/7P/2PN2P1/PP1PPP2/RK1BQNBR w HAh - 2 9", - {26, 698, 19395, 512023, 14848229, 402599313}}, // 733 - {"rk1qnbbr/pnpppp1p/6p1/1p6/3P4/1P6/P1P1PPPP/RKNQNBBR w HAha - 1 9", - {20, 480, 11159, 287539, 7425917, 203194521}}, // 734 - {"rknqnrbb/pp1p2p1/5p1p/2p1p3/2P1P3/P2P4/1P3PPP/RKNQNRBB w FAfa - 0 9", - {26, 679, 18116, 494953, 13790137, 392629571}}, // 735 - {"bbrk2qr/pp1p1ppp/3n2n1/2p1p3/3P1P2/6N1/PPP1P1PP/BBRKN1QR w HChc - 0 9", - {26, 790, 21521, 673269, 19259490, 617563700}}, // 736 - {"b1krnnqr/1p1ppppp/p1p5/b6B/P7/4P1N1/1PPP1PPP/BRK1N1QR w HB - 2 9", - {26, 625, 16451, 415452, 11490615, 304805107}}, // 737 - {"1rknnbqr/3ppppp/p7/1pp5/4b2P/P4P2/1PPPP1PR/BRKNNBQ1 w Bhb - 1 9", - {24, 757, 19746, 618777, 17275100, 544309489}}, // 738 - {"br1nn1rb/pppkpqpp/3p1p2/8/PP6/4N3/1KPPPPPP/BR2NQRB w - - 3 9", - {24, 682, 17129, 482711, 13057308, 375033550}}, // 739 - {"rbbkn1qr/pppp2p1/6np/4pp2/7N/7P/PPPPPPPR/RBBK1NQ1 w Aha - 0 9", - {22, 586, 14158, 409891, 10607781, 324452612}}, // 740 - {"rk1bn1qr/pppbpppp/4n3/4p3/4P3/5P2/PPPP2PP/RKBB1NQR w HAha - 1 9", - {22, 530, 13440, 348004, 9514787, 259898748}}, // 741 - {"rkbnnbqr/1ppp1ppp/p7/4p3/8/QP3P2/P1PPP1PP/RKBNNB1R w HAha - 0 9", - {29, 705, 21511, 551042, 17524731, 472356665}}, // 742 - {"1kbnnqrb/1pp1p1pp/r4p2/p2p4/N4P2/3P4/PPP1P1PP/RKB1NQRB w GAg - 2 9", - {21, 623, 14979, 437554, 11601134, 343214006}}, // 743 - {"rbknbn1r/pppp1p1p/4p1q1/8/P1P3Pp/8/1P1PPP2/RBKNBNQR w HAha - 0 9", - {30, 813, 24959, 708454, 23379040, 692576573}}, // 744 - {"rk1bb1qr/2pppppp/p2nn3/1p4P1/6QP/8/PPPPPP2/RKNBBN1R w HAha - 2 9", - {36, 857, 30124, 757524, 26485812, 696999449}}, // 745 - {"rkn1bbqr/p2ppppp/2p1n3/1p6/4PP2/6PP/PPPP4/RKNNBBQR w HAha - 0 9", - {33, 687, 22744, 511018, 17101732, 412778368}}, // 746 - {"rkn1bqrb/pnp1pppp/3p4/8/Pp6/1N2NP2/1PPPP1PP/RK2BQRB w GAga - 0 9", - {28, 591, 17174, 406025, 12182448, 312575205}}, // 747 - {"rbk1n1br/ppp1ppqp/2n5/2Np2p1/8/2P5/PPBPPPPP/R1KN1QBR w HAha - 4 9", - {35, 930, 30663, 844433, 27160490, 780616047}}, // 748 - {"rknbn1br/1ppp1ppp/p3p3/8/1q6/2P2N1P/P2PPPP1/RKNB1QBR w HAha - 0 9", - {4, 157, 3697, 138102, 3454704, 125373395}}, // 749 - {"rkn1qbbr/pp3ppp/4n3/2ppp3/4P1P1/P2P4/1PP2P1P/RKNNQBBR w HAha - 0 9", - {28, 840, 24437, 771328, 23200961, 756489357}}, // 750 - {"rkn1qrbb/pp1ppp2/2p1n1p1/7p/2P2P1P/6P1/PP1PP3/RKNNQRBB w FAfa - 1 9", - {32, 867, 27595, 757836, 24485663, 688115847}}, // 751 - {"b1rknnrq/bpppp1p1/p6p/5p1P/6P1/4N3/PPPPPP2/BBRKN1RQ w GCgc - 1 9", - {33, 851, 28888, 763967, 26686205, 731944177}}, // 752 - {"brkb1nr1/pppppp2/3n2pp/3B4/1P6/4P3/PqPP1PPP/BRK1NNRQ w GBgb - 2 9", - {4, 98, 
2965, 76143, 2352530, 64251468}}, // 753 - {"brk1nbrq/1ppppn1p/6p1/p4p2/P5P1/5R2/1PPPPP1P/BRKNNB1Q w Bgb - 0 9", - {29, 922, 27709, 879527, 27463717, 888881062}}, // 754 - {"brkn1rqb/1p1ppppp/3n4/p1p5/1P3P2/8/PNPPP1PP/BR1KNRQB w fb - 1 9", - {29, 633, 19399, 469818, 15076198, 396737074}}, // 755 - {"rb1k1nrq/pbp1pppp/1p1p1n2/8/5P2/4NN1P/PPPPP1P1/RBBK2RQ w GAga - 2 9", - {28, 841, 24056, 710751, 20772996, 613798447}}, // 756 - {"rkbbnnrq/p1pp3p/4p1p1/1p3p2/P6P/1P6/1BPPPPP1/RK1BNNRQ w GAga - 0 9", - {33, 957, 30668, 907217, 29735654, 903933626}}, // 757 - {"rk2nbrq/p1ppppp1/bpn5/7p/6P1/2N2P2/PPPPP1QP/RKB1NBR1 w GAga - 2 9", - {24, 687, 18206, 544627, 15518417, 484217179}}, // 758 - {"rkbn1r1b/pp1pppnp/6q1/2p3p1/5P1P/4N3/PPPPP1P1/RKB1NRQB w FAfa - 1 9", - {23, 831, 21254, 754622, 21126103, 744755212}}, // 759 - {"rbknb1rq/ppp1p1p1/3pnp1p/8/6PP/2PP4/PP2PP2/RBKNBNRQ w GAga - 0 9", - {31, 838, 26800, 736910, 24008129, 677776408}}, // 760 - {"rknbb1rq/p1pn1ppp/4p3/1p1p4/2P5/1P2N1P1/P2PPP1P/RKNBB1RQ w GAga - 1 9", - {29, 830, 24798, 721630, 22243832, 660040360}}, // 761 - {"rk1nbbrq/pp1p1ppp/3n4/P3p3/2p4P/8/1PPPPPP1/RKNNBBRQ w GAga - 1 9", - {24, 484, 12776, 297419, 8379748, 214004367}}, // 762 - {"rknnbr1b/ppp2pqp/3p4/4p1p1/7P/3P1P2/PPP1P1P1/RKNNBRQB w FAfa - 0 9", - {32, 838, 26408, 740701, 23472124, 699211365}}, // 763 - {"rb1k1rbq/ppppN1pp/2nn4/5p2/7P/8/PPPPPPP1/RBK1NRBQ w FA - 1 9", - {27, 800, 22785, 701742, 20804424, 660917073}}, // 764 - {"r1nbnrbq/kppppp1p/6p1/8/p1PP1P2/4P3/PP4PP/RKNBNRBQ w FA - 1 9", - {28, 757, 21198, 602699, 17180857, 507618340}}, // 765 - {"rkn1rbbq/p1pppppp/2n5/1pP5/8/1N2P3/PP1P1PPP/RK1NRBBQ w EAea - 1 9", - {22, 483, 11890, 283679, 7497674, 191130942}}, // 766 - {"rknnrqbb/2pppppp/8/p7/Np3P2/3P4/PPP1P1PP/RKN1RQBB w EAea - 0 9", - {25, 536, 14456, 339180, 9694947, 245669668}}, // 767 - {"bb1rknrn/1qppppp1/1p4B1/p6N/8/2P5/PP1PPPPP/B1QRK1RN w GDgd - 1 9", - {32, 715, 22421, 575008, 17860156, 502410909}}, // 768 - {"b1rbknrn/qpp1ppp1/p6p/3p4/2P5/1P1P1P2/P3P1PP/BQRBKNRN w GCgc - 0 9", - {30, 818, 24421, 688711, 20981488, 611986786}}, // 769 - {"bqkrnbrn/1pp1pp1p/p7/1B1p2p1/4P3/7P/PPPP1PP1/BQKRN1RN w - - 0 9", - {28, 676, 18366, 478054, 13126287, 363765666}}, // 770 - {"bqrknrnb/1p2ppp1/p1pp3p/8/3P1P2/1PP5/P3P1PP/BQRKNRNB w FCfc - 0 9", - {31, 646, 20686, 455607, 14984618, 349082278}}, // 771 - {"qbbrkn1r/pppppp1p/8/6p1/2P1Pn1P/6N1/PP1P1PP1/QBBRKNR1 w GDd - 3 9", - {20, 532, 11581, 303586, 7512432, 202967948}}, // 772 - {"1rbbknr1/p1ppp1pp/1pq2pn1/8/3P4/P3P3/QPP2PPP/1RBBKNRN w GBgb - 3 9", - {31, 1002, 30581, 999607, 30642468, 1009228283}}, // 773 - {"qrbkn1rn/pppp1ppp/8/6b1/P1P1Pp2/8/1P1P2PP/QRBKNBRN w GBgb - 0 9", - {22, 505, 12447, 304863, 8192621, 214730959}}, // 774 - {"qrbk1rnb/p2ppp1p/5n2/1pp3p1/8/7P/PPPPPPPN/QRBKR1NB w Bfb - 0 9", - {20, 619, 13448, 449630, 10571176, 369603424}}, // 775 - {"qbrkb1r1/ppp2ppp/3pn1n1/P3p3/4P3/3P4/1PP2PPP/QBRKBNRN w GCgc - 1 9", - {26, 755, 20596, 604483, 17164382, 510878835}}, // 776 - {"qrkbb1r1/ppp1pnpp/3p2n1/5p2/1P3P2/2Q3N1/P1PPP1PP/1RKBB1RN w GBgb - 0 9", - {35, 918, 32244, 870888, 30933394, 867833733}}, // 777 - {"qrknbbrn/ppp1ppp1/8/7p/2Bp4/4PPP1/PPPP3P/QRKNB1RN w GBgb - 0 9", - {27, 593, 16168, 376808, 10422676, 258348640}}, // 778 - {"qrk1brnb/ppppp3/4n2p/5pp1/2PP4/2N4P/PP2PPP1/QRK1BRNB w FBfb - 2 9", - {24, 672, 17447, 506189, 13765777, 414930519}}, // 779 - {"qbrknrb1/p2ppppp/2p3n1/8/p4P2/6PP/1PPPP3/QBRKNRBN w FCfc - 0 9", - {29, 759, 23235, 634493, 20416668, 584870558}}, // 780 + // 
{"brkbqn1r/p2ppppp/7n/1p6/P1p3PP/8/1PPPPP1N/BRKBQ1NR w HBhb - 0 9", {18, + // 583, 11790, 394603, 8858385, 304339862}}, // 705 + // {"brkq1bnr/pp1ppp1p/8/2p2np1/P7/8/1PPPPPPP/BRKQNBNR w HBhb - 0 9", {19, + // 552, 11811, 354260, 8432183, 262293169}}, // 706 + // {"brkqnnrb/1ppppppp/8/8/p3P3/5N2/PPPP1PPP/BRKQ1NRB w GBgb - 3 9", {21, + // 397, 9653, 204350, 5489836, 128389738}}, // 707 + // {"rbbkq1nr/1p2pppp/p1p3nB/3p4/1Q1P4/6N1/PPP1PPPP/RB1K2NR w HAha - 0 9", + // {40, 1132, 43404, 1260470, 47425783, 1415578783}}, // 708 + // {"rkbbq1nr/1pppp1p1/4np2/p6p/8/PP3P2/1KPPP1PP/R1BBQNNR w ha - 0 9", {24, + // 596, 15220, 402121, 10822049, 302056813}}, // 709 + // {"r1bqn1nr/pkpppp1p/1p4pb/8/PN6/R7/1PPPPPPP/1KBQ1BNR w H - 2 9", {33, + // 794, 25450, 649150, 20919309, 561073410}}, // 710 + // {"rkb1nnrb/1pppq1pp/p4p2/4p3/5P2/1P1PB3/P1P1P1PP/RK1QNNRB w GAga - 0 9", + // {26, 625, 17050, 442036, 12515042, 342967558}}, // 711 + // {"rbkqbn1r/pppp1p1p/2n1p1p1/8/8/1P1PP1N1/P1P2PPP/RBKQB1NR w HAha - 1 9", + // {30, 660, 20308, 492714, 15348335, 403323883}}, // 712 + // {"rkqbb1n1/pppppppr/8/6np/5P2/8/PPPPP1PP/RKQBBNNR w HAa - 6 9", {23, 500, + // 12154, 292936, 7519117, 196524441}}, // 713 + // {"rkqnbbnr/ppppppp1/8/7p/3N4/6PP/PPPPPP2/RKQNBB1R w HAa - 0 9", {24, 484, + // 12495, 284570, 7775173, 193947530}}, // 714 + // {"rkqnb1rb/p1p1pppp/1p1p4/2n5/3P4/2P1N1N1/PP2PPPP/RKQ1B1RB w GAga - 0 9", + // {28, 1020, 29124, 1027904, 30515456, 1073711823}}, // 715 + // {"rbk1nnbr/1ppq1ppp/p2p4/4p3/P3B2P/2P5/1P1PPPP1/R1KQNNBR w HAha - 2 9", + // {38, 998, 37265, 1047592, 38552638, 1139322479}}, // 716 + // {"r1qbn1br/k1pppppp/6n1/pp6/5P1P/P7/1PPPP1PB/RKQBNN1R w HA - 1 9", {22, + // 549, 12867, 348574, 8725809, 251613569}}, // 717 + // {"rkqnn1br/pppp3p/4p1pb/5p2/P2P4/7P/1PP1PPPB/RKQNNB1R w HAha - 1 9", {32, + // 659, 21249, 469701, 15434721, 365761521}}, // 718 + // {"rk1nnrbb/p1p1pppp/1p6/3p1q2/P3P3/2NN4/1PPP1PPP/RKQ2RBB w FAfa - 3 9", + // {29, 989, 29087, 980477, 29643404, 998848556}}, // 719 + // {"bbrk1q1r/ppppppp1/3n4/7p/3Pn3/6PN/PPP1PPNP/BBRK1Q1R w HChc - 2 9", {23, + // 712, 16551, 516177, 12995202, 411077508}}, // 720 + // {"brkbnq1r/p1ppp2p/5ppn/1p6/5P2/1P1P2P1/P1P1P2P/BRKBNQNR w HBhb - 0 9", + // {28, 856, 24984, 780503, 23529352, 754501112}}, // 721 + // {"br1k1bnr/ppppp1pp/4np2/1B2P2q/3P4/8/PPP2PPP/BRKNQ1NR w HB - 3 9", {36, + // 1214, 40615, 1328331, 45096834, 1470987023}}, // 722 + // {"brk1qnrb/pnppp1p1/1p6/5p1p/8/5PPP/PPPPP1R1/BRKNQN1B w Bgb - 0 9", {22, + // 551, 13111, 353317, 9040545, 259643605}}, // 723 + // {"rbbkn1nr/1ppp2pp/p3p3/2q2p2/3P4/6P1/PPPBPP1P/RB1KNQNR w HAha - 0 9", + // {31, 1060, 31332, 1015099, 30314172, 976268967}}, // 724 + // {"rkbbn1nr/ppppp1pp/8/6N1/5p2/1q6/P1PPPPPP/RKBBN1QR w HAha - 0 9", {3, + // 72, 1919, 50827, 1400832, 39654253}}, // 725 + // {"rkb2bnr/pp2pppp/2p1n3/3p4/q2P4/5NP1/PPP1PP1P/RKBNQBR1 w Aha - 0 9", + // {29, 861, 24504, 763454, 22763215, 731511256}}, // 726 + // {"rkbq1nrb/ppppppp1/7p/8/1P1n4/P4P1P/2PPP1P1/RKBNQNRB w GAga - 0 9", {25, + // 672, 17631, 473864, 12954224, 361237536}}, // 727 + // {"rbknb1nr/ppp1qp1p/6p1/3pp3/3P3P/2B1P3/PPP2PP1/RBKN1QNR w HAha - 1 9", + // {27, 857, 24688, 792538, 23790033, 768247869}}, // 728 + // {"rknbbq1r/p1pppppp/1p2N3/8/3n4/2P5/PP1PPPPP/RK1BBQNR w HAha - 4 9", {29, + // 763, 22138, 574054, 16926075, 447896703}}, // 729 + // {"r1nqbbnr/1pppp1pp/1k6/p4p2/8/4P3/PPPP1PPP/RKN1BBNR w HA - 0 9", {26, + // 658, 17302, 464039, 12380488, 349047256}}, // 730 + // {"rkn2qrb/ppp1pppp/6n1/1b1p4/1P6/4PPB1/P1PP2PP/RKNQ1NRB w GAga - 3 9", 
+ // {23, 574, 14070, 370324, 9501401, 263870337}}, // 731 + // {"rbkn2br/ppppp1p1/4np1p/1P5q/8/2P1N3/P2PPPPP/RBK1QNBR w HAha - 1 9", + // {29, 992, 29506, 999564, 30148787, 1045942540}}, // 732 + // {"1knbqnbr/1ppppp1p/r5p1/p7/7P/2PN2P1/PP1PPP2/RK1BQNBR w HAh - 2 9", {26, + // 698, 19395, 512023, 14848229, 402599313}}, // 733 + // {"rk1qnbbr/pnpppp1p/6p1/1p6/3P4/1P6/P1P1PPPP/RKNQNBBR w HAha - 1 9", {20, + // 480, 11159, 287539, 7425917, 203194521}}, // 734 + // {"rknqnrbb/pp1p2p1/5p1p/2p1p3/2P1P3/P2P4/1P3PPP/RKNQNRBB w FAfa - 0 9", + // {26, 679, 18116, 494953, 13790137, 392629571}}, // 735 + // {"bbrk2qr/pp1p1ppp/3n2n1/2p1p3/3P1P2/6N1/PPP1P1PP/BBRKN1QR w HChc - 0 9", + // {26, 790, 21521, 673269, 19259490, 617563700}}, // 736 + // {"b1krnnqr/1p1ppppp/p1p5/b6B/P7/4P1N1/1PPP1PPP/BRK1N1QR w HB - 2 9", {26, + // 625, 16451, 415452, 11490615, 304805107}}, // 737 + // {"1rknnbqr/3ppppp/p7/1pp5/4b2P/P4P2/1PPPP1PR/BRKNNBQ1 w Bhb - 1 9", {24, + // 757, 19746, 618777, 17275100, 544309489}}, // 738 + // {"br1nn1rb/pppkpqpp/3p1p2/8/PP6/4N3/1KPPPPPP/BR2NQRB w - - 3 9", {24, + // 682, 17129, 482711, 13057308, 375033550}}, // 739 + // {"rbbkn1qr/pppp2p1/6np/4pp2/7N/7P/PPPPPPPR/RBBK1NQ1 w Aha - 0 9", {22, + // 586, 14158, 409891, 10607781, 324452612}}, // 740 + // {"rk1bn1qr/pppbpppp/4n3/4p3/4P3/5P2/PPPP2PP/RKBB1NQR w HAha - 1 9", {22, + // 530, 13440, 348004, 9514787, 259898748}}, // 741 + // {"rkbnnbqr/1ppp1ppp/p7/4p3/8/QP3P2/P1PPP1PP/RKBNNB1R w HAha - 0 9", {29, + // 705, 21511, 551042, 17524731, 472356665}}, // 742 + // {"1kbnnqrb/1pp1p1pp/r4p2/p2p4/N4P2/3P4/PPP1P1PP/RKB1NQRB w GAg - 2 9", + // {21, 623, 14979, 437554, 11601134, 343214006}}, // 743 + // {"rbknbn1r/pppp1p1p/4p1q1/8/P1P3Pp/8/1P1PPP2/RBKNBNQR w HAha - 0 9", {30, + // 813, 24959, 708454, 23379040, 692576573}}, // 744 + // {"rk1bb1qr/2pppppp/p2nn3/1p4P1/6QP/8/PPPPPP2/RKNBBN1R w HAha - 2 9", {36, + // 857, 30124, 757524, 26485812, 696999449}}, // 745 + // {"rkn1bbqr/p2ppppp/2p1n3/1p6/4PP2/6PP/PPPP4/RKNNBBQR w HAha - 0 9", {33, + // 687, 22744, 511018, 17101732, 412778368}}, // 746 + // {"rkn1bqrb/pnp1pppp/3p4/8/Pp6/1N2NP2/1PPPP1PP/RK2BQRB w GAga - 0 9", {28, + // 591, 17174, 406025, 12182448, 312575205}}, // 747 + // {"rbk1n1br/ppp1ppqp/2n5/2Np2p1/8/2P5/PPBPPPPP/R1KN1QBR w HAha - 4 9", + // {35, 930, 30663, 844433, 27160490, 780616047}}, // 748 + // {"rknbn1br/1ppp1ppp/p3p3/8/1q6/2P2N1P/P2PPPP1/RKNB1QBR w HAha - 0 9", {4, + // 157, 3697, 138102, 3454704, 125373395}}, // 749 + // {"rkn1qbbr/pp3ppp/4n3/2ppp3/4P1P1/P2P4/1PP2P1P/RKNNQBBR w HAha - 0 9", + // {28, 840, 24437, 771328, 23200961, 756489357}}, // 750 + // {"rkn1qrbb/pp1ppp2/2p1n1p1/7p/2P2P1P/6P1/PP1PP3/RKNNQRBB w FAfa - 1 9", + // {32, 867, 27595, 757836, 24485663, 688115847}}, // 751 + // {"b1rknnrq/bpppp1p1/p6p/5p1P/6P1/4N3/PPPPPP2/BBRKN1RQ w GCgc - 1 9", {33, + // 851, 28888, 763967, 26686205, 731944177}}, // 752 + // {"brkb1nr1/pppppp2/3n2pp/3B4/1P6/4P3/PqPP1PPP/BRK1NNRQ w GBgb - 2 9", {4, + // 98, 2965, 76143, 2352530, 64251468}}, // 753 + // {"brk1nbrq/1ppppn1p/6p1/p4p2/P5P1/5R2/1PPPPP1P/BRKNNB1Q w Bgb - 0 9", + // {29, 922, 27709, 879527, 27463717, 888881062}}, // 754 + // {"brkn1rqb/1p1ppppp/3n4/p1p5/1P3P2/8/PNPPP1PP/BR1KNRQB w fb - 1 9", {29, + // 633, 19399, 469818, 15076198, 396737074}}, // 755 + // {"rb1k1nrq/pbp1pppp/1p1p1n2/8/5P2/4NN1P/PPPPP1P1/RBBK2RQ w GAga - 2 9", + // {28, 841, 24056, 710751, 20772996, 613798447}}, // 756 + // {"rkbbnnrq/p1pp3p/4p1p1/1p3p2/P6P/1P6/1BPPPPP1/RK1BNNRQ w GAga - 0 9", + // {33, 957, 30668, 907217, 29735654, 903933626}}, // 757 + // 
{"rk2nbrq/p1ppppp1/bpn5/7p/6P1/2N2P2/PPPPP1QP/RKB1NBR1 w GAga - 2 9", + // {24, 687, 18206, 544627, 15518417, 484217179}}, // 758 + // {"rkbn1r1b/pp1pppnp/6q1/2p3p1/5P1P/4N3/PPPPP1P1/RKB1NRQB w FAfa - 1 9", + // {23, 831, 21254, 754622, 21126103, 744755212}}, // 759 + // {"rbknb1rq/ppp1p1p1/3pnp1p/8/6PP/2PP4/PP2PP2/RBKNBNRQ w GAga - 0 9", {31, + // 838, 26800, 736910, 24008129, 677776408}}, // 760 + // {"rknbb1rq/p1pn1ppp/4p3/1p1p4/2P5/1P2N1P1/P2PPP1P/RKNBB1RQ w GAga - 1 9", + // {29, 830, 24798, 721630, 22243832, 660040360}}, // 761 + // {"rk1nbbrq/pp1p1ppp/3n4/P3p3/2p4P/8/1PPPPPP1/RKNNBBRQ w GAga - 1 9", {24, + // 484, 12776, 297419, 8379748, 214004367}}, // 762 + // {"rknnbr1b/ppp2pqp/3p4/4p1p1/7P/3P1P2/PPP1P1P1/RKNNBRQB w FAfa - 0 9", + // {32, 838, 26408, 740701, 23472124, 699211365}}, // 763 + // {"rb1k1rbq/ppppN1pp/2nn4/5p2/7P/8/PPPPPPP1/RBK1NRBQ w FA - 1 9", {27, + // 800, 22785, 701742, 20804424, 660917073}}, // 764 + // {"r1nbnrbq/kppppp1p/6p1/8/p1PP1P2/4P3/PP4PP/RKNBNRBQ w FA - 1 9", {28, + // 757, 21198, 602699, 17180857, 507618340}}, // 765 + // {"rkn1rbbq/p1pppppp/2n5/1pP5/8/1N2P3/PP1P1PPP/RK1NRBBQ w EAea - 1 9", + // {22, 483, 11890, 283679, 7497674, 191130942}}, // 766 + // {"rknnrqbb/2pppppp/8/p7/Np3P2/3P4/PPP1P1PP/RKN1RQBB w EAea - 0 9", {25, + // 536, 14456, 339180, 9694947, 245669668}}, // 767 + // {"bb1rknrn/1qppppp1/1p4B1/p6N/8/2P5/PP1PPPPP/B1QRK1RN w GDgd - 1 9", {32, + // 715, 22421, 575008, 17860156, 502410909}}, // 768 + // {"b1rbknrn/qpp1ppp1/p6p/3p4/2P5/1P1P1P2/P3P1PP/BQRBKNRN w GCgc - 0 9", + // {30, 818, 24421, 688711, 20981488, 611986786}}, // 769 + // {"bqkrnbrn/1pp1pp1p/p7/1B1p2p1/4P3/7P/PPPP1PP1/BQKRN1RN w - - 0 9", {28, + // 676, 18366, 478054, 13126287, 363765666}}, // 770 + // {"bqrknrnb/1p2ppp1/p1pp3p/8/3P1P2/1PP5/P3P1PP/BQRKNRNB w FCfc - 0 9", + // {31, 646, 20686, 455607, 14984618, 349082278}}, // 771 + // {"qbbrkn1r/pppppp1p/8/6p1/2P1Pn1P/6N1/PP1P1PP1/QBBRKNR1 w GDd - 3 9", + // {20, 532, 11581, 303586, 7512432, 202967948}}, // 772 + // {"1rbbknr1/p1ppp1pp/1pq2pn1/8/3P4/P3P3/QPP2PPP/1RBBKNRN w GBgb - 3 9", + // {31, 1002, 30581, 999607, 30642468, 1009228283}}, // 773 + // {"qrbkn1rn/pppp1ppp/8/6b1/P1P1Pp2/8/1P1P2PP/QRBKNBRN w GBgb - 0 9", {22, + // 505, 12447, 304863, 8192621, 214730959}}, // 774 + // {"qrbk1rnb/p2ppp1p/5n2/1pp3p1/8/7P/PPPPPPPN/QRBKR1NB w Bfb - 0 9", {20, + // 619, 13448, 449630, 10571176, 369603424}}, // 775 + // {"qbrkb1r1/ppp2ppp/3pn1n1/P3p3/4P3/3P4/1PP2PPP/QBRKBNRN w GCgc - 1 9", + // {26, 755, 20596, 604483, 17164382, 510878835}}, // 776 + // {"qrkbb1r1/ppp1pnpp/3p2n1/5p2/1P3P2/2Q3N1/P1PPP1PP/1RKBB1RN w GBgb - 0 + // 9", {35, 918, 32244, 870888, 30933394, 867833733}}, // 777 + // {"qrknbbrn/ppp1ppp1/8/7p/2Bp4/4PPP1/PPPP3P/QRKNB1RN w GBgb - 0 9", {27, + // 593, 16168, 376808, 10422676, 258348640}}, // 778 + // {"qrk1brnb/ppppp3/4n2p/5pp1/2PP4/2N4P/PP2PPP1/QRK1BRNB w FBfb - 2 9", + // {24, 672, 17447, 506189, 13765777, 414930519}}, // 779 + // {"qbrknrb1/p2ppppp/2p3n1/8/p4P2/6PP/1PPPP3/QBRKNRBN w FCfc - 0 9", {29, + // 759, 23235, 634493, 20416668, 584870558}}, // 780 {"1rkb1rbn/p1pp1ppp/3np3/1p6/4qP2/3NB3/PPPPPRPP/QRKB3N w Bfb - 0 9", {22, 923, 22585, 914106, 24049880, 957218571}}, // 781 - {"1rknrbbn/p1pp1p1p/8/1p2p1p1/4qPP1/2P5/PP1PP1BP/QRKNR1BN w EBeb - 0 9", - {28, 1309, 36355, 1568968, 44576409, 1846382333}}, // 782 - {"qrk1rn1b/ppppp2p/4n3/3b1pp1/4P2P/5BP1/PPPP1P2/QRKNRNB1 w EBeb - 3 9", - {26, 839, 22189, 726354, 19978260, 661207281}}, // 783 - {"bbrqk1rn/pp1ppppp/8/2p5/2P1P3/5n1P/PPBP1PP1/B1RQKNRN w GCgc - 1 9", - {3, 
95, 2690, 85038, 2518864, 80775549}}, // 784 - {"brqbk2n/pppppprp/8/6p1/1P3n2/5P2/P1PPP1PP/R1QBKNRN w Gb - 2 9", - {22, 593, 13255, 362760, 8922397, 253271592}}, // 785 - {"brqknbr1/pp3ppp/3p2n1/2p1p3/2P5/5P2/PPKPP1PP/BRQ1NBRN w gb - 0 9", - {21, 590, 13190, 397355, 9581695, 304103516}}, // 786 - {"1rqknrnb/2pp1ppp/p3p3/1p6/P2P4/5bP1/1PP1PP1P/BRQKNRNB w FBfb - 0 9", - {24, 737, 20052, 598439, 17948681, 536330341}}, // 787 - {"rbb1k1rn/p1pqpppp/6n1/1p1p4/5P2/3PP3/PPP1K1PP/RBBQ1NRN w ga - 3 9", - {24, 694, 16773, 513782, 13094823, 419402704}}, // 788 - {"rqbbknr1/1ppp2pp/p5n1/4pp2/P7/1PP5/1Q1PPPPP/R1BBKNRN w GAga - 0 9", - {24, 600, 15347, 408207, 11029596, 308553169}}, // 789 - {"rqbknbrn/2pppppp/6Q1/pp6/8/2P5/PP1PPPPP/R1BKNBRN w GAga - 2 9", - {40, 949, 34100, 889887, 31296485, 881529007}}, // 790 - {"rqbknr1b/pp1ppp2/2p2n1p/6p1/8/3P1PPP/PPP1P3/RQBKNRNB w FAfa - 0 9", - {20, 560, 12275, 373921, 8687544, 277906201}}, // 791 - {"rbqkbnrn/p3pppp/1p6/3p4/P1p3P1/1P6/1QPPPP1P/RB1KBNRN w GAga - 0 9", - {30, 1155, 35865, 1351455, 43092716, 1614019629}}, // 792 - {"rqkbb1rn/p1p1pppn/1p1p4/7p/4PP2/7P/PPPPB1P1/RQK1BNRN w GAga - 1 9", - {30, 701, 20804, 515942, 15450970, 401499189}}, // 793 - {"rqknbbrn/1p2pp1p/3p2p1/p1p5/P2P4/1P6/1KP1PPPP/RQ1NBBRN w ga - 0 9", - {28, 756, 21655, 610320, 17989811, 525585996}}, // 794 - {"rqknbrnb/1pp3pp/5p2/p2pp3/P7/3PPN2/1PP2PPP/RQKNBR1B w FAfa - 0 9", - {26, 731, 19509, 550395, 15209404, 439767476}}, // 795 - {"rbqkr1bn/p1pppp1p/1p1n4/6p1/7P/3P1PP1/PPP1P3/RBQKNRBN w FAa - 0 9", - {27, 586, 16282, 381604, 10905865, 274364342}}, // 796 - {"rqk1nrb1/ppbp1ppp/4p1n1/2p5/7P/1PP5/P2PPPP1/RQKBNRBN w FAfa - 1 9", - {27, 749, 21480, 602318, 18084787, 520547029}}, // 797 - {"rqknrbbn/pp1p1ppp/4p3/2p5/3P2P1/7P/PPP1PP2/RQKNRBBN w EAa - 0 9", - {20, 533, 11829, 336248, 8230417, 245871540}}, // 798 - {"rqknrnbb/pp1ppp1p/2p3p1/8/8/1P2P1NP/P1PP1PP1/RQKNR1BB w EAea - 0 9", - {22, 633, 14480, 441877, 10827868, 343525739}}, // 799 - {"1brkq1rn/2pppppp/1p2n3/p2bN3/8/7P/PPPPPPP1/BBRKQ1RN w GCgc - 2 9", - {27, 748, 20134, 580054, 16010135, 475206624}}, // 800 - {"brkbqnrn/2pp1ppp/8/1p2p3/Pp2N3/8/2PPPPPP/BRKBQNR1 w GBgb - 0 9", - {30, 827, 25308, 757837, 23746165, 751690068}}, // 801 - {"brk1nbrn/pp1ppppp/2p5/7P/5P2/q2P4/PPP1P1P1/BRKQNBRN w GBgb - 1 9", - {15, 471, 8716, 276424, 5960901, 190316951}}, // 802 - {"brkqnrnb/1p1pp1p1/p4p2/2p4p/8/P2PP3/1PP1QPPP/BRK1NRNB w FBfb - 0 9", - {24, 479, 12584, 280081, 7830230, 190419716}}, // 803 - {"rbbkqnrn/2ppp2p/pp3p2/6p1/P6P/8/RPPPPPP1/1BBKQNRN w Gga - 0 9", - {21, 523, 12125, 328733, 8322614, 242240658}}, // 804 - {"rkbbqr1n/1ppppppn/7p/p7/4P3/2P2P2/PP1PB1PP/RKB1QNRN w GAa - 3 9", - {27, 563, 16026, 372148, 11105151, 283211800}}, // 805 - {"rkbqnbrn/ppppp3/8/5ppp/2P3P1/7P/PPQPPP2/RKB1NBRN w GAga - 0 9", - {28, 639, 19250, 469250, 14872172, 384663405}}, // 806 - {"rkb1nrnb/pppp1pp1/5q1p/8/P3p3/4R1P1/1PPPPP1P/1KBQNRNB w Ffa - 0 9", - {28, 873, 23690, 720814, 20209424, 625281937}}, // 807 - {"rbkqb1rn/1p1ppppp/4n3/p1p5/8/3PBP2/PPP1P1PP/RBKQ1NRN w GAga - 0 9", - {26, 798, 21416, 667496, 18475618, 591681956}}, // 808 - {"rk1qbnrn/1p1ppppp/1b6/p1p5/P7/2P3NP/1P1PPPP1/RKQBB1RN w GAga - 0 9", - {22, 506, 12313, 301029, 7891676, 205739580}}, // 809 - {"rk1nbbrn/ppp1ppp1/8/3p3p/1P1P2q1/5PB1/P1P1P1PP/RKQN1BRN w GAga - 1 9", - {31, 956, 29219, 903799, 27827461, 876341492}}, // 810 - {"rkqnbr1b/pp1pppp1/7p/2p2n2/P2P4/7N/RPP1PPPP/1KQNBR1B w Ffa - 0 9", - {31, 750, 24267, 646252, 21639104, 617064197}}, // 811 - 
{"rbkq1rbn/2p1pppp/pp3n2/3p4/5P2/3N2N1/PPPPP1PP/RBKQR1B1 w Afa - 2 9", - {26, 647, 18027, 465119, 13643783, 369702807}}, // 812 - {"rkqbr1bn/p2ppppp/1pp2n2/8/5P2/3P1N2/PPP1PRPP/RKQB2BN w Aa - 3 9", - {24, 574, 14593, 371597, 10066892, 271121237}}, // 813 - {"rk1qrbbn/p1ppp1pp/1p2n3/5p2/1P6/K3N3/P1PPPPPP/R1Q1RBBN w ea - 0 9", - {25, 548, 14069, 340734, 9043111, 235545764}}, // 814 - {"rkqnrnbb/pp1pp3/2p5/5ppp/8/PP4NP/2PPPPP1/RKQNR1BB w EAea - 0 9", - {23, 727, 18228, 566572, 15078056, 471296844}}, // 815 - {"bbrknq1r/ppppppp1/8/7p/5n2/3P4/PPP1PNPP/BBKRNQR1 w c - 0 9", - {21, 610, 13300, 394705, 9605845, 293532398}}, // 816 - {"brkbnqr1/2pppnpp/pp3p2/8/4PPPP/8/PPPP4/BRKBNQRN w GBgb - 1 9", - {30, 757, 23908, 621332, 20360394, 548380577}}, // 817 - {"brk1qb1n/ppppppr1/2n3pp/8/2P3P1/2N5/PP1PPP1P/BR1KQBRN w b - 1 9", - {26, 570, 15537, 352883, 10081351, 242864559}}, // 818 - {"brknq1nb/pp2prpp/8/2pP1p2/6P1/2N5/PPPP1P1P/BRK1QRNB w FBb - 1 9", - {33, 830, 27897, 764915, 26262884, 765831403}}, // 819 - {"rbbk1qrn/ppp1p1pp/5p2/3p1n2/7N/P7/1PPPPPPP/RBB1KQRN w ga - 0 9", - {21, 562, 13060, 378883, 9520963, 290579255}}, // 820 - {"rk1b1qrn/ppp1pppp/5n2/3pN3/P6P/7b/1PPPPPP1/RKBB1QRN w GAga - 4 9", - {28, 677, 19235, 488740, 14354779, 383207197}}, // 821 - {"rkbnqbrn/pp1ppp1p/2p5/6p1/P7/4P3/KPPPQPPP/R1BN1BRN w - - 3 9", - {28, 585, 17443, 401483, 12574541, 310495538}}, // 822 - {"rk1nqrnb/pbpppp2/1p4p1/7p/P7/5NP1/1PPPPPBP/RKBNQR2 w FAfa - 2 9", - {26, 774, 21626, 645200, 19093408, 576325868}}, // 823 - {"rbknb1rn/p1pp2pp/1p6/4pp2/1q3P1B/2N5/PPPPPNPP/RBK2QR1 w GAga - 2 9", - {31, 1206, 36940, 1374158, 42849564, 1555711209}}, // 824 - {"rk1bbqrn/pp1pp1pp/3n4/5p2/3p4/1PP5/PK2PPPP/R1NBBQRN w ga - 0 9", - {21, 629, 14059, 429667, 10587910, 332632033}}, // 825 - {"rknqbbr1/p1pp1pp1/1p4n1/4p2p/4P1P1/6RB/PPPP1P1P/RKNQB2N w Aga - 0 9", - {27, 753, 20918, 593155, 17318772, 507563675}}, // 826 - {"rknqbr1b/pppp1ppp/4p2n/8/1P3P2/4P3/P1PPN1PP/RKNQBR1B w FAfa - 2 9", - {26, 623, 17177, 460663, 13389799, 383508368}}, // 827 - {"r2kqrbn/bppppppp/2n5/p4B2/5P2/2P5/PP1PP1PP/1RKNQRBN w F - 2 9", - {39, 1026, 37800, 1011922, 35946987, 992756232}}, // 828 - {"rk1bqrb1/ppppppp1/1n6/7p/2P2P1n/4P1Q1/PP1P2PP/RKNB1RBN w FAfa - 0 9", - {35, 760, 25817, 610557, 21014787, 536852043}}, // 829 - {"rkq1rb1n/ppppp1pp/1n6/5p2/PPb2P2/8/1KPPP1PP/R1NQRBBN w ea - 1 9", - {27, 754, 21009, 568788, 16461795, 448313956}}, // 830 - {"rknqr2b/pppnp1pp/3p4/3b1p2/8/1N1P2N1/PPP1PPPP/RKQ1R1BB w EAea - 1 9", - {27, 803, 23708, 700453, 21875031, 654754840}}, // 831 - {"bbrknrqn/ppppp1pB/8/2P2p1p/8/5N2/PP1PPPPP/B1RK1RQN w FCfc - 0 9", - {30, 799, 23923, 671112, 20532790, 603059376}}, // 832 - {"brkbnrq1/1pppp1p1/6np/p4p2/4P3/1PP5/P1KP1PPP/BR1BNRQN w fb - 1 9", - {27, 726, 19329, 555622, 15156662, 457601127}}, // 833 - {"brknrbq1/1p1p1ppp/p3p1n1/2p5/8/1P1BPP2/P1PP2PP/BRKNR1QN w EBeb - 0 9", - {36, 786, 27868, 655019, 22852433, 577223409}}, // 834 - {"brknrqnb/p2ppp1p/2p5/1p6/3P2p1/P1P1N3/1P2PPPP/BRK1RQNB w EBeb - 0 9", - {23, 649, 15169, 440504, 10687843, 320881984}}, // 835 - {"rbbk1rqn/1ppppppp/3n4/p7/2P5/3N4/PP1PPPPP/RBB1KRQN w fa - 1 9", - {20, 478, 11094, 275250, 7094988, 185488058}}, // 836 - {"rkbbnrqn/p2p1ppp/1p2p3/8/P1p1P3/1BP5/1P1P1PPP/RKB1NRQN w FAfa - 0 9", - {22, 570, 13295, 346811, 8671852, 229898448}}, // 837 - {"rkb1rb1n/ppppppqp/8/2n3p1/2P1P1P1/8/PP1P1P1P/RKBNRBQN w EAea - 1 9", - {23, 663, 16212, 490748, 12900485, 404944553}}, // 838 - {"rkb1rqnb/pppp3p/2n3p1/4pp2/P2P3P/2P5/1P2PPP1/RKBNRQNB w EAea - 0 9", - {25, 845, 22188, 741972, 
20276176, 683290790}}, // 839 - {"rbk1brqn/ppp1pppp/8/3p4/7P/1P4P1/2PPPP2/RBKNBRQN w FAfa - 0 9", - {24, 526, 13862, 322175, 9054028, 222704171}}, // 840 - {"rknbbrqn/pp3pp1/4p3/2pp3p/2P5/8/PPBPPPPP/RKN1BRQN w FAfa - 0 9", - {26, 756, 19280, 559186, 14697705, 433719427}}, // 841 - {"1knrbbqn/rp1p1ppp/p3p3/2p5/8/5P1P/PPPPP1P1/RKNRBBQN w DAd - 0 9", - {26, 539, 15194, 345070, 10223443, 248715580}}, // 842 - {"rknr1qnb/ppp1p1pp/3p2b1/8/4p3/1P3P1P/P1PP2P1/RKNRBQNB w DAda - 0 9", - {25, 701, 18969, 561369, 16047041, 496340789}}, // 843 - {"rbk1r1bn/ppppp1pp/4n3/5p2/1P3P2/4N2P/PqPPP1P1/RBK1RQBN w EAea - 1 9", - {2, 60, 1319, 41765, 1017864, 33183408}}, // 844 - {"r1nbrqbn/k1ppp1pp/1p6/p4p2/2P5/6PQ/PP1PPP1P/RKNBR1BN w EA - 0 9", - {27, 699, 20436, 561765, 17192121, 499247248}}, // 845 - {"rknrqbbn/1pp1pp2/p5p1/3p3p/6P1/PN5P/1PPPPP2/RK1RQBBN w DAda - 0 9", - {23, 611, 15515, 435927, 11917036, 352885930}}, // 846 - {"rknrqn1b/p1pp1ppb/8/1p2p1Qp/3P4/3N4/PPP1PPPP/RK1R1NBB w DAda - 0 9", - {45, 1170, 48283, 1320341, 52213677, 1500007485}}, // 847 - {"bbkrnrnq/p2p1ppp/2p1p3/1p6/1P2Q3/6P1/P1PPPP1P/BBKRNRN1 w - - 0 9", - {41, 1035, 39895, 1035610, 38555608, 1037686769}}, // 848 - {"brkbnr2/1ppppp1p/7n/p5N1/P2q4/8/1PPPPPPP/BRKBNRQ1 w FBfb - 1 9", - {22, 869, 19234, 679754, 16453359, 567287944}}, // 849 - {"brknrbnq/p1ppppp1/1p6/7p/2PP4/5P2/PPK1P1PP/BR1NRBNQ w eb - 1 9", - {23, 641, 14748, 422240, 10192718, 302864305}}, // 850 - {"brk1r1qb/pp1ppnpp/2p2pn1/8/6N1/2N3P1/PPPPPP1P/BRK1R1QB w EBeb - 3 9", - {32, 863, 28379, 773191, 25848794, 720443112}}, // 851 - {"rbbk1rnq/pppp1pp1/4p2p/8/3P2n1/4BN1P/PPP1PPP1/RB1K1RNQ w FAfa - 3 9", - {26, 628, 16151, 411995, 11237919, 300314373}}, // 852 - {"rkbbnr1q/p1pppppp/5n2/1p5B/PP6/4P3/2PP1PPP/RKB1NRNQ w FAfa - 0 9", - {30, 692, 21036, 519283, 16025428, 420887328}}, // 853 - {"rkb1rbnq/1pppp1pp/5p2/p7/5n1P/1PN3P1/P1PPPP2/RKB1RBNQ w EAea - 0 9", - {32, 825, 27130, 697251, 23593363, 622249676}}, // 854 - {"rkbnrnqb/1ppp1p1p/p5p1/4p3/4P3/2N2P2/PPPP2PP/RKBR1NQB w Aea - 0 9", - {24, 487, 13300, 301989, 8782713, 215787079}}, // 855 - {"rbknbr1q/pppp2pp/4p3/5p1n/1P2P2N/8/P1PP1PPP/RBKNBR1Q w FAfa - 0 9", - {23, 571, 13799, 365272, 9224232, 257288920}}, // 856 - {"rknbb1nq/pppppr2/5pp1/7p/8/1N4P1/PPPPPP1P/RK1BBRNQ w FAa - 2 9", - {26, 548, 15618, 350173, 10587626, 253006082}}, // 857 + // {"1rknrbbn/p1pp1p1p/8/1p2p1p1/4qPP1/2P5/PP1PP1BP/QRKNR1BN w EBeb - 0 9", + // {28, 1309, 36355, 1568968, 44576409, 1846382333}}, // 782 + // {"qrk1rn1b/ppppp2p/4n3/3b1pp1/4P2P/5BP1/PPPP1P2/QRKNRNB1 w EBeb - 3 9", + // {26, 839, 22189, 726354, 19978260, 661207281}}, // 783 + // {"bbrqk1rn/pp1ppppp/8/2p5/2P1P3/5n1P/PPBP1PP1/B1RQKNRN w GCgc - 1 9", {3, + // 95, 2690, 85038, 2518864, 80775549}}, // 784 + // {"brqbk2n/pppppprp/8/6p1/1P3n2/5P2/P1PPP1PP/R1QBKNRN w Gb - 2 9", {22, + // 593, 13255, 362760, 8922397, 253271592}}, // 785 + // {"brqknbr1/pp3ppp/3p2n1/2p1p3/2P5/5P2/PPKPP1PP/BRQ1NBRN w gb - 0 9", {21, + // 590, 13190, 397355, 9581695, 304103516}}, // 786 + // {"1rqknrnb/2pp1ppp/p3p3/1p6/P2P4/5bP1/1PP1PP1P/BRQKNRNB w FBfb - 0 9", + // {24, 737, 20052, 598439, 17948681, 536330341}}, // 787 + // {"rbb1k1rn/p1pqpppp/6n1/1p1p4/5P2/3PP3/PPP1K1PP/RBBQ1NRN w ga - 3 9", + // {24, 694, 16773, 513782, 13094823, 419402704}}, // 788 + // {"rqbbknr1/1ppp2pp/p5n1/4pp2/P7/1PP5/1Q1PPPPP/R1BBKNRN w GAga - 0 9", + // {24, 600, 15347, 408207, 11029596, 308553169}}, // 789 + // {"rqbknbrn/2pppppp/6Q1/pp6/8/2P5/PP1PPPPP/R1BKNBRN w GAga - 2 9", {40, + // 949, 34100, 889887, 31296485, 881529007}}, // 790 + // 
{"rqbknr1b/pp1ppp2/2p2n1p/6p1/8/3P1PPP/PPP1P3/RQBKNRNB w FAfa - 0 9", + // {20, 560, 12275, 373921, 8687544, 277906201}}, // 791 + // {"rbqkbnrn/p3pppp/1p6/3p4/P1p3P1/1P6/1QPPPP1P/RB1KBNRN w GAga - 0 9", + // {30, 1155, 35865, 1351455, 43092716, 1614019629}}, // 792 + // {"rqkbb1rn/p1p1pppn/1p1p4/7p/4PP2/7P/PPPPB1P1/RQK1BNRN w GAga - 1 9", + // {30, 701, 20804, 515942, 15450970, 401499189}}, // 793 + // {"rqknbbrn/1p2pp1p/3p2p1/p1p5/P2P4/1P6/1KP1PPPP/RQ1NBBRN w ga - 0 9", + // {28, 756, 21655, 610320, 17989811, 525585996}}, // 794 + // {"rqknbrnb/1pp3pp/5p2/p2pp3/P7/3PPN2/1PP2PPP/RQKNBR1B w FAfa - 0 9", {26, + // 731, 19509, 550395, 15209404, 439767476}}, // 795 + // {"rbqkr1bn/p1pppp1p/1p1n4/6p1/7P/3P1PP1/PPP1P3/RBQKNRBN w FAa - 0 9", + // {27, 586, 16282, 381604, 10905865, 274364342}}, // 796 + // {"rqk1nrb1/ppbp1ppp/4p1n1/2p5/7P/1PP5/P2PPPP1/RQKBNRBN w FAfa - 1 9", + // {27, 749, 21480, 602318, 18084787, 520547029}}, // 797 + // {"rqknrbbn/pp1p1ppp/4p3/2p5/3P2P1/7P/PPP1PP2/RQKNRBBN w EAa - 0 9", {20, + // 533, 11829, 336248, 8230417, 245871540}}, // 798 + // {"rqknrnbb/pp1ppp1p/2p3p1/8/8/1P2P1NP/P1PP1PP1/RQKNR1BB w EAea - 0 9", + // {22, 633, 14480, 441877, 10827868, 343525739}}, // 799 + // {"1brkq1rn/2pppppp/1p2n3/p2bN3/8/7P/PPPPPPP1/BBRKQ1RN w GCgc - 2 9", {27, + // 748, 20134, 580054, 16010135, 475206624}}, // 800 + // {"brkbqnrn/2pp1ppp/8/1p2p3/Pp2N3/8/2PPPPPP/BRKBQNR1 w GBgb - 0 9", {30, + // 827, 25308, 757837, 23746165, 751690068}}, // 801 + // {"brk1nbrn/pp1ppppp/2p5/7P/5P2/q2P4/PPP1P1P1/BRKQNBRN w GBgb - 1 9", {15, + // 471, 8716, 276424, 5960901, 190316951}}, // 802 + // {"brkqnrnb/1p1pp1p1/p4p2/2p4p/8/P2PP3/1PP1QPPP/BRK1NRNB w FBfb - 0 9", + // {24, 479, 12584, 280081, 7830230, 190419716}}, // 803 + // {"rbbkqnrn/2ppp2p/pp3p2/6p1/P6P/8/RPPPPPP1/1BBKQNRN w Gga - 0 9", {21, + // 523, 12125, 328733, 8322614, 242240658}}, // 804 + // {"rkbbqr1n/1ppppppn/7p/p7/4P3/2P2P2/PP1PB1PP/RKB1QNRN w GAa - 3 9", {27, + // 563, 16026, 372148, 11105151, 283211800}}, // 805 + // {"rkbqnbrn/ppppp3/8/5ppp/2P3P1/7P/PPQPPP2/RKB1NBRN w GAga - 0 9", {28, + // 639, 19250, 469250, 14872172, 384663405}}, // 806 + // {"rkb1nrnb/pppp1pp1/5q1p/8/P3p3/4R1P1/1PPPPP1P/1KBQNRNB w Ffa - 0 9", + // {28, 873, 23690, 720814, 20209424, 625281937}}, // 807 + // {"rbkqb1rn/1p1ppppp/4n3/p1p5/8/3PBP2/PPP1P1PP/RBKQ1NRN w GAga - 0 9", + // {26, 798, 21416, 667496, 18475618, 591681956}}, // 808 + // {"rk1qbnrn/1p1ppppp/1b6/p1p5/P7/2P3NP/1P1PPPP1/RKQBB1RN w GAga - 0 9", + // {22, 506, 12313, 301029, 7891676, 205739580}}, // 809 + // {"rk1nbbrn/ppp1ppp1/8/3p3p/1P1P2q1/5PB1/P1P1P1PP/RKQN1BRN w GAga - 1 9", + // {31, 956, 29219, 903799, 27827461, 876341492}}, // 810 + // {"rkqnbr1b/pp1pppp1/7p/2p2n2/P2P4/7N/RPP1PPPP/1KQNBR1B w Ffa - 0 9", {31, + // 750, 24267, 646252, 21639104, 617064197}}, // 811 + // {"rbkq1rbn/2p1pppp/pp3n2/3p4/5P2/3N2N1/PPPPP1PP/RBKQR1B1 w Afa - 2 9", + // {26, 647, 18027, 465119, 13643783, 369702807}}, // 812 + // {"rkqbr1bn/p2ppppp/1pp2n2/8/5P2/3P1N2/PPP1PRPP/RKQB2BN w Aa - 3 9", {24, + // 574, 14593, 371597, 10066892, 271121237}}, // 813 + // {"rk1qrbbn/p1ppp1pp/1p2n3/5p2/1P6/K3N3/P1PPPPPP/R1Q1RBBN w ea - 0 9", + // {25, 548, 14069, 340734, 9043111, 235545764}}, // 814 + // {"rkqnrnbb/pp1pp3/2p5/5ppp/8/PP4NP/2PPPPP1/RKQNR1BB w EAea - 0 9", {23, + // 727, 18228, 566572, 15078056, 471296844}}, // 815 + // {"bbrknq1r/ppppppp1/8/7p/5n2/3P4/PPP1PNPP/BBKRNQR1 w c - 0 9", {21, 610, + // 13300, 394705, 9605845, 293532398}}, // 816 + // {"brkbnqr1/2pppnpp/pp3p2/8/4PPPP/8/PPPP4/BRKBNQRN w GBgb - 1 9", {30, + // 
757, 23908, 621332, 20360394, 548380577}}, // 817 + // {"brk1qb1n/ppppppr1/2n3pp/8/2P3P1/2N5/PP1PPP1P/BR1KQBRN w b - 1 9", {26, + // 570, 15537, 352883, 10081351, 242864559}}, // 818 + // {"brknq1nb/pp2prpp/8/2pP1p2/6P1/2N5/PPPP1P1P/BRK1QRNB w FBb - 1 9", {33, + // 830, 27897, 764915, 26262884, 765831403}}, // 819 + // {"rbbk1qrn/ppp1p1pp/5p2/3p1n2/7N/P7/1PPPPPPP/RBB1KQRN w ga - 0 9", {21, + // 562, 13060, 378883, 9520963, 290579255}}, // 820 + // {"rk1b1qrn/ppp1pppp/5n2/3pN3/P6P/7b/1PPPPPP1/RKBB1QRN w GAga - 4 9", {28, + // 677, 19235, 488740, 14354779, 383207197}}, // 821 + // {"rkbnqbrn/pp1ppp1p/2p5/6p1/P7/4P3/KPPPQPPP/R1BN1BRN w - - 3 9", {28, + // 585, 17443, 401483, 12574541, 310495538}}, // 822 + // {"rk1nqrnb/pbpppp2/1p4p1/7p/P7/5NP1/1PPPPPBP/RKBNQR2 w FAfa - 2 9", {26, + // 774, 21626, 645200, 19093408, 576325868}}, // 823 + // {"rbknb1rn/p1pp2pp/1p6/4pp2/1q3P1B/2N5/PPPPPNPP/RBK2QR1 w GAga - 2 9", + // {31, 1206, 36940, 1374158, 42849564, 1555711209}}, // 824 + // {"rk1bbqrn/pp1pp1pp/3n4/5p2/3p4/1PP5/PK2PPPP/R1NBBQRN w ga - 0 9", {21, + // 629, 14059, 429667, 10587910, 332632033}}, // 825 + // {"rknqbbr1/p1pp1pp1/1p4n1/4p2p/4P1P1/6RB/PPPP1P1P/RKNQB2N w Aga - 0 9", + // {27, 753, 20918, 593155, 17318772, 507563675}}, // 826 + // {"rknqbr1b/pppp1ppp/4p2n/8/1P3P2/4P3/P1PPN1PP/RKNQBR1B w FAfa - 2 9", + // {26, 623, 17177, 460663, 13389799, 383508368}}, // 827 + // {"r2kqrbn/bppppppp/2n5/p4B2/5P2/2P5/PP1PP1PP/1RKNQRBN w F - 2 9", {39, + // 1026, 37800, 1011922, 35946987, 992756232}}, // 828 + // {"rk1bqrb1/ppppppp1/1n6/7p/2P2P1n/4P1Q1/PP1P2PP/RKNB1RBN w FAfa - 0 9", + // {35, 760, 25817, 610557, 21014787, 536852043}}, // 829 + // {"rkq1rb1n/ppppp1pp/1n6/5p2/PPb2P2/8/1KPPP1PP/R1NQRBBN w ea - 1 9", {27, + // 754, 21009, 568788, 16461795, 448313956}}, // 830 + // {"rknqr2b/pppnp1pp/3p4/3b1p2/8/1N1P2N1/PPP1PPPP/RKQ1R1BB w EAea - 1 9", + // {27, 803, 23708, 700453, 21875031, 654754840}}, // 831 + // {"bbrknrqn/ppppp1pB/8/2P2p1p/8/5N2/PP1PPPPP/B1RK1RQN w FCfc - 0 9", {30, + // 799, 23923, 671112, 20532790, 603059376}}, // 832 + // {"brkbnrq1/1pppp1p1/6np/p4p2/4P3/1PP5/P1KP1PPP/BR1BNRQN w fb - 1 9", {27, + // 726, 19329, 555622, 15156662, 457601127}}, // 833 + // {"brknrbq1/1p1p1ppp/p3p1n1/2p5/8/1P1BPP2/P1PP2PP/BRKNR1QN w EBeb - 0 9", + // {36, 786, 27868, 655019, 22852433, 577223409}}, // 834 + // {"brknrqnb/p2ppp1p/2p5/1p6/3P2p1/P1P1N3/1P2PPPP/BRK1RQNB w EBeb - 0 9", + // {23, 649, 15169, 440504, 10687843, 320881984}}, // 835 + // {"rbbk1rqn/1ppppppp/3n4/p7/2P5/3N4/PP1PPPPP/RBB1KRQN w fa - 1 9", {20, + // 478, 11094, 275250, 7094988, 185488058}}, // 836 + // {"rkbbnrqn/p2p1ppp/1p2p3/8/P1p1P3/1BP5/1P1P1PPP/RKB1NRQN w FAfa - 0 9", + // {22, 570, 13295, 346811, 8671852, 229898448}}, // 837 + // {"rkb1rb1n/ppppppqp/8/2n3p1/2P1P1P1/8/PP1P1P1P/RKBNRBQN w EAea - 1 9", + // {23, 663, 16212, 490748, 12900485, 404944553}}, // 838 + // {"rkb1rqnb/pppp3p/2n3p1/4pp2/P2P3P/2P5/1P2PPP1/RKBNRQNB w EAea - 0 9", + // {25, 845, 22188, 741972, 20276176, 683290790}}, // 839 + // {"rbk1brqn/ppp1pppp/8/3p4/7P/1P4P1/2PPPP2/RBKNBRQN w FAfa - 0 9", {24, + // 526, 13862, 322175, 9054028, 222704171}}, // 840 + // {"rknbbrqn/pp3pp1/4p3/2pp3p/2P5/8/PPBPPPPP/RKN1BRQN w FAfa - 0 9", {26, + // 756, 19280, 559186, 14697705, 433719427}}, // 841 + // {"1knrbbqn/rp1p1ppp/p3p3/2p5/8/5P1P/PPPPP1P1/RKNRBBQN w DAd - 0 9", {26, + // 539, 15194, 345070, 10223443, 248715580}}, // 842 + // {"rknr1qnb/ppp1p1pp/3p2b1/8/4p3/1P3P1P/P1PP2P1/RKNRBQNB w DAda - 0 9", + // {25, 701, 18969, 561369, 16047041, 496340789}}, // 843 + // 
{"rbk1r1bn/ppppp1pp/4n3/5p2/1P3P2/4N2P/PqPPP1P1/RBK1RQBN w EAea - 1 9", + // {2, 60, 1319, 41765, 1017864, 33183408}}, // 844 + // {"r1nbrqbn/k1ppp1pp/1p6/p4p2/2P5/6PQ/PP1PPP1P/RKNBR1BN w EA - 0 9", {27, + // 699, 20436, 561765, 17192121, 499247248}}, // 845 + // {"rknrqbbn/1pp1pp2/p5p1/3p3p/6P1/PN5P/1PPPPP2/RK1RQBBN w DAda - 0 9", + // {23, 611, 15515, 435927, 11917036, 352885930}}, // 846 + // {"rknrqn1b/p1pp1ppb/8/1p2p1Qp/3P4/3N4/PPP1PPPP/RK1R1NBB w DAda - 0 9", + // {45, 1170, 48283, 1320341, 52213677, 1500007485}}, // 847 + // {"bbkrnrnq/p2p1ppp/2p1p3/1p6/1P2Q3/6P1/P1PPPP1P/BBKRNRN1 w - - 0 9", {41, + // 1035, 39895, 1035610, 38555608, 1037686769}}, // 848 + // {"brkbnr2/1ppppp1p/7n/p5N1/P2q4/8/1PPPPPPP/BRKBNRQ1 w FBfb - 1 9", {22, + // 869, 19234, 679754, 16453359, 567287944}}, // 849 + // {"brknrbnq/p1ppppp1/1p6/7p/2PP4/5P2/PPK1P1PP/BR1NRBNQ w eb - 1 9", {23, + // 641, 14748, 422240, 10192718, 302864305}}, // 850 + // {"brk1r1qb/pp1ppnpp/2p2pn1/8/6N1/2N3P1/PPPPPP1P/BRK1R1QB w EBeb - 3 9", + // {32, 863, 28379, 773191, 25848794, 720443112}}, // 851 + // {"rbbk1rnq/pppp1pp1/4p2p/8/3P2n1/4BN1P/PPP1PPP1/RB1K1RNQ w FAfa - 3 9", + // {26, 628, 16151, 411995, 11237919, 300314373}}, // 852 + // {"rkbbnr1q/p1pppppp/5n2/1p5B/PP6/4P3/2PP1PPP/RKB1NRNQ w FAfa - 0 9", {30, + // 692, 21036, 519283, 16025428, 420887328}}, // 853 + // {"rkb1rbnq/1pppp1pp/5p2/p7/5n1P/1PN3P1/P1PPPP2/RKB1RBNQ w EAea - 0 9", + // {32, 825, 27130, 697251, 23593363, 622249676}}, // 854 + // {"rkbnrnqb/1ppp1p1p/p5p1/4p3/4P3/2N2P2/PPPP2PP/RKBR1NQB w Aea - 0 9", + // {24, 487, 13300, 301989, 8782713, 215787079}}, // 855 + // {"rbknbr1q/pppp2pp/4p3/5p1n/1P2P2N/8/P1PP1PPP/RBKNBR1Q w FAfa - 0 9", + // {23, 571, 13799, 365272, 9224232, 257288920}}, // 856 + // {"rknbb1nq/pppppr2/5pp1/7p/8/1N4P1/PPPPPP1P/RK1BBRNQ w FAa - 2 9", {26, + // 548, 15618, 350173, 10587626, 253006082}}, // 857 {"rknr1bnq/p2pp1pp/1p3p2/2p4b/6PP/2P2N2/PP1PPP2/RKNRBB1Q w DAda - 1 9", {25, 502, 13150, 279098, 7824941, 175766730}}, // 858 - {"rknrb1qb/ppp1pppp/3p4/8/4P1nP/2P5/PPKP1PP1/R1NRBNQB w da - 1 9", - {23, 643, 14849, 426616, 10507328, 312096061}}, // 859 - {"rbk1rnbq/pppp1npp/4p3/5p2/4P1P1/7P/PPPP1P1N/RBKNR1BQ w EAea - 1 9", - {24, 591, 15178, 376988, 10251465, 263574861}}, // 860 - {"rknbrnb1/p1pppp1p/1p6/3N2p1/P3q1P1/8/1PPPPP1P/RKNBR1BQ w EAea - 1 9", - {28, 948, 27343, 864588, 26241141, 812343987}}, // 861 - {"rknrn1b1/ppppppqp/8/6p1/2P5/2P1BP2/PP2P1PP/RKNRNB1Q w DAda - 1 9", - {31, 807, 24360, 672973, 20455205, 588518645}}, // 862 - {"1k1rnqbb/npppppp1/r7/p2B3p/5P2/1N4P1/PPPPP2P/RK1RNQB1 w DAd - 0 9", - {40, 1122, 44297, 1249989, 48711073, 1412437357}}, // 863 - {"bbqr1rkn/pp1ppppp/8/2p5/1P2P1n1/7N/P1PP1P1P/BBQRKR1N w FD - 0 9", - {26, 841, 22986, 746711, 21328001, 705170410}}, // 864 - {"bqkr1rnn/1ppp1ppp/p4b2/4p3/P7/3PP2N/1PP2PPP/BQRBKR1N w FC - 3 9", - {24, 500, 12802, 293824, 7928916, 197806842}}, // 865 - {"bqrkrbnn/1pp1ppp1/8/p6p/3p4/P3P2P/QPPP1PP1/B1RKRBNN w ECec - 0 9", - {31, 592, 18585, 396423, 12607528, 298629240}}, // 866 - {"bqkrrnnb/2p1pppp/p7/1P1p4/8/2R3P1/PP1PPP1P/BQ1KRNNB w E - 0 9", - {42, 1124, 45187, 1276664, 50052573, 1483524894}}, // 867 - {"qbbrkrn1/p1pppn1p/8/1p3Pp1/2P5/8/PP1PPP1P/QBBRKRNN w FDfd - 0 9", - {21, 577, 13244, 392131, 9683808, 300294295}}, // 868 - {"qrbbkrnn/pp1p2pp/4p3/5p2/2p2P1P/2P5/PP1PP1P1/QRBBKRNN w FBfb - 0 9", - {21, 571, 12736, 345681, 8239872, 228837930}}, // 869 - {"qrbkrbn1/1pp1pppp/p2p4/8/5PPn/2P5/PP1PP3/QRBKRBNN w EBeb - 0 9", - {18, 466, 9443, 257776, 5679073, 162883949}}, // 870 - 
{"qrb1rnnb/pp1p1ppp/2pk4/4p3/1P2P3/1R6/P1PP1PPP/Q1BKRNNB w E - 4 9", - {37, 760, 26863, 562201, 19486022, 421740856}}, // 871 - {"qbrkbrn1/p1pppp1p/6n1/1p4p1/1P6/5P2/P1PPPBPP/QBRK1RNN w FCfc - 1 9", - {33, 824, 27385, 750924, 25176664, 734656217}}, // 872 - {"qrkbbr2/2pppppp/5nn1/pp1Q4/P7/3P4/1PP1PPPP/1RKBBRNN w FBfb - 0 9", - {42, 1147, 44012, 1311247, 48216013, 1522548864}}, // 873 - {"qrkrbbnn/pp2pp2/2pp2pp/1B6/P7/4P3/1PPP1PPP/QRKRB1NN w DBdb - 0 9", - {26, 464, 12653, 242892, 6928220, 142507795}}, // 874 - {"qrkrbnnb/p1pp1pp1/1p5p/4p3/1P6/6PN/PKPPPP1P/QR1RBN1B w db - 0 9", - {29, 705, 20000, 529810, 15055365, 419552571}}, // 875 - {"qbrkr1bn/p1p1pp1p/1p1p2n1/6p1/3P1P2/4P3/PPP3PP/QBKRRNBN w ec - 2 9", - {23, 613, 14835, 426484, 10747407, 323905533}}, // 876 - {"qrk1rnb1/p1pp1ppp/1p2Bbn1/8/4P3/6P1/PPPP1P1P/QRK1RNBN w EBeb - 1 9", - {28, 927, 24887, 846839, 23063284, 807913585}}, // 877 - {"1qkrnbbn/1rpppppp/pp6/5N2/P4P2/8/1PPPP1PP/QRKRNBB1 w DBd - 3 9", - {30, 542, 16646, 345172, 10976745, 251694423}}, // 878 - {"qrkr2bb/pppppppp/8/1n2n3/1N5P/1P6/P1PPPPP1/QRKR1NBB w DBdb - 1 9", - {28, 719, 21048, 562015, 17351761, 479400272}}, // 879 - {"bbrqkrnn/3ppppp/8/ppp5/6P1/4P2N/PPPPKP1P/BBRQ1R1N w fc - 0 9", - {21, 704, 16119, 546215, 13676371, 470796854}}, // 880 - {"brqbkrnn/1pp2p1p/3pp1p1/p5N1/8/1P6/P1PPPPPP/BRQBK1RN w Bfb - 0 9", - {34, 688, 22827, 505618, 16639723, 402140795}}, // 881 - {"br1krb1n/2qppppp/pp3n2/8/1P4P1/8/P1PPPP1P/1RQKRBNN w EBeb - 0 9", - {24, 945, 23943, 926427, 25019636, 959651619}}, // 882 - {"brqkr1nb/2ppp1pp/1p2np2/p7/2P1PN2/8/PP1P1PPP/BRQKRN1B w EBeb - 0 9", - {28, 675, 19728, 504128, 15516491, 417396563}}, // 883 - {"rbbqkrnn/3pppp1/p7/1pp4p/2P1P2P/8/PP1P1PP1/RBBQKRNN w FAfa - 0 9", - {26, 671, 18164, 496806, 14072641, 404960259}}, // 884 - {"rqbbkr1n/pp1p1p1p/4pn2/2p3p1/4P1P1/3P3P/PPP2P2/RQBBKRNN w FAfa - 0 9", - {22, 633, 14629, 441809, 10776416, 335689685}}, // 885 - {"rqbkrbnn/p1ppp3/1p3pp1/7p/3P4/P1P5/1PQ1PPPP/R1BKRBNN w EAea - 0 9", - {32, 607, 20339, 454319, 15586203, 383515709}}, // 886 - {"rqbkrnn1/pp2ppbp/3p4/2p3p1/2P5/1P3N1P/P2PPPP1/RQBKRN1B w EAea - 1 9", - {29, 943, 28732, 908740, 28761841, 907579129}}, // 887 - {"rbqkb1nn/1ppppr1p/p5p1/5p2/1P6/2P4P/P1KPPPP1/RBQ1BRNN w a - 1 9", - {22, 441, 10403, 231273, 5784206, 140934555}}, // 888 - {"rqkb1rnn/1pp1pp1p/p5p1/1b1p4/3P4/P5P1/RPP1PP1P/1QKBBRNN w Ffa - 1 9", - {21, 505, 11592, 290897, 7147063, 188559137}}, // 889 - {"rq1rbbnn/pkp1ppp1/3p3p/1p2N1P1/8/8/PPPPPP1P/RQKRBB1N w DA - 0 9", - {27, 608, 16419, 387751, 10808908, 268393274}}, // 890 - {"rqkrb2b/p2ppppp/2p3nn/1p6/5P2/PP1P4/2P1P1PP/RQKRBNNB w DAda - 1 9", - {30, 749, 21563, 581531, 16916813, 485406712}}, // 891 - {"rbqkr1bn/pp1ppp2/2p1n2p/6p1/8/4BPNP/PPPPP1P1/RBQKRN2 w EAea - 0 9", - {23, 600, 15082, 410057, 11041820, 314327867}}, // 892 - {"rqkbrnb1/2ppp1pp/pp3pn1/8/5P2/B2P4/PPP1P1PP/RQKBRN1N w EAea - 2 9", - {22, 569, 13541, 371471, 9395816, 269460607}}, // 893 - {"rqkrnbb1/p1p1pppp/1p4n1/3p4/7P/P3P3/1PPPBPP1/RQKRN1BN w DAda - 0 9", - {27, 579, 15565, 373079, 10238486, 266047417}}, // 894 - {"rqkrn1bb/p1ppp1pp/4n3/1p6/6p1/4N3/PPPPPPPP/RQKR2BB w DAda - 0 9", - {20, 462, 10234, 274162, 6563859, 193376359}}, // 895 - {"bbrkqr2/pppp1ppp/6nn/8/2P1p3/3PP2N/PP3PPP/BBRKQR1N w FCfc - 0 9", - {28, 724, 21688, 619064, 19318355, 593204629}}, // 896 - {"brk1qrnn/1pppbppp/4p3/8/1p6/P1P4P/3PPPP1/BRKBQRNN w FBfb - 1 9", - {24, 662, 16920, 468215, 12610387, 355969349}}, // 897 - {"1r1qrbnn/p1pkpppp/1p1p4/8/3P1PP1/P4b2/1PP1P2P/BRKQRBNN w EB - 1 9", - {22, 696, 17021, 
510247, 13697382, 401903030}}, // 898 - {"1rkqrnnb/p1p1p1pp/1p1p4/3b1p1N/4P3/5N2/PPPP1PPP/BRKQR2B w EBeb - 1 9", - {29, 887, 27035, 816176, 26051242, 791718847}}, // 899 - {"rbbkq1rn/pppppppp/7n/8/P7/3P3P/1PPKPPP1/RBB1QRNN w a - 3 9", - {22, 417, 9900, 216855, 5505063, 134818483}}, // 900 - {"rkbbqr1n/1p1pppp1/2p2n2/p4NBp/8/3P4/PPP1PPPP/RK1BQRN1 w FAfa - 0 9", - {37, 832, 30533, 728154, 26676373, 673756141}}, // 901 - {"rkbqrb1n/3pBppp/ppp2n2/8/8/P2P4/1PP1PPPP/RK1QRBNN w EAea - 0 9", - {28, 685, 19718, 543069, 16033316, 482288814}}, // 902 - {"rkb1rn1b/ppppqppp/4p3/8/1P2n1P1/5Q2/P1PP1P1P/RKB1RNNB w EAea - 2 9", - {37, 1158, 40114, 1234768, 44672979, 1389312729}}, // 903 - {"r1kqbrnn/pp1pp1p1/7p/2P2p2/5b2/3P4/P1P1P1PP/RBKQBRNN w FAfa - 0 9", - {5, 161, 4745, 154885, 4734999, 157499039}}, // 904 - {"rkqbbr1n/ppp1ppp1/8/Q2p3p/4n3/3P1P2/PPP1P1PP/RK1BBRNN w FAfa - 2 9", - {38, 1144, 40433, 1236877, 43832975, 1366087771}}, // 905 - {"rkqrbbn1/p1ppppp1/Bp5p/8/P6n/2P1P3/1P1P1PPP/RKQRB1NN w DAda - 0 9", - {28, 551, 15488, 350861, 9944107, 251179183}}, // 906 - {"rkqrb1nb/1ppp1ppp/p7/4p3/5n2/3P2N1/PPPQPPPP/RK1RB1NB w DAda - 0 9", - {26, 690, 19877, 513628, 15965907, 418191735}}, // 907 - {"rbkqrnbn/pppp1p2/4p1p1/7p/7P/P2P4/BPP1PPP1/R1KQRNBN w EAea - 0 9", - {27, 515, 13992, 309727, 8792550, 218658292}}, // 908 - {"rkqbrnbn/pp1ppp2/8/2p3p1/P1P4p/5P2/1PKPP1PP/R1QBRNBN w ea - 0 9", - {27, 627, 16843, 431101, 11978698, 328434174}}, // 909 - {"rkqrnbbn/1p2pp1p/3p2p1/p1p5/P5PP/3N4/1PPPPP2/RKQR1BBN w DAda - 0 9", - {23, 624, 15512, 451860, 11960861, 367311176}}, // 910 - {"rk2rnbb/ppqppppp/2pn4/8/1P3P2/6P1/P1PPP1NP/RKQR1NBB w DAa - 1 9", - {27, 727, 20206, 581003, 16633696, 505212747}}, // 911 - {"b1krrqnn/pp1ppp1p/2p3p1/8/P3Pb1P/1P6/2PP1PP1/BBRKRQNN w EC - 0 9", - {32, 943, 30759, 865229, 28672582, 800922511}}, // 912 - {"1rkbrqnn/p1pp1ppp/1p6/8/P2Pp3/8/1PPKPPQP/BR1BR1NN w eb - 0 9", - {28, 916, 24892, 817624, 22840279, 759318058}}, // 913 - {"brkrqb1n/1pppp1pp/p7/3n1p2/P5P1/3PP3/1PP2P1P/BRKRQBNN w DBdb - 0 9", - {27, 669, 18682, 484259, 13956472, 380267099}}, // 914 - {"brkrqnnb/3pppp1/1p6/p1p4p/2P3P1/6N1/PP1PPP1P/BRKRQ1NB w DBdb - 0 9", - {29, 699, 20042, 512639, 15093909, 406594531}}, // 915 - {"r1bkrq1n/pp2pppp/3b1n2/2pp2B1/6P1/3P1P2/PPP1P2P/RB1KRQNN w EAea - 2 9", - {27, 835, 22848, 713550, 19867800, 631209313}}, // 916 - {"rk1brq1n/p1p1pppp/3p1n2/1p3b2/4P3/2NQ4/PPPP1PPP/RKBBR2N w EAea - 4 9", - {36, 1004, 35774, 979608, 35143142, 966310885}}, // 917 - {"rkbrqbnn/1p2ppp1/B1p5/p2p3p/4P2P/8/PPPP1PP1/RKBRQ1NN w DAda - 0 9", - {27, 748, 21005, 597819, 17597073, 515304215}}, // 918 - {"rkbrqn1b/pp1pp1pp/2p2p2/5n2/8/2P2P2/PP1PP1PP/RKBRQ1NB w DAda - 0 9", - {20, 479, 10485, 266446, 6253775, 167767913}}, // 919 - {"rbkrbnn1/ppppp1pp/5q2/5p2/5P2/P3P2N/1PPP2PP/RBKRBQ1N w DAda - 3 9", - {28, 947, 26900, 876068, 26007841, 838704143}}, // 920 - {"rkr1bqnn/1ppp1p1p/p5p1/4p3/3PP2b/2P2P2/PP4PP/RKRBBQNN w CAca - 0 9", - {31, 1004, 32006, 1006830, 32688124, 1024529879}}, // 921 - {"rkrqbbnn/pppp3p/8/4ppp1/1PP4P/8/P2PPPP1/RKRQBBNN w CAca - 0 9", - {24, 717, 18834, 564137, 15844525, 484884485}}, // 922 - {"rkrqbn1b/pppp2pp/8/4pp2/1P1P2n1/5N2/P1P1PP1P/RKRQBN1B w CAca - 0 9", - {25, 718, 19654, 587666, 17257753, 537354146}}, // 923 - {"rbkrqnbn/p1p1ppp1/1p1p4/8/3PP2p/2PB4/PP3PPP/R1KRQNBN w DAda - 0 9", - {30, 754, 23298, 611322, 19338246, 532603566}}, // 924 - {"1krbqnbn/1p2pppp/r1pp4/p7/8/1P1P2PP/P1P1PP2/RKRBQNBN w CAc - 0 9", - {21, 566, 13519, 375128, 9700847, 279864836}}, // 925 - 
{"rkrq1b2/pppppppb/3n2np/2N5/4P3/7P/PPPP1PP1/RKRQ1BBN w CAca - 1 9", - {33, 654, 21708, 479678, 15990307, 382218272}}, // 926 - {"rkr1nnbb/ppp2p1p/3p1qp1/4p3/P5P1/3PN3/1PP1PP1P/RKRQN1BB w CAca - 1 9", - {28, 715, 20361, 555328, 16303092, 468666425}}, // 927 - {"bbrkrnqn/1p1ppppp/8/8/p2pP3/PP6/2P2PPP/BBRKRNQN w ECec - 0 9", - {24, 757, 19067, 603231, 15957628, 509307623}}, // 928 - {"brkbrnqn/ppp2p2/4p3/P2p2pp/6P1/5P2/1PPPP2P/BRKBRNQN w EBeb - 0 9", - {25, 548, 14563, 348259, 9688526, 247750144}}, // 929 - {"brkr1bqn/1pppppp1/3n3p/1p6/P7/4P1P1/1PPP1P1P/BRKRN1QN w DBdb - 0 9", - {19, 359, 7430, 157099, 3521652, 81787718}}, // 930 - {"brkr1qnb/pppp2pp/2B1p3/5p2/2n5/6PP/PPPPPPN1/BRKR1QN1 w DBdb - 1 9", - {27, 854, 23303, 741626, 20558538, 667089231}}, // 931 - {"rbbkrnqn/p1p1p1pp/8/1p1p4/1P1Pp3/6N1/P1P2PPP/RBBKRNQ1 w EAea - 0 9", - {28, 723, 19844, 514440, 14621108, 397454100}}, // 932 - {"rkbbrn1n/pppppp2/5q1p/6p1/3P3P/4P3/PPP2PP1/RKBBRNQN w EAea - 1 9", - {25, 741, 19224, 585198, 15605840, 485037906}}, // 933 - {"rkbr1bq1/ppnppppp/6n1/2p5/2P1N2P/8/PP1PPPP1/RKBRNBQ1 w DAda - 3 9", - {24, 547, 14359, 339497, 9410221, 234041078}}, // 934 + // {"rknrb1qb/ppp1pppp/3p4/8/4P1nP/2P5/PPKP1PP1/R1NRBNQB w da - 1 9", {23, + // 643, 14849, 426616, 10507328, 312096061}}, // 859 + // {"rbk1rnbq/pppp1npp/4p3/5p2/4P1P1/7P/PPPP1P1N/RBKNR1BQ w EAea - 1 9", + // {24, 591, 15178, 376988, 10251465, 263574861}}, // 860 + // {"rknbrnb1/p1pppp1p/1p6/3N2p1/P3q1P1/8/1PPPPP1P/RKNBR1BQ w EAea - 1 9", + // {28, 948, 27343, 864588, 26241141, 812343987}}, // 861 + // {"rknrn1b1/ppppppqp/8/6p1/2P5/2P1BP2/PP2P1PP/RKNRNB1Q w DAda - 1 9", {31, + // 807, 24360, 672973, 20455205, 588518645}}, // 862 + // {"1k1rnqbb/npppppp1/r7/p2B3p/5P2/1N4P1/PPPPP2P/RK1RNQB1 w DAd - 0 9", + // {40, 1122, 44297, 1249989, 48711073, 1412437357}}, // 863 + // {"bbqr1rkn/pp1ppppp/8/2p5/1P2P1n1/7N/P1PP1P1P/BBQRKR1N w FD - 0 9", {26, + // 841, 22986, 746711, 21328001, 705170410}}, // 864 + // {"bqkr1rnn/1ppp1ppp/p4b2/4p3/P7/3PP2N/1PP2PPP/BQRBKR1N w FC - 3 9", {24, + // 500, 12802, 293824, 7928916, 197806842}}, // 865 + // {"bqrkrbnn/1pp1ppp1/8/p6p/3p4/P3P2P/QPPP1PP1/B1RKRBNN w ECec - 0 9", {31, + // 592, 18585, 396423, 12607528, 298629240}}, // 866 + // {"bqkrrnnb/2p1pppp/p7/1P1p4/8/2R3P1/PP1PPP1P/BQ1KRNNB w E - 0 9", {42, + // 1124, 45187, 1276664, 50052573, 1483524894}}, // 867 + // {"qbbrkrn1/p1pppn1p/8/1p3Pp1/2P5/8/PP1PPP1P/QBBRKRNN w FDfd - 0 9", {21, + // 577, 13244, 392131, 9683808, 300294295}}, // 868 + // {"qrbbkrnn/pp1p2pp/4p3/5p2/2p2P1P/2P5/PP1PP1P1/QRBBKRNN w FBfb - 0 9", + // {21, 571, 12736, 345681, 8239872, 228837930}}, // 869 + // {"qrbkrbn1/1pp1pppp/p2p4/8/5PPn/2P5/PP1PP3/QRBKRBNN w EBeb - 0 9", {18, + // 466, 9443, 257776, 5679073, 162883949}}, // 870 + // {"qrb1rnnb/pp1p1ppp/2pk4/4p3/1P2P3/1R6/P1PP1PPP/Q1BKRNNB w E - 4 9", {37, + // 760, 26863, 562201, 19486022, 421740856}}, // 871 + // {"qbrkbrn1/p1pppp1p/6n1/1p4p1/1P6/5P2/P1PPPBPP/QBRK1RNN w FCfc - 1 9", + // {33, 824, 27385, 750924, 25176664, 734656217}}, // 872 + // {"qrkbbr2/2pppppp/5nn1/pp1Q4/P7/3P4/1PP1PPPP/1RKBBRNN w FBfb - 0 9", {42, + // 1147, 44012, 1311247, 48216013, 1522548864}}, // 873 + // {"qrkrbbnn/pp2pp2/2pp2pp/1B6/P7/4P3/1PPP1PPP/QRKRB1NN w DBdb - 0 9", {26, + // 464, 12653, 242892, 6928220, 142507795}}, // 874 + // {"qrkrbnnb/p1pp1pp1/1p5p/4p3/1P6/6PN/PKPPPP1P/QR1RBN1B w db - 0 9", {29, + // 705, 20000, 529810, 15055365, 419552571}}, // 875 + // {"qbrkr1bn/p1p1pp1p/1p1p2n1/6p1/3P1P2/4P3/PPP3PP/QBKRRNBN w ec - 2 9", + // {23, 613, 14835, 426484, 10747407, 
323905533}}, // 876 + // {"qrk1rnb1/p1pp1ppp/1p2Bbn1/8/4P3/6P1/PPPP1P1P/QRK1RNBN w EBeb - 1 9", + // {28, 927, 24887, 846839, 23063284, 807913585}}, // 877 + // {"1qkrnbbn/1rpppppp/pp6/5N2/P4P2/8/1PPPP1PP/QRKRNBB1 w DBd - 3 9", {30, + // 542, 16646, 345172, 10976745, 251694423}}, // 878 + // {"qrkr2bb/pppppppp/8/1n2n3/1N5P/1P6/P1PPPPP1/QRKR1NBB w DBdb - 1 9", {28, + // 719, 21048, 562015, 17351761, 479400272}}, // 879 + // {"bbrqkrnn/3ppppp/8/ppp5/6P1/4P2N/PPPPKP1P/BBRQ1R1N w fc - 0 9", {21, + // 704, 16119, 546215, 13676371, 470796854}}, // 880 + // {"brqbkrnn/1pp2p1p/3pp1p1/p5N1/8/1P6/P1PPPPPP/BRQBK1RN w Bfb - 0 9", {34, + // 688, 22827, 505618, 16639723, 402140795}}, // 881 + // {"br1krb1n/2qppppp/pp3n2/8/1P4P1/8/P1PPPP1P/1RQKRBNN w EBeb - 0 9", {24, + // 945, 23943, 926427, 25019636, 959651619}}, // 882 + // {"brqkr1nb/2ppp1pp/1p2np2/p7/2P1PN2/8/PP1P1PPP/BRQKRN1B w EBeb - 0 9", + // {28, 675, 19728, 504128, 15516491, 417396563}}, // 883 + // {"rbbqkrnn/3pppp1/p7/1pp4p/2P1P2P/8/PP1P1PP1/RBBQKRNN w FAfa - 0 9", {26, + // 671, 18164, 496806, 14072641, 404960259}}, // 884 + // {"rqbbkr1n/pp1p1p1p/4pn2/2p3p1/4P1P1/3P3P/PPP2P2/RQBBKRNN w FAfa - 0 9", + // {22, 633, 14629, 441809, 10776416, 335689685}}, // 885 + // {"rqbkrbnn/p1ppp3/1p3pp1/7p/3P4/P1P5/1PQ1PPPP/R1BKRBNN w EAea - 0 9", + // {32, 607, 20339, 454319, 15586203, 383515709}}, // 886 + // {"rqbkrnn1/pp2ppbp/3p4/2p3p1/2P5/1P3N1P/P2PPPP1/RQBKRN1B w EAea - 1 9", + // {29, 943, 28732, 908740, 28761841, 907579129}}, // 887 + // {"rbqkb1nn/1ppppr1p/p5p1/5p2/1P6/2P4P/P1KPPPP1/RBQ1BRNN w a - 1 9", {22, + // 441, 10403, 231273, 5784206, 140934555}}, // 888 + // {"rqkb1rnn/1pp1pp1p/p5p1/1b1p4/3P4/P5P1/RPP1PP1P/1QKBBRNN w Ffa - 1 9", + // {21, 505, 11592, 290897, 7147063, 188559137}}, // 889 + // {"rq1rbbnn/pkp1ppp1/3p3p/1p2N1P1/8/8/PPPPPP1P/RQKRBB1N w DA - 0 9", {27, + // 608, 16419, 387751, 10808908, 268393274}}, // 890 + // {"rqkrb2b/p2ppppp/2p3nn/1p6/5P2/PP1P4/2P1P1PP/RQKRBNNB w DAda - 1 9", + // {30, 749, 21563, 581531, 16916813, 485406712}}, // 891 + // {"rbqkr1bn/pp1ppp2/2p1n2p/6p1/8/4BPNP/PPPPP1P1/RBQKRN2 w EAea - 0 9", + // {23, 600, 15082, 410057, 11041820, 314327867}}, // 892 + // {"rqkbrnb1/2ppp1pp/pp3pn1/8/5P2/B2P4/PPP1P1PP/RQKBRN1N w EAea - 2 9", + // {22, 569, 13541, 371471, 9395816, 269460607}}, // 893 + // {"rqkrnbb1/p1p1pppp/1p4n1/3p4/7P/P3P3/1PPPBPP1/RQKRN1BN w DAda - 0 9", + // {27, 579, 15565, 373079, 10238486, 266047417}}, // 894 + // {"rqkrn1bb/p1ppp1pp/4n3/1p6/6p1/4N3/PPPPPPPP/RQKR2BB w DAda - 0 9", {20, + // 462, 10234, 274162, 6563859, 193376359}}, // 895 + // {"bbrkqr2/pppp1ppp/6nn/8/2P1p3/3PP2N/PP3PPP/BBRKQR1N w FCfc - 0 9", {28, + // 724, 21688, 619064, 19318355, 593204629}}, // 896 + // {"brk1qrnn/1pppbppp/4p3/8/1p6/P1P4P/3PPPP1/BRKBQRNN w FBfb - 1 9", {24, + // 662, 16920, 468215, 12610387, 355969349}}, // 897 + // {"1r1qrbnn/p1pkpppp/1p1p4/8/3P1PP1/P4b2/1PP1P2P/BRKQRBNN w EB - 1 9", + // {22, 696, 17021, 510247, 13697382, 401903030}}, // 898 + // {"1rkqrnnb/p1p1p1pp/1p1p4/3b1p1N/4P3/5N2/PPPP1PPP/BRKQR2B w EBeb - 1 9", + // {29, 887, 27035, 816176, 26051242, 791718847}}, // 899 + // {"rbbkq1rn/pppppppp/7n/8/P7/3P3P/1PPKPPP1/RBB1QRNN w a - 3 9", {22, 417, + // 9900, 216855, 5505063, 134818483}}, // 900 + // {"rkbbqr1n/1p1pppp1/2p2n2/p4NBp/8/3P4/PPP1PPPP/RK1BQRN1 w FAfa - 0 9", + // {37, 832, 30533, 728154, 26676373, 673756141}}, // 901 + // {"rkbqrb1n/3pBppp/ppp2n2/8/8/P2P4/1PP1PPPP/RK1QRBNN w EAea - 0 9", {28, + // 685, 19718, 543069, 16033316, 482288814}}, // 902 + // 
{"rkb1rn1b/ppppqppp/4p3/8/1P2n1P1/5Q2/P1PP1P1P/RKB1RNNB w EAea - 2 9", + // {37, 1158, 40114, 1234768, 44672979, 1389312729}}, // 903 + // {"r1kqbrnn/pp1pp1p1/7p/2P2p2/5b2/3P4/P1P1P1PP/RBKQBRNN w FAfa - 0 9", {5, + // 161, 4745, 154885, 4734999, 157499039}}, // 904 + // {"rkqbbr1n/ppp1ppp1/8/Q2p3p/4n3/3P1P2/PPP1P1PP/RK1BBRNN w FAfa - 2 9", + // {38, 1144, 40433, 1236877, 43832975, 1366087771}}, // 905 + // {"rkqrbbn1/p1ppppp1/Bp5p/8/P6n/2P1P3/1P1P1PPP/RKQRB1NN w DAda - 0 9", + // {28, 551, 15488, 350861, 9944107, 251179183}}, // 906 + // {"rkqrb1nb/1ppp1ppp/p7/4p3/5n2/3P2N1/PPPQPPPP/RK1RB1NB w DAda - 0 9", + // {26, 690, 19877, 513628, 15965907, 418191735}}, // 907 + // {"rbkqrnbn/pppp1p2/4p1p1/7p/7P/P2P4/BPP1PPP1/R1KQRNBN w EAea - 0 9", {27, + // 515, 13992, 309727, 8792550, 218658292}}, // 908 + // {"rkqbrnbn/pp1ppp2/8/2p3p1/P1P4p/5P2/1PKPP1PP/R1QBRNBN w ea - 0 9", {27, + // 627, 16843, 431101, 11978698, 328434174}}, // 909 + // {"rkqrnbbn/1p2pp1p/3p2p1/p1p5/P5PP/3N4/1PPPPP2/RKQR1BBN w DAda - 0 9", + // {23, 624, 15512, 451860, 11960861, 367311176}}, // 910 + // {"rk2rnbb/ppqppppp/2pn4/8/1P3P2/6P1/P1PPP1NP/RKQR1NBB w DAa - 1 9", {27, + // 727, 20206, 581003, 16633696, 505212747}}, // 911 + // {"b1krrqnn/pp1ppp1p/2p3p1/8/P3Pb1P/1P6/2PP1PP1/BBRKRQNN w EC - 0 9", {32, + // 943, 30759, 865229, 28672582, 800922511}}, // 912 + // {"1rkbrqnn/p1pp1ppp/1p6/8/P2Pp3/8/1PPKPPQP/BR1BR1NN w eb - 0 9", {28, + // 916, 24892, 817624, 22840279, 759318058}}, // 913 + // {"brkrqb1n/1pppp1pp/p7/3n1p2/P5P1/3PP3/1PP2P1P/BRKRQBNN w DBdb - 0 9", + // {27, 669, 18682, 484259, 13956472, 380267099}}, // 914 + // {"brkrqnnb/3pppp1/1p6/p1p4p/2P3P1/6N1/PP1PPP1P/BRKRQ1NB w DBdb - 0 9", + // {29, 699, 20042, 512639, 15093909, 406594531}}, // 915 + // {"r1bkrq1n/pp2pppp/3b1n2/2pp2B1/6P1/3P1P2/PPP1P2P/RB1KRQNN w EAea - 2 9", + // {27, 835, 22848, 713550, 19867800, 631209313}}, // 916 + // {"rk1brq1n/p1p1pppp/3p1n2/1p3b2/4P3/2NQ4/PPPP1PPP/RKBBR2N w EAea - 4 9", + // {36, 1004, 35774, 979608, 35143142, 966310885}}, // 917 + // {"rkbrqbnn/1p2ppp1/B1p5/p2p3p/4P2P/8/PPPP1PP1/RKBRQ1NN w DAda - 0 9", + // {27, 748, 21005, 597819, 17597073, 515304215}}, // 918 + // {"rkbrqn1b/pp1pp1pp/2p2p2/5n2/8/2P2P2/PP1PP1PP/RKBRQ1NB w DAda - 0 9", + // {20, 479, 10485, 266446, 6253775, 167767913}}, // 919 + // {"rbkrbnn1/ppppp1pp/5q2/5p2/5P2/P3P2N/1PPP2PP/RBKRBQ1N w DAda - 3 9", + // {28, 947, 26900, 876068, 26007841, 838704143}}, // 920 + // {"rkr1bqnn/1ppp1p1p/p5p1/4p3/3PP2b/2P2P2/PP4PP/RKRBBQNN w CAca - 0 9", + // {31, 1004, 32006, 1006830, 32688124, 1024529879}}, // 921 + // {"rkrqbbnn/pppp3p/8/4ppp1/1PP4P/8/P2PPPP1/RKRQBBNN w CAca - 0 9", {24, + // 717, 18834, 564137, 15844525, 484884485}}, // 922 + // {"rkrqbn1b/pppp2pp/8/4pp2/1P1P2n1/5N2/P1P1PP1P/RKRQBN1B w CAca - 0 9", + // {25, 718, 19654, 587666, 17257753, 537354146}}, // 923 + // {"rbkrqnbn/p1p1ppp1/1p1p4/8/3PP2p/2PB4/PP3PPP/R1KRQNBN w DAda - 0 9", + // {30, 754, 23298, 611322, 19338246, 532603566}}, // 924 + // {"1krbqnbn/1p2pppp/r1pp4/p7/8/1P1P2PP/P1P1PP2/RKRBQNBN w CAc - 0 9", {21, + // 566, 13519, 375128, 9700847, 279864836}}, // 925 + // {"rkrq1b2/pppppppb/3n2np/2N5/4P3/7P/PPPP1PP1/RKRQ1BBN w CAca - 1 9", {33, + // 654, 21708, 479678, 15990307, 382218272}}, // 926 + // {"rkr1nnbb/ppp2p1p/3p1qp1/4p3/P5P1/3PN3/1PP1PP1P/RKRQN1BB w CAca - 1 9", + // {28, 715, 20361, 555328, 16303092, 468666425}}, // 927 + // {"bbrkrnqn/1p1ppppp/8/8/p2pP3/PP6/2P2PPP/BBRKRNQN w ECec - 0 9", {24, + // 757, 19067, 603231, 15957628, 509307623}}, // 928 + // 
{"brkbrnqn/ppp2p2/4p3/P2p2pp/6P1/5P2/1PPPP2P/BRKBRNQN w EBeb - 0 9", {25, + // 548, 14563, 348259, 9688526, 247750144}}, // 929 + // {"brkr1bqn/1pppppp1/3n3p/1p6/P7/4P1P1/1PPP1P1P/BRKRN1QN w DBdb - 0 9", + // {19, 359, 7430, 157099, 3521652, 81787718}}, // 930 + // {"brkr1qnb/pppp2pp/2B1p3/5p2/2n5/6PP/PPPPPPN1/BRKR1QN1 w DBdb - 1 9", + // {27, 854, 23303, 741626, 20558538, 667089231}}, // 931 + // {"rbbkrnqn/p1p1p1pp/8/1p1p4/1P1Pp3/6N1/P1P2PPP/RBBKRNQ1 w EAea - 0 9", + // {28, 723, 19844, 514440, 14621108, 397454100}}, // 932 + // {"rkbbrn1n/pppppp2/5q1p/6p1/3P3P/4P3/PPP2PP1/RKBBRNQN w EAea - 1 9", {25, + // 741, 19224, 585198, 15605840, 485037906}}, // 933 + // {"rkbr1bq1/ppnppppp/6n1/2p5/2P1N2P/8/PP1PPPP1/RKBRNBQ1 w DAda - 3 9", + // {24, 547, 14359, 339497, 9410221, 234041078}}, // 934 {"1kbrnqnb/r1ppppp1/8/pp5p/8/1P1NP3/P1PP1PPP/RKB1RQNB w Ad - 2 9", {26, 618, 17305, 442643, 13112297, 357030697}}, // 935 - {"rbkrb1qn/1pp1ppp1/3pn2p/pP6/8/4N1P1/P1PPPP1P/RBKRB1QN w DAda - 0 9", - {21, 544, 12492, 338832, 8381483, 236013157}}, // 936 - {"rkrbbnqn/ppppp3/5p2/6pp/5PBP/4P3/PPPP2P1/RKR1BNQN w CAca - 0 9", - {30, 891, 25435, 764356, 21894752, 669256602}}, // 937 - {"rkr1bb1n/ppppp1pp/5p2/4n3/3QP3/5P2/RPPP2PP/1KRNBB1N w Cca - 1 9", - {45, 1172, 51766, 1332060, 57856784, 1501852662}}, // 938 - {"rkr1bqnb/pp1ppppp/8/2pN4/1P6/5N2/P1PPnPPP/RKR1BQ1B w CAca - 0 9", - {28, 730, 20511, 559167, 16323242, 463032124}}, // 939 - {"rbkrnqb1/2ppppp1/p5np/1p6/8/3N4/PPPPPPPP/RBKRQNB1 w DAda - 2 9", - {20, 417, 9159, 217390, 5180716, 133936564}}, // 940 - {"rkrbnqb1/p1pppnpp/5p2/1p6/2P5/1P1P1N2/P3PPPP/RKRB1QBN w CAca - 0 9", - {25, 546, 14039, 330316, 8813781, 222026485}}, // 941 - {"rkr1qbbn/ppppppp1/4n3/7p/8/P7/KPPPPPPP/R1RNQBBN w ca - 0 9", - {22, 484, 11458, 267495, 6633319, 163291279}}, // 942 - {"rkrnqnb1/1ppppp2/p5p1/7p/8/P1bPP3/1PP1QPPP/RKRN1NBB w CAca - 0 9", - {22, 636, 15526, 441001, 11614241, 331083405}}, // 943 - {"b2krn1q/p1rppppp/1Q3n2/2p1b3/1P4P1/8/P1PPPP1P/BBRKRNN1 w ECe - 3 9", - {36, 1192, 42945, 1406795, 50382104, 1650202838}}, // 944 - {"brkbrnn1/pp1pppp1/7q/2p5/6Pp/4P1NP/PPPP1P2/BRKBR1NQ w EBeb - 2 9", - {30, 978, 29593, 942398, 29205057, 936568065}}, // 945 - {"brkrnb1q/pp1p1ppp/2p1p3/5n2/1P6/5N1N/P1PPPPPP/BRKR1B1Q w DBdb - 1 9", - {31, 897, 27830, 810187, 25423729, 755334868}}, // 946 - {"brkr1nqb/pp1p1pp1/2pn3p/P3p3/4P3/6P1/1PPP1P1P/BRKRNNQB w DBdb - 0 9", - {19, 382, 8052, 182292, 4232274, 103537333}}, // 947 - {"r1bkrn1q/ppbppppp/5n2/2p5/3P4/P6N/1PP1PPPP/RBBKRNQ1 w EAea - 3 9", - {27, 822, 22551, 678880, 19115128, 578210135}}, // 948 - {"rkbbrnnq/pp2pppp/8/2pp4/P1P5/1P3P2/3PP1PP/RKBBRNNQ w EAea - 1 9", - {23, 643, 15410, 442070, 11170489, 329615708}}, // 949 - {"rkbr1b1q/p1pppppp/1p1n4/7n/5QP1/3N4/PPPPPP1P/RKBR1BN1 w DAda - 4 9", - {37, 943, 34382, 880474, 31568111, 842265141}}, // 950 - {"rkbr1nqb/pppp2np/8/4ppp1/1P6/6N1/P1PPPPPP/RKBRN1QB w DAda - 1 9", - {23, 574, 13260, 362306, 9020291, 261247606}}, // 951 - {"rbkr1nnq/p1p1pp1p/1p4p1/3p4/b3P3/4N3/PPPPNPPP/RBKRB1Q1 w DAda - 0 9", - {26, 900, 23414, 805006, 21653203, 745802405}}, // 952 - {"rkrbb1nq/p2pppp1/1p4n1/2p4p/3N4/4P1P1/PPPP1P1P/RKRBBN1Q w CAca - 0 9", - {32, 697, 22231, 531121, 17150175, 441578567}}, // 953 - {"rkrnbb1q/pp2pp1p/6pn/2pp4/2B1P2P/8/PPPP1PP1/RKRNB1NQ w CAca - 0 9", - {28, 854, 23853, 755990, 21823412, 712787248}}, // 954 - {"rk2bnqb/pprpppp1/4n2p/2p5/P7/3P2NP/1PP1PPP1/RKRNB1QB w CAa - 1 9", - {26, 596, 16251, 414862, 11758184, 323043654}}, // 955 - {"r1krnnbq/pp1ppp1p/6p1/2p5/2P5/P3P3/Rb1P1PPP/1BKRNNBQ w Dda - 0 
9", - {2, 61, 1312, 40072, 937188, 28753562}}, // 956 - {"1krbnnbq/1pp1p1pp/r7/p2p1p2/3PP3/2P3P1/PP3P1P/RKRBNNBQ w CAc - 0 9", - {30, 953, 28033, 860530, 25531358, 787205262}}, // 957 - {"rkr1nbbq/2ppp1pp/1pn5/p4p2/P6P/3P4/1PP1PPPB/RKRNNB1Q w CAca - 1 9", - {24, 645, 15689, 446423, 11484012, 341262639}}, // 958 - {"rkrnnqbb/p1ppp2p/Qp6/4Pp2/5p2/8/PPPP2PP/RKRNN1BB w CAca - 0 9", - {35, 929, 32020, 896130, 31272517, 915268405}}, // 959 - {"bbq1nr1r/pppppk1p/2n2p2/6p1/P4P2/4P1P1/1PPP3P/BBQNNRKR w HF - 1 9", - {23, 589, 14744, 387556, 10316716, 280056112}}, // 960 + // {"rbkrb1qn/1pp1ppp1/3pn2p/pP6/8/4N1P1/P1PPPP1P/RBKRB1QN w DAda - 0 9", + // {21, 544, 12492, 338832, 8381483, 236013157}}, // 936 + // {"rkrbbnqn/ppppp3/5p2/6pp/5PBP/4P3/PPPP2P1/RKR1BNQN w CAca - 0 9", {30, + // 891, 25435, 764356, 21894752, 669256602}}, // 937 + // {"rkr1bb1n/ppppp1pp/5p2/4n3/3QP3/5P2/RPPP2PP/1KRNBB1N w Cca - 1 9", {45, + // 1172, 51766, 1332060, 57856784, 1501852662}}, // 938 + // {"rkr1bqnb/pp1ppppp/8/2pN4/1P6/5N2/P1PPnPPP/RKR1BQ1B w CAca - 0 9", {28, + // 730, 20511, 559167, 16323242, 463032124}}, // 939 + // {"rbkrnqb1/2ppppp1/p5np/1p6/8/3N4/PPPPPPPP/RBKRQNB1 w DAda - 2 9", {20, + // 417, 9159, 217390, 5180716, 133936564}}, // 940 + // {"rkrbnqb1/p1pppnpp/5p2/1p6/2P5/1P1P1N2/P3PPPP/RKRB1QBN w CAca - 0 9", + // {25, 546, 14039, 330316, 8813781, 222026485}}, // 941 + // {"rkr1qbbn/ppppppp1/4n3/7p/8/P7/KPPPPPPP/R1RNQBBN w ca - 0 9", {22, 484, + // 11458, 267495, 6633319, 163291279}}, // 942 + // {"rkrnqnb1/1ppppp2/p5p1/7p/8/P1bPP3/1PP1QPPP/RKRN1NBB w CAca - 0 9", {22, + // 636, 15526, 441001, 11614241, 331083405}}, // 943 + // {"b2krn1q/p1rppppp/1Q3n2/2p1b3/1P4P1/8/P1PPPP1P/BBRKRNN1 w ECe - 3 9", + // {36, 1192, 42945, 1406795, 50382104, 1650202838}}, // 944 + // {"brkbrnn1/pp1pppp1/7q/2p5/6Pp/4P1NP/PPPP1P2/BRKBR1NQ w EBeb - 2 9", {30, + // 978, 29593, 942398, 29205057, 936568065}}, // 945 + // {"brkrnb1q/pp1p1ppp/2p1p3/5n2/1P6/5N1N/P1PPPPPP/BRKR1B1Q w DBdb - 1 9", + // {31, 897, 27830, 810187, 25423729, 755334868}}, // 946 + // {"brkr1nqb/pp1p1pp1/2pn3p/P3p3/4P3/6P1/1PPP1P1P/BRKRNNQB w DBdb - 0 9", + // {19, 382, 8052, 182292, 4232274, 103537333}}, // 947 + // {"r1bkrn1q/ppbppppp/5n2/2p5/3P4/P6N/1PP1PPPP/RBBKRNQ1 w EAea - 3 9", {27, + // 822, 22551, 678880, 19115128, 578210135}}, // 948 + // {"rkbbrnnq/pp2pppp/8/2pp4/P1P5/1P3P2/3PP1PP/RKBBRNNQ w EAea - 1 9", {23, + // 643, 15410, 442070, 11170489, 329615708}}, // 949 + // {"rkbr1b1q/p1pppppp/1p1n4/7n/5QP1/3N4/PPPPPP1P/RKBR1BN1 w DAda - 4 9", + // {37, 943, 34382, 880474, 31568111, 842265141}}, // 950 + // {"rkbr1nqb/pppp2np/8/4ppp1/1P6/6N1/P1PPPPPP/RKBRN1QB w DAda - 1 9", {23, + // 574, 13260, 362306, 9020291, 261247606}}, // 951 + // {"rbkr1nnq/p1p1pp1p/1p4p1/3p4/b3P3/4N3/PPPPNPPP/RBKRB1Q1 w DAda - 0 9", + // {26, 900, 23414, 805006, 21653203, 745802405}}, // 952 + // {"rkrbb1nq/p2pppp1/1p4n1/2p4p/3N4/4P1P1/PPPP1P1P/RKRBBN1Q w CAca - 0 9", + // {32, 697, 22231, 531121, 17150175, 441578567}}, // 953 + // {"rkrnbb1q/pp2pp1p/6pn/2pp4/2B1P2P/8/PPPP1PP1/RKRNB1NQ w CAca - 0 9", + // {28, 854, 23853, 755990, 21823412, 712787248}}, // 954 + // {"rk2bnqb/pprpppp1/4n2p/2p5/P7/3P2NP/1PP1PPP1/RKRNB1QB w CAa - 1 9", {26, + // 596, 16251, 414862, 11758184, 323043654}}, // 955 + // {"r1krnnbq/pp1ppp1p/6p1/2p5/2P5/P3P3/Rb1P1PPP/1BKRNNBQ w Dda - 0 9", {2, + // 61, 1312, 40072, 937188, 28753562}}, // 956 + // {"1krbnnbq/1pp1p1pp/r7/p2p1p2/3PP3/2P3P1/PP3P1P/RKRBNNBQ w CAc - 0 9", + // {30, 953, 28033, 860530, 25531358, 787205262}}, // 957 + // 
{"rkr1nbbq/2ppp1pp/1pn5/p4p2/P6P/3P4/1PP1PPPB/RKRNNB1Q w CAca - 1 9", + // {24, 645, 15689, 446423, 11484012, 341262639}}, // 958 + // {"rkrnnqbb/p1ppp2p/Qp6/4Pp2/5p2/8/PPPP2PP/RKRNN1BB w CAca - 0 9", {35, + // 929, 32020, 896130, 31272517, 915268405}}, // 959 + // {"bbq1nr1r/pppppk1p/2n2p2/6p1/P4P2/4P1P1/1PPP3P/BBQNNRKR w HF - 1 9", + // {23, 589, 14744, 387556, 10316716, 280056112}}, // 960 {"bqrkrbnn/ppp1pppp/8/8/8/8/PPP1PPPP/BQRKRBNN w CKeq - 0 1", {19, 342, 6987, 142308, 3294156, 75460468}}, // 961 castling {"r1bkrn1q/ppbppppp/5n2/2p5/3P4/P6N/1PP1PPPP/RBBKRNQ1 w KQkq - 3 9", diff --git a/src/mcts/params.cc b/src/mcts/params.cc index b5facc5d3e..0f23e5a228 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -27,6 +27,8 @@ #include "mcts/params.h" +#include "utils/exception.h" + namespace lczero { namespace { @@ -58,6 +60,9 @@ const OptionId SearchParams::kCpuctId{ "cpuct_init constant from \"UCT search\" algorithm. Higher values promote " "more exploration/wider search, lower values promote more " "confidence/deeper search."}; +const OptionId SearchParams::kCpuctAtRootOffsetId{ + "cpuct-root-offset", "CPuctRootOffset", + "cpuct_init value adjustment for the root node."}; const OptionId SearchParams::kCpuctBaseId{ "cpuct-base", "CPuctBase", "cpuct_base constant from \"UCT search\" algorithm. Lower value means " @@ -204,6 +209,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMaxPrefetchBatchId, 0, 1024) = 32; options->Add(kLogitQId) = false; options->Add(kCpuctId, 0.0f, 100.0f) = 3.0f; + options->Add(kCpuctAtRootOffsetId, -100.0f, 100.0f) = 0.0f; options->Add(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f; options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f; options->Add(kTemperatureId, 0.0f, 100.0f) = 0.0f; @@ -252,6 +258,7 @@ SearchParams::SearchParams(const OptionsDict& options) : options_(options), kLogitQ(options.Get(kLogitQId.GetId())), kCpuct(options.Get(kCpuctId.GetId())), + kCpuctAtRootOffset(options.Get(kCpuctAtRootOffsetId.GetId())), kCpuctBase(options.Get(kCpuctBaseId.GetId())), kCpuctFactor(options.Get(kCpuctFactorId.GetId())), kNoiseEpsilon(options.Get(kNoiseId.GetId()) @@ -282,6 +289,10 @@ SearchParams::SearchParams(const OptionsDict& options) kShortSightedness(options.Get(kShortSightednessId.GetId())), kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), kMaxConcurrentSearchers( - options.Get(kMaxConcurrentSearchersId.GetId())) {} + options.Get(kMaxConcurrentSearchersId.GetId())) { + if (kCpuct + kCpuctAtRootOffset < 0.0f) { + throw Exception("CPuct + CPuctRootOffset must be >= 0."); + } +} } // namespace lczero diff --git a/src/mcts/params.h b/src/mcts/params.h index df58f9279c..8da62928e8 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -48,6 +48,7 @@ class SearchParams { } bool GetLogitQ() const { return kLogitQ; } float GetCpuct() const { return kCpuct; } + float GetCpuctOffsetAtRoot() const { return kCpuctAtRootOffset; } float GetCpuctBase() const { return kCpuctBase; } float GetCpuctFactor() const { return kCpuctFactor; } float GetTemperature() const { @@ -107,6 +108,7 @@ class SearchParams { static const OptionId kMaxPrefetchBatchId; static const OptionId kLogitQId; static const OptionId kCpuctId; + static const OptionId kCpuctAtRootOffsetId; static const OptionId kCpuctBaseId; static const OptionId kCpuctFactorId; static const OptionId kTemperatureId; @@ -149,6 +151,7 @@ class SearchParams { // trivial search optimiations. 
const bool kLogitQ; const float kCpuct; + const float kCpuctAtRootOffset; const float kCpuctBase; const float kCpuctFactor; const float kNoiseEpsilon; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 466f9bca61..1b9bdde9e5 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -207,8 +207,10 @@ inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node) { : -node->GetQ() - value * std::sqrt(node->GetVisitedPolicy()); } -inline float ComputeCpuct(const SearchParams& params, uint32_t N) { - const float init = params.GetCpuct(); +inline float ComputeCpuct(const SearchParams& params, uint32_t N, + bool is_root_node) { + const float init = + params.GetCpuct() + (is_root_node ? params.GetCpuctOffsetAtRoot() : 0.0f); const float k = params.GetCpuctFactor(); const float base = params.GetCpuctBase(); return init + (k ? k * FastLog((N + base) / base) : 0.0f); @@ -218,7 +220,7 @@ inline float ComputeCpuct(const SearchParams& params, uint32_t N) { std::vector Search::GetVerboseStats(Node* node, bool is_black_to_move) const { const float fpu = GetFpu(params_, node, node == root_node_); - const float cpuct = ComputeCpuct(params_, node->GetN()); + const float cpuct = ComputeCpuct(params_, node->GetN(), node == root_node_); const float U_coeff = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); const bool logit_q = params_.GetLogitQ(); @@ -883,7 +885,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( // If we fall through, then n_in_flight_ has been incremented but this // playout remains incomplete; we must go deeper. - const float cpuct = ComputeCpuct(params_, node->GetN()); + const float cpuct = ComputeCpuct(params_, node->GetN(), is_root_node); const float puct_mult = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); float best = std::numeric_limits::lowest(); @@ -1098,7 +1100,8 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) { // Populate all subnodes and their scores. typedef std::pair ScoredEdge; std::vector scores; - const float cpuct = ComputeCpuct(params_, node->GetN()); + const float cpuct = + ComputeCpuct(params_, node->GetN(), node == search_->root_node_); const float puct_mult = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); const float fpu = GetFpu(params_, node, node == search_->root_node_); From 855870d693f84a825cf4741b3467da3eba071f7a Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 16 Feb 2020 21:22:54 +1100 Subject: [PATCH 023/151] Ensure not to prune before first eval time is set. (#1080) --- src/mcts/stoppers/stoppers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mcts/stoppers/stoppers.cc b/src/mcts/stoppers/stoppers.cc index 225689fe32..6783557ef9 100644 --- a/src/mcts/stoppers/stoppers.cc +++ b/src/mcts/stoppers/stoppers.cc @@ -192,6 +192,7 @@ bool SmartPruningStopper::ShouldStop(const IterationStats& stats, first_eval_time_ = stats.time_since_movestart; return false; } + if (!first_eval_time_) return false; if (stats.edge_n.size() == 0) return false; if (stats.time_since_movestart < *first_eval_time_ + kSmartPruningToleranceMs) { From f5d2c10d40c3eaaf132efe1adaaffffe0b1e5205 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Mon, 17 Feb 2020 22:01:36 +0100 Subject: [PATCH 024/151] Support for non-zero draw scores (aka humbleness). (#1066) * Non-zero values for draw. * Use WL in WDL reporting. * Changed as discussed in Discord. * Added new score types instead of introducing parameter. * Rename some of parameters. * Typo. * Address review comments. * Address review comments. 
* 100 is better than 1. * Address review comments. Thanks to borg323@ for flagging an embarrassing error! * Addressed review comments * More comments addressed. --- src/mcts/node.cc | 18 ++--- src/mcts/node.h | 15 ++-- src/mcts/params.cc | 39 ++++++++++- src/mcts/params.h | 14 +++- src/mcts/search.cc | 162 +++++++++++++++++++++++++------------------ src/mcts/search.h | 14 ++-- src/selfplay/game.cc | 4 +- 7 files changed, 175 insertions(+), 91 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index c5a89c524c..29962843d2 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -212,7 +212,7 @@ std::string Node::DebugString() const { std::ostringstream oss; oss << " Term:" << is_terminal_ << " This:" << this << " Parent:" << parent_ << " Index:" << index_ << " Child:" << child_.get() - << " Sibling:" << sibling_.get() << " Q:" << q_ << " N:" << n_ + << " Sibling:" << sibling_.get() << " WL:" << wl_ << " N:" << n_ << " N_:" << n_in_flight_ << " Edges:" << edges_.size(); return oss.str(); } @@ -220,13 +220,13 @@ std::string Node::DebugString() const { void Node::MakeTerminal(GameResult result) { is_terminal_ = true; if (result == GameResult::DRAW) { - q_ = 0.0f; + wl_ = 0.0f; d_ = 1.0f; } else if (result == GameResult::WHITE_WON) { - q_ = 1.0f; + wl_ = 1.0f; d_ = 0.0f; } else if (result == GameResult::BLACK_WON) { - q_ = -1.0f; + wl_ = -1.0f; d_ = 0.0f; } } @@ -243,13 +243,13 @@ void Node::MakeNotTerminal() { if (n > 0) { n_ += n; // Flip Q for opponent. - q_ += -child.GetQ(0.0f) * n; + wl_ += -child.GetWL() * n; d_ += child.GetD() * n; } } // Recompute with current eval (instead of network's) and children's eval. - q_ /= n_; + wl_ /= n_; d_ /= n_; } } @@ -267,7 +267,7 @@ void Node::CancelScoreUpdate(int multivisit) { void Node::FinalizeScoreUpdate(float v, float d, int multivisit) { // Recompute Q. - q_ += multivisit * (v - q_) / (n_ + multivisit); + wl_ += multivisit * (v - wl_) / (n_ + multivisit); d_ += multivisit * (d - d_) / (n_ + multivisit); // If first visit, update parent's sum of policies visited at least once. @@ -383,8 +383,8 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result, result.result = 0; } - // Aggregate evaluation Q. - result.root_q = -GetQ(); + // Aggregate evaluation WL. + result.root_q = -GetWL(); result.best_q = best_q; // Draw probability of WDL head. diff --git a/src/mcts/node.h b/src/mcts/node.h index 280b44f8b6..cc82028b3e 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -32,6 +32,7 @@ #include #include #include + #include "chess/board.h" #include "chess/callbacks.h" #include "chess/position.h" @@ -152,9 +153,10 @@ class Node { uint32_t GetChildrenVisits() const { return n_ > 0 ? n_ - 1 : 0; } // Returns n = n_if_flight. int GetNStarted() const { return n_ + n_in_flight_; } + float GetQ(float draw_score) const { return wl_ + draw_score * d_; } // Returns node eval, i.e. average subtree V for non-terminal node and -1/0/1 // for terminal nodes. - float GetQ() const { return q_; } + float GetWL() const { return wl_; } float GetD() const { return d_; } // Returns whether the node is known to be draw/lose/win. @@ -269,8 +271,9 @@ class Node { // subtree. For terminal nodes, eval is stored. This is from the perspective // of the player who "just" moved to reach this position, rather than from the // perspective of the player-to-move for the position. - float q_ = 0.0f; - // Averaged draw probability. Works similarly to Q, except that D is not + // WL stands for "W minus L". Is equal to Q if draw score is 0. 
+ float wl_ = 0.0f; + // Averaged draw probability. Works similarly to WL, except that D is not // flipped depending on the side to move. float d_ = 0.0f; // Sum of policy priors which have had at least one playout. @@ -337,13 +340,15 @@ class EdgeAndNode { Node* node() const { return node_; } // Proxy functions for easier access to node/edge. - float GetQ(float default_q, bool logit_q = false) const { + float GetQ(float default_q, float draw_score, bool logit_q = false) const { return (node_ && node_->GetN() > 0) ? // Scale Q slightly to avoid logit(1) = infinity. - (logit_q ? FastLogit(0.9999999f * node_->GetQ()) : node_->GetQ()) + (logit_q ? FastLogit(0.9999999f * node_->GetQ(draw_score)) + : node_->GetQ(draw_score)) : default_q; } + float GetWL() const { return node_ ? node_->GetWL() : 0.0f; } float GetD() const { return (node_ && node_->GetN() > 0) ? node_->GetD() : 0.0f; } diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 0f23e5a228..09344e867a 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -201,6 +201,18 @@ const OptionId SearchParams::kMaxConcurrentSearchersId{ "max-concurrent-searchers", "MaxConcurrentSearchers", "If not 0, at most this many search workers can be gathering minibatches " "at once."}; +const OptionId SearchParams::kDrawScoreSidetomoveId{ + "draw-score-sidetomove", "DrawScoreSideToMove", + "Score of a drawn game, as seen by a player making the move."}; +const OptionId SearchParams::kDrawScoreOpponentId{ + "draw-score-opponent", "DrawScoreOpponent", + "Score of a drawn game, as seen by the opponent."}; +const OptionId SearchParams::kDrawScoreWhiteId{ + "draw-score-white", "DrawScoreWhite", + "Adjustment, added to a draw score of a white player."}; +const OptionId SearchParams::kDrawScoreBlackId{ + "draw-score-black", "DrawScoreBlack", + "Adjustment, added to a draw score of a black player."}; void SearchParams::Populate(OptionsParser* options) { // Here the uci optimized defaults" are set. 
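For illustration, a minimal standalone sketch (not part of the patch) of how these four draw-score options combine. The helper names (DrawScoreParams, DrawScore) are hypothetical; the arithmetic mirrors Node::GetQ(draw_score) above and Search::GetDrawScore() added later in this patch, with the UCI integer values already divided by 100 as done in the SearchParams constructor.

// Hypothetical standalone example, assuming the semantics described by the
// option texts above; not lc0 code.
#include <cstdio>

struct DrawScoreParams {
  float sidetomove;   // DrawScoreSideToMove / 100
  float opponent;     // DrawScoreOpponent / 100
  float white_delta;  // DrawScoreWhite / 100
  float black_delta;  // DrawScoreBlack / 100
};

// Same formula as Search::GetDrawScore(is_odd_depth) in this patch.
float DrawScore(const DrawScoreParams& p, bool is_odd_depth,
                bool black_to_move_at_root) {
  return (is_odd_depth ? p.opponent : p.sidetomove) +
         (is_odd_depth == black_to_move_at_root ? p.white_delta
                                                : p.black_delta);
}

int main() {
  // DrawScoreSideToMove=-10, everything else 0, white to move at the root.
  const DrawScoreParams p{-0.10f, 0.0f, 0.0f, 0.0f};
  const float wl = 0.0f, d = 1.0f;  // a dead-drawn node
  // Node::GetQ(draw_score) == wl + draw_score * d.
  std::printf("Q at even depth: %.2f\n", wl + DrawScore(p, false, false) * d);
  std::printf("Q at odd depth:  %.2f\n", wl + DrawScore(p, true, false) * d);
}

This prints -0.10 and 0.00: with these settings a dead draw is scored as slightly negative at nodes where the root's side is to move and as neutral at the opponent's nodes, which is how the options let draws be valued asymmetrically.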
@@ -239,14 +251,22 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kSyzygyFastPlayId) = true; options->Add(kMultiPvId, 1, 500) = 1; options->Add(kPerPvCountersId) = false; - std::vector score_type = {"centipawn", "centipawn_2018", - "win_percentage", "Q"}; + std::vector score_type = {"centipawn", + "centipawn_with_drawscore", + "centipawn_2018", + "win_percentage", + "Q", + "W-L"}; options->Add(kScoreTypeId, score_type) = "centipawn"; std::vector history_fill_opt{"no", "fen_only", "always"}; options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; options->Add(kMaxConcurrentSearchersId, 0, 128) = 0; + options->Add(kDrawScoreSidetomoveId, -100, 100) = 0; + options->Add(kDrawScoreOpponentId, -100, 100) = 0; + options->Add(kDrawScoreWhiteId, -100, 100) = 0; + options->Add(kDrawScoreBlackId, -100, 100) = 0; options->HideOption(kNoiseEpsilonId); options->HideOption(kNoiseAlphaId); @@ -289,10 +309,23 @@ SearchParams::SearchParams(const OptionsDict& options) kShortSightedness(options.Get(kShortSightednessId.GetId())), kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), kMaxConcurrentSearchers( - options.Get(kMaxConcurrentSearchersId.GetId())) { + options.Get(kMaxConcurrentSearchersId.GetId())), + kDrawScoreSidetomove{options.Get(kDrawScoreSidetomoveId.GetId()) / + 100.0f}, + kDrawScoreOpponent{options.Get(kDrawScoreOpponentId.GetId()) / + 100.0f}, + kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, + kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f} { if (kCpuct + kCpuctAtRootOffset < 0.0f) { throw Exception("CPuct + CPuctRootOffset must be >= 0."); } + if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > + 1.0f) { + throw Exception( + "max{|sidetomove|+|opponent|} + max{|white|+|black|} draw score must " + "be <= 100"); + } } } // namespace lczero diff --git a/src/mcts/params.h b/src/mcts/params.h index 8da62928e8..299874e3e2 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -102,6 +102,10 @@ class SearchParams { FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; } int GetMaxConcurrentSearchers() const { return kMaxConcurrentSearchers; } + float GetSidetomoveDrawScore() const { return kDrawScoreSidetomove; } + float GetOpponentDrawScore() const { return kDrawScoreOpponent; } + float GetWhiteDrawDelta() const { return kDrawScoreWhite; } + float GetBlackDrawDelta() const { return kDrawScoreBlack; } // Search parameter IDs. static const OptionId kMiniBatchSizeId; @@ -140,6 +144,10 @@ class SearchParams { static const OptionId kShortSightednessId; static const OptionId kDisplayCacheUsageId; static const OptionId kMaxConcurrentSearchersId; + static const OptionId kDrawScoreSidetomoveId; + static const OptionId kDrawScoreOpponentId; + static const OptionId kDrawScoreWhiteId; + static const OptionId kDrawScoreBlackId; private: const OptionsDict& options_; @@ -148,7 +156,7 @@ class SearchParams { // reasons. // 2. Parameter has to stay the say during the search. // TODO(crem) Some of those parameters can be converted to be dynamic after - // trivial search optimiations. + // trivial search optimizations. 
const bool kLogitQ; const float kCpuct; const float kCpuctAtRootOffset; @@ -172,6 +180,10 @@ class SearchParams { const float kShortSightedness; const bool kDisplayCacheUsage; const int kMaxConcurrentSearchers; + const float kDrawScoreSidetomove; + const float kDrawScoreOpponent; + const float kDrawScoreWhite; + const float kDrawScoreBlack; }; } // namespace lczero diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 1b9bdde9e5..4f182258f6 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -101,6 +101,7 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { const auto score_type = params_.GetScoreType(); const auto per_pv_counters = params_.GetPerPvCounters(); const auto display_cache_usage = params_.GetDisplayCacheUsage(); + const auto draw_score = GetDrawScore(false); std::vector uci_infos; @@ -128,24 +129,29 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { common_info.tb_hits = tb_hits_.load(std::memory_order_acquire); int multipv = 0; - const auto default_q = -root_node_->GetQ(); + const auto default_q = -root_node_->GetWL(); for (const auto& edge : edges) { ++multipv; uci_infos.emplace_back(common_info); auto& uci_info = uci_infos.back(); - const auto& q = edge.GetQ(default_q); - if (score_type == "centipawn") { + const auto wl = edge.GetWL(); + const auto d = edge.GetD(); + const int w = static_cast(std::round(500.0 * (1.0 + wl - d))); + const auto q = edge.GetQ(default_q, draw_score); + if (score_type == "centipawn_with_drawscore") { uci_info.score = 295 * q / (1 - 0.976953126 * std::pow(q, 14)); + } else if (score_type == "centipawn") { + uci_info.score = 295 * wl / (1 - 0.976953126 * std::pow(q, 14)); } else if (score_type == "centipawn_2018") { - uci_info.score = 290.680623072 * tan(1.548090806 * q); + uci_info.score = 290.680623072 * tan(1.548090806 * wl); } else if (score_type == "win_percentage") { - uci_info.score = q * 5000 + 5000; + uci_info.score = wl * 5000 + 5000; } else if (score_type == "Q") { uci_info.score = q * 10000; + } else if (score_type == "W-L") { + uci_info.score = wl * 10000; } - const auto& d = edge.GetD(); - const int w = static_cast(std::round(500.0 * (1.0 + q - d))); - const int l = static_cast(std::round(500.0 * (1.0 - q - d))); + const int l = static_cast(std::round(500.0 * (1.0 - wl - d))); // Using 1000-w-l instead of 1000*d for D score so that W+D+L add up to // 1000.0. uci_info.wdl = ThinkingInfo::WDL{w, 1000 - w - l, l}; @@ -199,12 +205,23 @@ int64_t Search::GetTimeSinceStart() const { .count(); } +// Root is depth 0, i.e. even depth. +float Search::GetDrawScore(bool is_odd_depth) const { + return (is_odd_depth ? params_.GetOpponentDrawScore() + : params_.GetSidetomoveDrawScore()) + + (is_odd_depth == played_history_.IsBlackToMove() + ? params_.GetWhiteDrawDelta() + : params_.GetBlackDrawDelta()); +} + namespace { -inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node) { +inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node, + float draw_score) { const auto value = params.GetFpuValue(is_root_node); return params.GetFpuAbsolute(is_root_node) ? 
value - : -node->GetQ() - value * std::sqrt(node->GetVisitedPolicy()); + : -node->GetQ(-draw_score) - + value * std::sqrt(node->GetVisitedPolicy()); } inline float ComputeCpuct(const SearchParams& params, uint32_t N, @@ -217,10 +234,14 @@ inline float ComputeCpuct(const SearchParams& params, uint32_t N, } } // namespace -std::vector Search::GetVerboseStats(Node* node, - bool is_black_to_move) const { - const float fpu = GetFpu(params_, node, node == root_node_); - const float cpuct = ComputeCpuct(params_, node->GetN(), node == root_node_); +std::vector Search::GetVerboseStats(Node* node) const { + assert(node == root_node_ || node->GetParent() == root_node_); + const bool is_root = (node == root_node_); + const bool is_odd_depth = !is_root; + const bool is_black_to_move = (played_history_.IsBlackToMove() == is_root); + const float draw_score = GetDrawScore(is_odd_depth); + const float fpu = GetFpu(params_, node, is_root, draw_score); + const float cpuct = ComputeCpuct(params_, node->GetN(), is_root); const float U_coeff = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); const bool logit_q = params_.GetLogitQ(); @@ -252,22 +273,25 @@ std::vector Search::GetVerboseStats(Node* node, oss << "(P: " << std::setw(5) << std::setprecision(2) << edge.GetP() * 100 << "%) "; - oss << "(Q: " << std::setw(8) << std::setprecision(5) << edge.GetQ(fpu) + oss << "(WL: " << std::setw(8) << std::setprecision(5) << edge.GetWL() << ") "; oss << "(D: " << std::setw(6) << std::setprecision(3) << edge.GetD() << ") "; + oss << "(Q: " << std::setw(8) << std::setprecision(5) + << edge.GetQ(fpu, draw_score) << ") "; + oss << "(U: " << std::setw(6) << std::setprecision(5) << edge.GetU(U_coeff) << ") "; oss << "(Q+U: " << std::setw(8) << std::setprecision(5) - << edge.GetQ(fpu, logit_q) + edge.GetU(U_coeff) << ") "; + << edge.GetQ(fpu, draw_score, logit_q) + edge.GetU(U_coeff) << ") "; oss << "(V: "; std::optional v; if (edge.IsTerminal()) { - v = edge.node()->GetQ(); + v = edge.node()->GetQ(draw_score); } else { NNCacheLock nneval = GetCachedNNEval(edge.node()); if (nneval) v = -nneval->q; @@ -286,8 +310,7 @@ std::vector Search::GetVerboseStats(Node* node, } void Search::SendMovesStats() const REQUIRES(counters_mutex_) { - const bool is_black_to_move = played_history_.IsBlackToMove(); - auto move_stats = GetVerboseStats(root_node_, is_black_to_move); + auto move_stats = GetVerboseStats(root_node_); if (params_.GetVerboseStats()) { std::vector infos; @@ -306,8 +329,7 @@ void Search::SendMovesStats() const REQUIRES(counters_mutex_) { LOGFILE << "--- Opponent moves after: " << final_bestmove_.GetMove(played_history_.IsBlackToMove()).as_string(); - for (const auto& line : - GetVerboseStats(final_bestmove_.node(), !is_black_to_move)) { + for (const auto& line : GetVerboseStats(final_bestmove_.node())) { LOGFILE << line; } } @@ -370,11 +392,11 @@ void Search::MaybeTriggerStop(const IterationStats& stats, std::pair Search::GetBestEval() const { SharedMutex::SharedLock lock(nodes_mutex_); Mutex::Lock counters_lock(counters_mutex_); - float parent_q = -root_node_->GetQ(); + float parent_wl = -root_node_->GetWL(); float parent_d = root_node_->GetD(); - if (!root_node_->HasChildren()) return {parent_q, parent_d}; + if (!root_node_->HasChildren()) return {parent_wl, parent_d}; EdgeAndNode best_edge = GetBestChildNoTemperature(root_node_); - return {best_edge.GetQ(parent_q), best_edge.GetD()}; + return {best_edge.GetWL(), best_edge.GetD()}; } std::pair Search::GetBestMove() { @@ -438,9 +460,8 @@ void 
Search::EnsureBestMoveKnown() REQUIRES(nodes_mutex_) } } - final_bestmove_ = temperature - ? GetBestChildWithTemperature(root_node_, temperature) - : GetBestChildNoTemperature(root_node_); + final_bestmove_ = temperature ? GetBestRootChildWithTemperature(temperature) + : GetBestChildNoTemperature(root_node_); if (final_bestmove_.HasNode() && final_bestmove_.node()->HasChildren()) { final_pondermove_ = GetBestChildNoTemperature(final_bestmove_.node()); @@ -468,8 +489,8 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, root_limit.end()) { continue; } - const auto Q = edge.GetQ(0.0f); - edges.emplace_back(edge.IsTerminal() && Q > 0.0f, edge.GetN(), Q, + const auto WL = edge.GetWL(); + edges.emplace_back(edge.IsTerminal() && WL > 0.0f, edge.GetN(), WL, edge.GetP(), edge); } const auto middle = (static_cast(edges.size()) > count) @@ -489,46 +510,45 @@ EdgeAndNode Search::GetBestChildNoTemperature(Node* parent) const { return res.empty() ? EdgeAndNode() : res.front(); } -// Returns a child chosen according to weighted-by-temperature visit count. -EdgeAndNode Search::GetBestChildWithTemperature(Node* parent, - float temperature) const { +// Returns a child of a root chosen according to weighted-by-temperature visit +// count. +EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { + // Root is at even depth. + const bool draw_score = GetDrawScore(/* is_odd_depth= */ false); MoveList root_limit; - if (parent == root_node_) { - PopulateRootMoveLimit(&root_limit); - } + PopulateRootMoveLimit(&root_limit); std::vector cumulative_sums; float sum = 0.0; float max_n = 0.0; const float offset = params_.GetTemperatureVisitOffset(); float max_eval = -1.0f; - const float fpu = GetFpu(params_, parent, parent == root_node_); + const float fpu = + GetFpu(params_, root_node_, /* is_root= */ true, draw_score); - for (auto edge : parent->Edges()) { - if (parent == root_node_ && !root_limit.empty() && - std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) == - root_limit.end()) { + for (auto edge : root_node_->Edges()) { + if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), + edge.GetMove()) == root_limit.end()) { continue; } if (edge.GetN() + offset > max_n) { max_n = edge.GetN() + offset; - max_eval = edge.GetQ(fpu); + max_eval = edge.GetQ(fpu, draw_score); } } // No move had enough visits for temperature, so use default child criteria - if (max_n <= 0.0f) return GetBestChildNoTemperature(parent); + if (max_n <= 0.0f) return GetBestChildNoTemperature(root_node_); // TODO(crem) Simplify this code when samplers.h is merged. 
const float min_eval = max_eval - params_.GetTemperatureWinpctCutoff() / 50.0f; - for (auto edge : parent->Edges()) { - if (parent == root_node_ && !root_limit.empty() && - std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) == - root_limit.end()) { + for (auto edge : root_node_->Edges()) { + if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), + edge.GetMove()) == root_limit.end()) { continue; } - if (edge.GetQ(fpu) < min_eval) continue; + if (edge.GetQ(fpu, draw_score) < min_eval) continue; sum += std::pow( std::max(0.0f, (static_cast(edge.GetN()) + offset) / max_n), 1 / temperature); @@ -541,13 +561,12 @@ EdgeAndNode Search::GetBestChildWithTemperature(Node* parent, std::lower_bound(cumulative_sums.begin(), cumulative_sums.end(), toss) - cumulative_sums.begin(); - for (auto edge : parent->Edges()) { - if (parent == root_node_ && !root_limit.empty() && - std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) == - root_limit.end()) { + for (auto edge : root_node_->Edges()) { + if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), + edge.GetMove()) == root_limit.end()) { continue; } - if (edge.GetQ(fpu) < min_eval) continue; + if (edge.GetQ(fpu, draw_score) < min_eval) continue; if (idx-- == 0) return edge; } assert(false); @@ -841,6 +860,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( // True on first iteration, false as we dive deeper. bool is_root_node = true; + const float even_draw_score = search_->GetDrawScore(false); + const float odd_draw_score = search_->GetDrawScore(true); uint16_t depth = 0; bool node_already_updated = true; @@ -890,7 +911,11 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); float best = std::numeric_limits::lowest(); float second_best = std::numeric_limits::lowest(); - const float fpu = GetFpu(params_, node, is_root_node); + // Root depth is 1 here, while for GetDrawScore() it's 0-based, that's why + // the weirdness. + const float draw_score = + (depth % 2 == 0) ? odd_draw_score : even_draw_score; + const float fpu = GetFpu(params_, node, is_root_node, draw_score); for (auto child : node->Edges()) { if (is_root_node) { // If there's no chance to catch up to the current best node with @@ -910,7 +935,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( continue; } } - const float Q = child.GetQ(fpu, params_.GetLogitQ()); + const float Q = child.GetQ(fpu, draw_score, params_.GetLogitQ()); const float score = child.GetU(puct_mult) + Q; if (score > best) { second_best = best; @@ -1069,14 +1094,16 @@ void SearchWorker::MaybePrefetchIntoCache() { computation_->GetCacheMisses() < params_.GetMaxPrefetchBatch()) { history_.Trim(search_->played_history_.GetLength()); SharedMutex::SharedLock lock(search_->nodes_mutex_); - PrefetchIntoCache(search_->root_node_, params_.GetMaxPrefetchBatch() - - computation_->GetCacheMisses()); + PrefetchIntoCache( + search_->root_node_, + params_.GetMaxPrefetchBatch() - computation_->GetCacheMisses(), false); } } // Prefetches up to @budget nodes into cache. Returns number of nodes // prefetched. -int SearchWorker::PrefetchIntoCache(Node* node, int budget) { +int SearchWorker::PrefetchIntoCache(Node* node, int budget, bool is_odd_depth) { + const float draw_score = search_->GetDrawScore(is_odd_depth); if (budget <= 0) return 0; // We are in a leaf, which is not yet being processed. 
@@ -1104,11 +1131,13 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) { ComputeCpuct(params_, node->GetN(), node == search_->root_node_); const float puct_mult = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); - const float fpu = GetFpu(params_, node, node == search_->root_node_); + const float fpu = + GetFpu(params_, node, node == search_->root_node_, draw_score); for (auto edge : node->Edges()) { if (edge.GetP() == 0.0f) continue; // Flip the sign of a score to be able to easily sort. - scores.emplace_back(-edge.GetU(puct_mult) - edge.GetQ(fpu), edge); + scores.emplace_back(-edge.GetU(puct_mult) - edge.GetQ(fpu, draw_score), + edge); } size_t first_unsorted_index = 0; @@ -1138,7 +1167,7 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) { if (i != scores.size() - 1) { // Sign of the score was flipped for sorting, so flip it back. const float next_score = -scores[i + 1].first; - const float q = edge.GetQ(-fpu); + const float q = edge.GetQ(-fpu, draw_score); if (next_score > q) { budget_to_spend = std::min(budget, int(edge.GetP() * puct_mult / (next_score - q) - @@ -1149,7 +1178,8 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) { } } history_.Append(edge.GetMove()); - const int budget_spent = PrefetchIntoCache(edge.node(), budget_to_spend); + const int budget_spent = + PrefetchIntoCache(edge.node(), budget_to_spend, !is_odd_depth); history_.Pop(); budget -= budget_spent; total_budget_spent += budget_spent; @@ -1178,7 +1208,7 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process, if (!node_to_process->nn_queried) { // Terminal nodes don't involve the neural NetworkComputation, nor do // they require any further processing after value retrieval. - node_to_process->v = node->GetQ(); + node_to_process->v = node->GetWL(); node_to_process->d = node->GetD(); return; } @@ -1257,7 +1287,7 @@ void SearchWorker::DoBackupUpdateSingleNode( // Current node might have become terminal from some other descendant, so // backup the rest of the way with more accurate values. if (n->IsTerminal()) { - v = n->GetQ(); + v = n->GetWL(); d = n->GetD(); } n->FinalizeScoreUpdate(v / (1.0f + params_.GetShortSightedness() * depth), @@ -1273,9 +1303,9 @@ void SearchWorker::DoBackupUpdateSingleNode( auto all_losing = true; if (can_convert && v <= 0.0f) { for (const auto& edge : p->Edges()) { - const auto Q = edge.GetQ(0.0f); - can_convert = can_convert && edge.IsTerminal() && Q <= 0.0f; - all_losing = all_losing && Q < 0.0f; + const auto WL = edge.GetWL(); + can_convert = can_convert && edge.IsTerminal() && WL <= 0.0f; + all_losing = all_losing && WL < 0.0f; } } diff --git a/src/mcts/search.h b/src/mcts/search.h index 616f025480..7442b48fc7 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -101,8 +101,7 @@ class Search { EdgeAndNode GetBestChildNoTemperature(Node* parent) const; std::vector GetBestChildrenNoTemperature(Node* parent, int count) const; - EdgeAndNode GetBestChildWithTemperature(Node* parent, - float temperature) const; + EdgeAndNode GetBestRootChildWithTemperature(float temperature) const; int64_t GetTimeSinceStart() const; void MaybeTriggerStop(const IterationStats& stats, StoppersHints* hints); @@ -126,12 +125,17 @@ class Search { void PopulateCommonIterationStats(IterationStats* stats); // Returns verbose information about given node, as vector of strings. - std::vector GetVerboseStats(Node* node, - bool is_black_to_move) const; + // Node can only be root or ponder (depth 1). 
+ std::vector GetVerboseStats(Node* node) const; // Returns NN eval for a given node from cache, if that node is cached. NNCacheLock GetCachedNNEval(Node* node) const; + // Returns the draw score at the root of the search. At odd depth pass true to + // the value of @is_odd_depth to change the sign of the draw score. + // Depth of a root node is 0 (even number). + float GetDrawScore(bool is_odd_depth) const; + mutable Mutex counters_mutex_ ACQUIRED_AFTER(nodes_mutex_); // Tells all threads to stop. std::atomic stop_{false}; @@ -280,7 +284,7 @@ class SearchWorker { NodeToProcess PickNodeToExtend(int collision_limit); void ExtendNode(Node* node); bool AddNodeToComputation(Node* node, bool add_if_cached); - int PrefetchIntoCache(Node* node, int budget); + int PrefetchIntoCache(Node* node, int budget, bool is_odd_depth); void FetchSingleNodeResult(NodeToProcess* node_to_process, int idx_in_computation); void DoBackupUpdateSingleNode(const NodeToProcess& node_to_process); diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index e23c985e3c..f67beaff87 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -139,11 +139,11 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, auto best_eval = search_->GetBestEval(); if (training) { // Append training data. The GameResult is later overwritten. - auto best_q = best_eval.first; + auto best_wl = best_eval.first; auto best_d = best_eval.second; training_data_.push_back(tree_[idx]->GetCurrentHead()->GetV4TrainingData( GameResult::UNDECIDED, tree_[idx]->GetPositionHistory(), - search_->GetParams().GetHistoryFill(), best_q, best_d)); + search_->GetParams().GetHistoryFill(), best_wl, best_d)); } float eval = best_eval.first; From 429c7c02e53aa23555f054af65e1265f8555787e Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 22 Feb 2020 15:08:36 +0100 Subject: [PATCH 025/151] Rename DirectX 12 backend to dx12 (#1076) * Rename DirectX 12 backend to dx12 * Missing include.# * Attempt to fix namespaces. * include dx_common.h only once * also change appveyor name to dx12 Co-authored-by: borg323 <39573933+borg323@users.noreply.github.com> --- appveyor.yml | 4 ++-- src/neural/dx/dx_common.h | 3 +++ src/neural/dx/layers_dx.h | 16 ++++++++++------ src/neural/dx/network_dx.cc | 9 +++++---- src/neural/dx/network_dx.h | 18 +++++++++++------- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 3f1a976901..4bb90dfd34 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -6,7 +6,7 @@ image: environment: matrix: - NAME: gpu-nvidia-cuda - - NAME: gpu-dx + - NAME: gpu-dx12 - NAME: gpu-opencl - NAME: cpu-dnnl - NAME: cpu-openblas @@ -18,7 +18,7 @@ install: - cmd: set BLAS=false - cmd: set GTEST=false - cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true -- cmd: IF %NAME%==gpu-dx set DX=true +- cmd: IF %NAME%==gpu-dx12 set DX=true - cmd: IF %NAME%==gpu-opencl set OPENCL=true - cmd: IF %NAME%==cpu-dnnl set BLAS=true - cmd: IF %NAME%==cpu-openblas set BLAS=true diff --git a/src/neural/dx/dx_common.h b/src/neural/dx/dx_common.h index 5365f08eed..71b0466762 100644 --- a/src/neural/dx/dx_common.h +++ b/src/neural/dx/dx_common.h @@ -25,10 +25,13 @@ Program grant you additional permission to convey the resulting work. 
*/ #pragma once + #include #include #include + #include + #include "d3dx12.h" #include "fp16_utils.h" diff --git a/src/neural/dx/layers_dx.h b/src/neural/dx/layers_dx.h index 1503f23d98..ac404cb9e8 100644 --- a/src/neural/dx/layers_dx.h +++ b/src/neural/dx/layers_dx.h @@ -24,16 +24,19 @@ terms of the respective license agreement, the licensors of this Program grant you additional permission to convey the resulting work. */ +#pragma once #include + +#include + #include "dx_common.h" #include "shader_wrapper.h" -#include namespace lczero { -class DxContext; - namespace dx_backend { + +class DxContext; constexpr int kMaxSupportedBatchSize = 256; // The Layer objects only hold memory for weights, biases, etc @@ -45,7 +48,8 @@ class BaseLayer { int GetH() const { return H; } int GetW() const { return W; } - BaseLayer(int c, int h, int w, BaseLayer* ip, DxContext* dx_context, bool fp16); + BaseLayer(int c, int h, int w, BaseLayer* ip, DxContext* dx_context, + bool fp16); virtual ~BaseLayer() = default; size_t GetOutputSize(int N) const { return (fp16_ ? sizeof(dx_half) : sizeof(float)) * N * C * H * W; @@ -122,7 +126,6 @@ class ConvMetaCommand { bool IsAvailable() { return create_succeeded_; } }; - class ConvLayer : public BaseLayer { using BaseLayer::C; using BaseLayer::GetC; @@ -203,10 +206,11 @@ class PolicyMapLayer : public BaseLayer { void Eval(int N, DXAlloc output, DXAlloc input, DXAlloc input2, DXAlloc scratch, DXAlloc scratch2, ID3D12GraphicsCommandList4* command_list) override; + private: const int used_size_; DXAlloc weights_; }; } // namespace dx_backend -} // namespace dx_backend +} // namespace lczero diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc index 24aa3128fa..30a6108b2b 100644 --- a/src/neural/dx/network_dx.cc +++ b/src/neural/dx/network_dx.cc @@ -24,6 +24,8 @@ terms of the respective license agreement, the licensors of this Program grant you additional permission to convey the resulting work. */ +#include "network_dx.h" + #include #include #include @@ -33,15 +35,13 @@ #include #include "layers_dx.h" -#include "network_dx.h" #include "neural/shared/policy_map.h" #include "shader_wrapper.h" #include "utils/bititer.h" #include "utils/exception.h" namespace lczero { - -using namespace dx_backend; +namespace dx_backend { uint64_t DxContext::FlushCL(ID3D12GraphicsCommandList4* cl) { if (!cl) cl = command_list_; @@ -966,6 +966,7 @@ std::unique_ptr MakeDxNetwork(const WeightsFile& weights, return std::make_unique(weights, options); } -REGISTER_NETWORK("dx", MakeDxNetwork, 120) +REGISTER_NETWORK("dx12", MakeDxNetwork, 120) +} // namespace dx_backend } // namespace lczero diff --git a/src/neural/dx/network_dx.h b/src/neural/dx/network_dx.h index e2c47091c8..98e9bc8fde 100644 --- a/src/neural/dx/network_dx.h +++ b/src/neural/dx/network_dx.h @@ -27,6 +27,7 @@ #pragma once #include "dx_common.h" +#include "layers_dx.h" #include "neural/factory.h" #include "neural/network_legacy.h" @@ -34,14 +35,14 @@ // backend into some base class(es). 
 namespace lczero {
+namespace dx_backend {
 
-using namespace dx_backend;
 class DxNetwork;
 
 static constexpr int kNumOutputPolicy = 1858;
 
-// Padding needed because on some HW (e.g: NV) fp16 requires gemm matrix dimensions
-// to be multiples of 8
+// Padding needed because on some HW (e.g: NV) fp16 requires gemm matrix
+// dimensions to be multiples of 8
 static constexpr int kNumOutputPolicyPadded8 =
     ((kNumOutputPolicy - 1) / 8 + 1) * 8;
 
@@ -53,9 +54,11 @@ struct InputsOutputsDx {
                    bool conv_policy, bool fp16);
   ~InputsOutputsDx();
 
-  // Wanted to put these in default heap (video memory, mapped to support CPU writes too).
+  // Wanted to put these in default heap (video memory, mapped to support CPU
+  // writes too).
   // - but this isn't supported by DX12 API!
-  // So right now we have it in upload ueap (system memory mapped for both CPU and GPU).
+  // So right now we have it in upload ueap (system memory mapped for both CPU
+  // and GPU).
 
   DXAlloc input_masks_mem_gpu_;
   DXAlloc input_val_mem_gpu_;
@@ -164,7 +167,7 @@ class DxContext {
   void CreateAlloc(size_t size, D3D12_HEAP_TYPE type, DXAlloc& alloc,
                    bool fp16);
   void UavBarrier(ID3D12GraphicsCommandList4* cl = nullptr);
-  uint64_t FlushCL(ID3D12GraphicsCommandList4 *cl = nullptr);
+  uint64_t FlushCL(ID3D12GraphicsCommandList4* cl = nullptr);
   void WaitForGpu(uint64_t fence_val = 0);
   void ResetCL(ID3D12GraphicsCommandList4* cl = nullptr,
                ID3D12CommandAllocator* ca = nullptr, bool reset = true);
@@ -235,4 +238,5 @@ class DxNetwork : public Network {
   std::list> free_inputs_outputs_;
 };
 
-};  // namespace lczero
+}  // namespace dx_backend
+}  // namespace lczero

From 393839775722fd89322179aa54b18f30a86df732 Mon Sep 17 00:00:00 2001
From: Alexander Lyashuk
Date: Sat, 22 Feb 2020 15:55:11 +0100
Subject: [PATCH 026/151] Get rid of libproto! (#1079)

* Get rid of libproto!
* Comments.
* Changing meson script.
* Remove includes which are not needed.
* Add move constructor.
* Fix.
* When multiple copies of the message appear in protobuf, use the last, not
  the first. That's more consistent with libprotobuf (although it never
  happens), and it makes it easier to override the message by appending it
  (see the sketch after this list).
* Putting #define NOMINMAX at arbitrary places to see whether it helps.
* Move NOMINMAX to meson.build
* Better way to find script.
* update appveyor.yml
* Optimization (which will never happen anyway).
* Introduce builders and use them to adjust older networks.
* Get rid of warnings.
* Added some comments.
* Fix build.
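
The "use the last, not the first" rule above is what makes override-by-appending
work: a proto2 message on the wire is just a sequence of (tag, value) records, a
non-repeated field may legally appear more than once, and a reader that keeps the
final occurrence lets a later record replace an earlier one. Below is a minimal
standalone sketch of that rule, not the ProtoMessage class this patch adds; it
assumes a single varint field with field number 1 (tag byte 0x08), and the
LastFieldOne helper exists only for this illustration.

  #include <cstdint>
  #include <iostream>
  #include <string>

  // Returns the value of the last occurrence of field #1 (varint, tag 0x08)
  // in a serialized buffer, so later records override earlier ones.
  std::uint64_t LastFieldOne(const std::string& buf) {
    std::uint64_t last = 0;
    size_t i = 0;
    while (i < buf.size()) {
      if (static_cast<unsigned char>(buf[i++]) != 0x08) break;  // unexpected tag
      std::uint64_t val = 0;
      int shift = 0;
      while (i < buf.size()) {
        const unsigned char b = buf[i++];
        val |= static_cast<std::uint64_t>(b & 0x7f) << shift;
        shift += 7;
        if ((b & 0x80) == 0) break;  // high bit clear: varint is complete
      }
      last = val;  // keep the latest value seen
    }
    return last;
  }

  int main() {
    std::string msg = {'\x08', '\x05'};      // field 1 = 5
    msg += {'\x08', '\x2a'};                 // appended override: field 1 = 42
    std::cout << LastFieldOne(msg) << "\n";  // prints 42
  }

Appending a freshly encoded copy of a field to an already serialized message is
therefore enough to change its value, which is the behaviour the bullet above
relies on.
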
Co-authored-by: borg323 <39573933+borg323@users.noreply.github.com> --- appveyor.yml | 8 +- meson.build | 40 ++- meson_options.txt | 15 -- scripts/compile_proto.py | 505 +++++++++++++++++++++++++++++++++++ src/mcts/params.cc | 2 + src/neural/loader.cc | 88 +++--- src/neural/network_legacy.cc | 1 + src/utils/protomessage.cc | 201 ++++++++++++++ src/utils/protomessage.h | 118 ++++++++ src/utils/weights_adapter.cc | 2 +- src/utils/weights_adapter.h | 3 +- 11 files changed, 892 insertions(+), 91 deletions(-) create mode 100755 scripts/compile_proto.py create mode 100644 src/utils/protomessage.cc create mode 100644 src/utils/protomessage.h diff --git a/appveyor.yml b/appveyor.yml index 4bb90dfd34..4d71ca584d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,12 +42,6 @@ install: - cmd: pip3 install --upgrade meson==0.51.2 - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - cmd: set PKG_FOLDER="C:\cache" -- cmd: IF NOT EXIST c:\cache\protobuf\ git clone -b v3.5.1 --single-branch --depth 1 https://github.com/google/protobuf.git -- cmd: IF NOT EXIST c:\cache\protobuf\ mkdir protobuf\build_msvc -- cmd: IF NOT EXIST c:\cache\protobuf\ cd protobuf\build_msvc -- cmd: IF NOT EXIST c:\cache\protobuf\ cmake -G "Visual Studio 15 2017 Win64" -Dprotobuf_BUILD_SHARED_LIBS=NO -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=c:/cache/protobuf ../cmake -- cmd: IF NOT EXIST c:\cache\protobuf\ msbuild INSTALL.vcxproj /p:Configuration=Release /p:Platform=x64 /m -- cmd: set PATH=c:\cache\protobuf\bin;%PATH% - cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://training.lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy @@ -64,7 +58,7 @@ before_build: - cmd: SET BUILD_BLAS=%BLAS% - cmd: IF %OPENCL%==true SET BUILD_BLAS=true - cmd: IF %DX%==true SET BUILD_BLAS=true -- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static +- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static build_script: - cmd: SET PGO=false - cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==false SET 
PGO=true diff --git a/meson.build b/meson.build index 3a258c6259..81e6f76532 100644 --- a/meson.build +++ b/meson.build @@ -28,6 +28,9 @@ if cc.get_id() == 'clang' or cc.get_id() == 'gcc' add_project_arguments('-march=native', language : 'cpp') endif endif +if host_machine.system() == 'windows' + add_project_arguments('-DNOMINMAX', language : 'cpp') +endif # Files to compile. deps = [] @@ -38,28 +41,13 @@ has_backends = false # Third party files. includes += include_directories('third_party', is_system: true) -# Both protobuf and protoc must be the same version, so couple them together. -protobuf_lib = cc.find_library('libprotobuf', dirs : get_option('protobuf_libdir'), required : false) -if not protobuf_lib.found() - protobuf_dep = dependency('protobuf', required : false) -else - protobuf_dep = protobuf_lib -endif -protoc = find_program('protoc', required : false) -# For tensorflow skip system protobuf, chances are it will not work. -if get_option('protobuf-3-6-0') - protobuf_dep = subproject('protobuf-3.6.0').get_variable('protobuf_dep') - protoc = subproject('protobuf-3.6.0').get_variable('protoc') -elif not protobuf_dep.found() or not protoc.found() or get_option('tensorflow') - protobuf_dep = subproject('protobuf').get_variable('protobuf_dep') - protoc = subproject('protobuf').get_variable('protoc') -elif protobuf_lib.found() - includes += include_directories(get_option('protobuf_include')) -endif -deps += protobuf_dep - -gen = generator(protoc, output: ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'], - arguments : ['--proto_path=@CURRENT_SOURCE_DIR@/libs/lczero-common', '--cpp_out=@BUILD_DIR@', '@INPUT@']) +# Compiling protobufs. +compile_proto = find_program('scripts/compile_proto.py') +gen = generator(compile_proto, output: ['@BASENAME@.pb.h'], + arguments : [ + '--proto_path=@CURRENT_SOURCE_DIR@/libs/lczero-common', + '--cpp_out=@BUILD_DIR@', + '@INPUT@']) # Handle submodules. git = find_program('git', required: false) @@ -79,8 +67,10 @@ if run_command('checkdir.py', 'libs/lczero-common/proto').returncode() != 0 endif endif -pb_files = gen.process('libs/lczero-common/proto/net.proto', - preserve_path_from : meson.current_source_dir() + '/libs/lczero-common/') +pb_files = [ + 'src/utils/protomessage.cc', + gen.process('libs/lczero-common/proto/net.proto') +] files += pb_files # Extract git short revision. 
@@ -546,7 +536,7 @@ if get_option('gtest') test('EncodePositionForNN', executable('encoder_test', 'src/neural/encoder_test.cc', pb_files, include_directories: includes, link_with: lc0_lib, - dependencies: [gtest, protobuf_dep] + dependencies: [gtest] ), args: '--gtest_output=xml:encoder.xml', timeout: 90) endif diff --git a/meson_options.txt b/meson_options.txt index ccd95d7939..9ab6ac514d 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -3,11 +3,6 @@ option('tensorflow_include', value: ['/usr/local/include/tensorflow/'], description: 'Paths to tensorflow include directories') -option('protobuf_include', - type: 'array', - value: ['/usr/local/include/'], - description: 'Paths to protobuf include directories') - option('openblas_include', type: 'array', value: ['/usr/include/openblas/'], @@ -23,11 +18,6 @@ option('tensorflow_libdir', value: ['/usr/local/lib/tensorflow_cc/'], description: 'Paths to tensorflow libraries') -option('protobuf_libdir', - type: 'array', - value: ['/usr/lib/x86_64-linux-gnu/'], - description: 'Paths to protobuf libraries') - option('openblas_libdirs', type: 'array', value: ['/usr/lib/'], @@ -142,8 +132,3 @@ option('gtest', type: 'boolean', value: true, description: 'Build gtest tests') - -option('protobuf-3-6-0', - type: 'boolean', - value: false, - description: 'Use the protobuf 3.6.0 subproject') diff --git a/scripts/compile_proto.py b/scripts/compile_proto.py new file mode 100755 index 0000000000..0c40176b75 --- /dev/null +++ b/scripts/compile_proto.py @@ -0,0 +1,505 @@ +#!/usr/bin/env python3 + +import argparse +import os +import re +import sys + +VARINT_TYPES = { + 'int32': 'std::int32_t', + 'int64': 'std::int64_t', + 'uint32': 'std::uint32_t', + 'uint64': 'std::uint64_t', + 'sint32': 'std::uint32_t', + 'sint64': 'std::uint64_t', + 'bool': 'bool', +} + +FIXED64_TYPES = { + 'fixed64': 'std::uint64_t', + 'sfixed64': 'std::int64_t', + 'double': 'double', +} +FIXED32_TYPES = { + 'fixed32': 'std::uint32_t', + 'sfixed32': 'std::int32_t', + 'float': 'float', +} +BYTES_TYPES = { + 'string': 'std::string_view', + 'bytes': 'std::string_view', +} +ZIGZAG_TYPES = set(['sint32', 'sint64']) + +TYPES = {**VARINT_TYPES, **FIXED32_TYPES, **FIXED64_TYPES, **BYTES_TYPES} + +RESERVED_WORDS = [ + 'syntax', + 'package', + 'message', + 'optional', + 'repeated', + 'enum', +] + list(TYPES.keys()) + +GRAMMAR = ([(x, x) + for x in RESERVED_WORDS] + [('\\' + x, x) for x in '=;{}.'] + [ + (r'/\*.*?\*/', None), # /* Comment */ + (r'//.*?$', None), # // Comment + (r'\s+', None), # Whitespace + (r'$', 'EOF'), + (r'"((?:[^"\\]|\\.)*)"', 'string'), + (r'\d+', 'number'), + (r'\w+', 'identifier'), + ]) + + +class Lexer: + def __init__(self, text): + self.text = text + self.grammar = [(re.compile(x, re.S + re.M), y) for x, y in GRAMMAR] + self.cur_token = None + self.cur_offset = 0 + + def Pick(self): + '''Picks the last token in queue. Doesn't advance the queue.''' + if self.cur_token is None: + self.cur_token = self.NextToken() + return self.cur_token + + def Consume(self, expected_token, value=None, group=0): + '''Gets the token from the queue and advances the queue. + + If @expected_token if of wrong type, or @value is not equal to regexes + @group, throws an error. 
+ ''' + token, match = self.Pick() + if expected_token != token: + self.Error('Expected token type [%s]' % expected_token) + if value is not None and value != match.group(group): + self.Error('Expected value [%s]' % value) + self.cur_offset = match.span()[1] + self.cur_token = None + return match + + def NextToken(self): + '''Reads the stream and returns the next token. + + (which is not whitespace or comment) + ''' + while True: + token, match = self.NextTokenOrWhitespace() + if token is None: + self.cur_offset = match.span()[1] + else: + return token, match + + def NextTokenOrWhitespace(self): + '''Reads the stream and returns the next token (possibly whitespace).''' + for r, token in self.grammar: + m = r.match(self.text, self.cur_offset) + if m: + return (token, m) + self.Error('Unexpected token') + + def Error(self, text): + '''Throws an error with context in the file read.''' + line_start = self.text.rfind('\n', 0, self.cur_offset) + 1 + line_end = self.text.find('\n', line_start) + sys.stderr.write('%s:\n' % text) + sys.stderr.write(self.text[line_start:line_end] + '\n') + sys.stderr.write(' ' * (self.cur_offset - line_start) + '^^^\n') + raise ValueError("Parse error: %s at offset %d." % + (text, self.cur_offset)) + + +def ReadIdentifierPath(lexer): + '''Reads qualified identifier a.b.d into ['a', 'b', 'd'] list''' + path = [] + while True: + path.append(lexer.Consume('identifier').group(0)) + if lexer.Pick()[0] != '.': + return path + lexer.Consume('.') + + +def LookupType(name, stack): + '''Looks up the (possibly qualified) from the innermost scope first.''' + for y in stack: + for x in y: + if not x.IsType(): + continue + if x.GetName() == name[0]: + if len(name) == 1: + return x.GetType() + else: + return LookupType(name[1:], [x.GetObjects()]) + raise ValueError("Cannot find type: %s." % '.'.join(name)) + + +# All *Parser classes have the following semantics: +# * They are called with lexer as input to parse grammar from .proto file. +# * The Generate() function writes relevant portion of .pb.h file. 
+ + +class ProtoTypeParser: + def __init__(self, lexer, object_stack): + token, match = lexer.Pick() + if token in TYPES: + self.typetype = 'basic' + self.name = token + lexer.Consume(token) + elif token == 'identifier': + self.name = ReadIdentifierPath(lexer) + self.typetype = LookupType(self.name, object_stack) + else: + lexer.Error('Type expected') + + def IsZigzag(self): + if self.typetype == 'basic': + return self.name in ZIGZAG_TYPES + return False + + def GetCppType(self): + if self.typetype == 'basic': + return TYPES[self.name] + else: + return '::'.join(self.name) + + def GetWireType(self): + if self.typetype == 'basic': + if self.name in VARINT_TYPES: + return 0 + if self.name in FIXED64_TYPES: + return 1 + if self.name in BYTES_TYPES: + return 2 + if self.name in FIXED32_TYPES: + return 5 + raise ValueError('Unknown type %s' % self.name) + elif self.typetype == 'enum': + return 0 + elif self.typetype == 'message': + return 2 + else: + raise ValueError('Unknown typetype %s' % self.typetype) + + def DecodeFunction(self, wire_id, index): + if self.typetype == 'basic': + if self.name == 'double': + return 'GetDoubleVal(%d, %s)' % (wire_id, index) + if self.name == 'float': + return 'GetFloatVal(%d, %s)' % (wire_id, index) + if self.name in VARINT_TYPES: + return 'GetVarintVal(%d, %s)' % (wire_id, index) + if self.name in FIXED64_TYPES: + return 'GetFixed64Val(%d, %s)' % (wire_id, index) + if self.name in BYTES_TYPES: + return 'GetBytesVal(%d, %s)' % (wire_id, index) + if self.name in FIXED32_TYPES: + return 'GetFixed32Val(%d, %s)' % (wire_id, index) + raise ValueError('Unknown type %s' % self.name) + elif self.typetype == 'enum': + return 'GetVarintVal(%d, %s)' % (wire_id, index) + elif self.typetype == 'message': + return '%s::CreateNotOwned(GetBytesVal(%d, %s))' % ( + self.GetCppType(), wire_id, index) + else: + raise ValueError('Unknown typetype %s' % self.typetype) + + def IsIntegralType(self): + if self.typetype == 'basic': + if self.name == 'double': + return False + if self.name == 'float': + return False + if self.name in BYTES_TYPES: + return False + if self.name in TYPES: + return True + raise ValueError('Unknown type %s' % self.name) + elif self.typetype == 'enum': + return True + elif self.typetype == 'message': + return False + else: + raise ValueError('Unknown typetype %s' % self.typetype) + + +class ProtoFieldParser: + def __init__(self, lexer, object_stack): + token, match = lexer.Pick() + if token not in ['repeated', 'optional', 'required']: + lexer.Error('repeated, optional or required expected') + self.category = token + lexer.Consume(token) + self.type = ProtoTypeParser(lexer, object_stack) + self.name = lexer.Consume('identifier') + lexer.Consume('=') + self.number = int(lexer.Consume('number').group(0)) + lexer.Consume(';') + + def IsType(self): + return False + + def Generate(self, w): + name = self.name.group(0) + index = 'i' if self.category == 'repeated' else 'kLast' + wire_id = self.number * 8 + self.type.GetWireType() + func_body = self.type.DecodeFunction(wire_id, index) + cpp_type = self.type.GetCppType() + if self.type.IsZigzag(): + func_body = 'UnZigZag(%s)' % func_body + if self.type.IsIntegralType(): + func_body = 'lczero::kind_of_bit_cast<%s>(%s)' % ( + self.type.GetCppType(), func_body) + + if self.category == 'repeated': + w.Write('size_t %s_size() const { return WireFieldCount(%d); }' % ( + name, + wire_id, + )) + w.Write('%s %s(size_t i) const { return %s; }' % ( + cpp_type, + name, + func_body, + )) + w.Write('lczero::ProtoIterator<%s> %s() 
const {' % + (cpp_type, name)) + w.Write(' return lczero::ProtoIterator<%s>(%s_size(), ' + '[this](size_t i) {' % (cpp_type, name)) + w.Write(' return %s;' % func_body) + w.Write(' });') + w.Write('}') + + else: + w.Write('bool has_%s() const { return WireFieldCount(%d) > 0; }' % + ( + name, + wire_id, + )) + w.Write('%s %s() const { return %s; }' % ( + cpp_type, + name, + func_body, + )) + + def GenerateForBuilder(self, w): + name = self.name.group(0) + repeated = self.category == 'repeated' + wire_id = self.number * 8 + self.type.GetWireType() + # w.Write('void clear_%s() { WireFieldClear(%d); }' % (name, wire_id)) + if repeated: + pass + else: + if self.type.typetype == 'enum': + w.Write('void set_%s(%s val) { WireFieldSetVarint' + '(%d, static_cast(val)); }' % + (name, self.type.GetCppType(), wire_id)) + if self.type.typetype == 'message': + w.Write('void set_%s(const %s& val) { WireFieldSetMessage' + '(%d, val); }' % + (name, self.type.GetCppType(), wire_id)) + + +class ProtoEnumParser: + def __init__(self, lexer): + lexer.Consume('enum') + self.name = lexer.Consume('identifier').group(0) + self.values = [] + lexer.Consume('{') + while True: + token, match = lexer.Pick() + if token == '}': + break + key = lexer.Consume('identifier').group(0) + lexer.Consume('=') + value = int(lexer.Consume('number').group(0)) + lexer.Consume(';') + self.values.append((key, value)) + lexer.Consume('}') + + def GetName(self): + return self.name + + def GetType(self): + return 'enum' + + def IsType(self): + return True + + def Generate(self, w): + # Protobuf enum is mapped directly to C++ enum. + w.Write('enum %s {' % self.name) + w.Indent() + for key, value in self.values: + w.Write('%s = %d,' % (key, value)) + w.Unindent() + w.Write('};') + + +class ProtoMessageParser: + def __init__(self, lexer, object_stack): + self.objects = [] + lexer.Consume('message') + self.name = lexer.Consume('identifier').group(0) + lexer.Consume('{') + while True: + token, match = lexer.Pick() + if token == '}': + break + elif token == 'message': + self.objects.append( + ProtoMessageParser(lexer, [self.objects, *object_stack])) + elif token == 'enum': + self.objects.append(ProtoEnumParser(lexer)) + elif token in ['repeated', 'optional', 'required']: + self.objects.append( + ProtoFieldParser(lexer, [self.objects, *object_stack])) + else: + lexer.Error('Expected field or type') + lexer.Consume('}') + + def GetName(self): + return self.name + + def GetType(self): + return 'message' + + def IsType(self): + return True + + def GetObjects(self): + return self.objects + + def GenerateBuilderClass(self, w): + w.Write('class Builder : public lczero::ProtoMessage::Builder {') + w.Write(' public:') + w.Indent() + w.Write( + 'Builder(const %s& msg) : lczero::ProtoMessage::Builder(msg) {}' % + self.name) + w.Write('%s Build() const { return %s(*this); }' % + (self.name, self.name)) + for x in self.objects: + if not x.IsType(): + x.GenerateForBuilder(w) + w.Unindent() + w.Write('};') + + def Generate(self, w): + # Protobuf message is a C++ class. + w.Write('class %s : public lczero::ProtoMessage {' % self.name) + w.Write(' public:') + w.Indent() + # Set of standard constructors. 
+ w.Write('%s() = default;' % (self.name)) + w.Write('%s(const %s&) = default;' % (self.name, self.name)) + w.Write('%s(%s&&) = default;' % (self.name, self.name)) + w.Write('%s& operator=(const %s&) = default;' % (self.name, self.name)) + w.Write('%s& operator=(%s&&) = default;' % (self.name, self.name)) + w.Write( + 'static %s CreateNotOwned(std::string_view s) { return %s(s); }' % + (self.name, self.name)) + # Writing fields, submessages and enums. + for x in self.objects: + x.Generate(w) + self.GenerateBuilderClass(w) + # Set of functions to bind builder with parser classes. + w.Write('Builder AsBuilder() const {') + w.Write(' return Builder(*this);') + w.Write('}') + w.Unindent() + w.Write(' private:') + w.Indent() + w.Write('%s(std::string_view str) : lczero::ProtoMessage(str) {}' % + (self.name)) + w.Write( + '%s(const Builder& builder) : lczero::ProtoMessage(builder) {}' % + (self.name)) + w.Unindent() + w.Write('};') + + +class ProtoFileParser: + '''Root grammar of .proto file''' + def __init__(self, lexer): + self.package = None + self.objects = [] + while True: + token, match = lexer.Pick() + if token == 'EOF': + return + elif token == 'syntax': + self.ParseSyntax(lexer) + elif token == 'package': + self.ParsePackage(lexer) + elif token == 'message': + self.ParseMessage(lexer) + else: + lexer.Error('Expected message or something similar') + + def ParseSyntax(self, lexer): + lexer.Consume('syntax') + lexer.Consume('=') + lexer.Consume('string', 'proto2', 1) + lexer.Consume(';') + + def ParsePackage(self, lexer): + lexer.Consume('package') + if self.package is not None: + lexer.Error('Package was already defined') + self.package = ReadIdentifierPath(lexer) + lexer.Consume(';') + + def ParseMessage(self, lexer): + self.objects.append(ProtoMessageParser(lexer, [self.objects])) + + def Generate(self, w): + w.Write('// This file is AUTOGENERATED, do not edit.') + w.Write('#pragma once') + w.Write('#include "utils/protomessage.h"') + for x in self.package: + w.Write('namespace %s {' % x) + w.Indent() + for object in self.objects: + object.Generate(w) + w.Unindent() + for x in reversed(self.package): + w.Write('} // namespace %s' % x) + + +class Writer: + '''A helper class for writing file line by line with indent.''' + def __init__(self, fo): + self.fo = fo + self.indent = 0 + + def Indent(self): + self.indent += 2 + + def Unindent(self): + self.indent -= 2 + + def Write(self, text): + self.fo.write(' ' * self.indent + text + '\n') + + +if __name__ == "__main__": + # Have the same flags as protoc has. 
+ parser = argparse.ArgumentParser(description="Compile protobuf files.") + parser.add_argument('input', type=str) + parser.add_argument('--proto_path', type=str) + parser.add_argument('--cpp_out', type=str) + args = parser.parse_args() + + rel_path = os.path.relpath(args.input, args.proto_path) + dest_name = os.path.splitext(rel_path)[0] + '.pb.h' + dest_path = os.path.join(args.cpp_out, dest_name) + dest_dir = os.path.dirname(dest_path) + os.makedirs(dest_dir, exist_ok=True) + + with open(args.input, 'r') as input, open(dest_path, 'w') as output: + proto_file = ProtoFileParser(Lexer(input.read())) + writer = Writer(output) + proto_file.Generate(writer) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 09344e867a..0705e8d283 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -27,6 +27,8 @@ #include "mcts/params.h" +#include + #include "utils/exception.h" namespace lczero { diff --git a/src/neural/loader.cc b/src/neural/loader.cc index a41ab46e5a..9e375c8802 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -27,8 +27,6 @@ #include "neural/loader.h" -#include -#include #include #include @@ -80,23 +78,55 @@ std::string DecompressGzip(const std::string& filename) { return buffer; } -WeightsFile ParseWeightsProto(const std::string& buffer) { - WeightsFile net; - using namespace google::protobuf::io; +void FixOlderWeightsFile(WeightsFile* file) { using nf = pblczero::NetworkFormat; + auto network_format = file->format().network_format().network(); + if (file->format().has_network_format() && + network_format != nf::NETWORK_CLASSICAL && + network_format != nf::NETWORK_SE) { + // Already in a new format, return unchanged. + return; + } - ArrayInputStream raw_input_stream(buffer.data(), buffer.size()); - CodedInputStream input_stream(&raw_input_stream); - // Set protobuf limit to 2GB. - // Remove the second parameter when everyone uses newer protobufs. - // Until then, let everyone who uses new libprotobuf observe warnings. :sigh: - input_stream.SetTotalBytesLimit(2000 * 1000000, 500 * 1000000); + WeightsFile::Builder builder(*file); + + auto format = file->format().AsBuilder(); + auto net_builder = file->format().network_format().AsBuilder(); + + if (!file->format().has_network_format()) { + // Older protobufs don't have format definition. + net_builder.set_input(nf::INPUT_CLASSICAL_112_PLANE); + net_builder.set_output(nf::OUTPUT_CLASSICAL); + net_builder.set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); + net_builder.set_value(nf::VALUE_CLASSICAL); + net_builder.set_policy(nf::POLICY_CLASSICAL); + } else if (network_format == pblczero::NetworkFormat::NETWORK_CLASSICAL) { + // Populate policyFormat and valueFormat fields in old protobufs + // without these fields. + net_builder.set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); + net_builder.set_value(nf::VALUE_CLASSICAL); + net_builder.set_policy(nf::POLICY_CLASSICAL); + } else if (network_format == pblczero::NetworkFormat::NETWORK_SE) { + net_builder.set_network(nf::NETWORK_SE_WITH_HEADFORMAT); + net_builder.set_value(nf::VALUE_CLASSICAL); + net_builder.set_policy(nf::POLICY_CLASSICAL); + } - if (!net.ParseFromCodedStream(&input_stream)) - throw Exception("Invalid weight file: parse error."); + // It's only possible to replace the particular field completely. + // So first replace network_format in format. + format.set_network_format(net_builder.Build()); + // Then replace format in WeightsFile. 
+ builder.set_format(format.Build()); + *file = builder.Build(); +} - if (net.magic() != kWeightMagic) +WeightsFile ParseWeightsProto(const std::string& buffer) { + WeightsFile net; + net.ParseFromString(buffer); + + if (net.magic() != kWeightMagic) { throw Exception("Invalid weight file: bad header."); + } const auto min_version = GetVersionStr(net.min_version().major(), net.min_version().minor(), @@ -106,6 +136,8 @@ WeightsFile ParseWeightsProto(const std::string& buffer) { GetVersionInt(net.min_version().major(), net.min_version().minor(), net.min_version().patch()); + FixOlderWeightsFile(&net); + // Weights files with this signature are also compatible. if (net_ver != 0x5c99973 && net_ver > lc0_ver) throw Exception("Invalid weight file: lc0 version >= " + min_version + @@ -114,34 +146,6 @@ WeightsFile ParseWeightsProto(const std::string& buffer) { if (net.format().weights_encoding() != pblczero::Format::LINEAR16) throw Exception("Invalid weight file: unsupported encoding."); - // Older protobufs don't have format definition. - // Populate format fields with legacy (or "classical") formats. - if (!net.format().has_network_format()) { - auto net_format = net.mutable_format()->mutable_network_format(); - net_format->set_input(nf::INPUT_CLASSICAL_112_PLANE); - net_format->set_output(nf::OUTPUT_CLASSICAL); - net_format->set_network(nf::NETWORK_CLASSICAL); - } - - // Populate policyFormat and valueFormat fields in old protobufs - // without these fields. - if (net.format().network_format().network() == - pblczero::NetworkFormat::NETWORK_CLASSICAL) { - auto net_format = net.mutable_format()->mutable_network_format(); - - net_format->set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); - net_format->set_value(nf::VALUE_CLASSICAL); - net_format->set_policy(nf::POLICY_CLASSICAL); - - } else if (net.format().network_format().network() == - pblczero::NetworkFormat::NETWORK_SE) { - auto net_format = net.mutable_format()->mutable_network_format(); - - net_format->set_network(nf::NETWORK_SE_WITH_HEADFORMAT); - net_format->set_value(nf::VALUE_CLASSICAL); - net_format->set_policy(nf::POLICY_CLASSICAL); - } - return net; } diff --git a/src/neural/network_legacy.cc b/src/neural/network_legacy.cc index c091786728..f34733b9c7 100644 --- a/src/neural/network_legacy.cc +++ b/src/neural/network_legacy.cc @@ -20,6 +20,7 @@ #include #include + #include "utils/weights_adapter.h" namespace lczero { diff --git a/src/utils/protomessage.cc b/src/utils/protomessage.cc new file mode 100644 index 0000000000..06ce24cf72 --- /dev/null +++ b/src/utils/protomessage.cc @@ -0,0 +1,201 @@ +#include "utils/protomessage.h" + +#include "utils/exception.h" + +namespace lczero { +namespace { + +uint64_t ReadVarInt(const char** iter, const char* const end) { + uint64_t res = 0; + uint64_t multiplier = 1; + while (*iter < end) { + unsigned char x = **iter; + ++*iter; + res += (x & 0x7f) * multiplier; + if ((x & 0x80) == 0) return res; + multiplier *= 0x80; + } + throw Exception("The file seems truncated."); +} + +std::string EncodeVarInt(std::uint64_t val) { + std::string res; + while (true) { + char c = (val & 0x7f); + val >>= 7; + if (val) c |= 0x80; + res += c; + if (!val) return res; + } +} + +} // namespace + +void ProtoMessage::ParseFromString(const std::string& str) { + // Making the buffer "owned", e.g. copy the string contents. + buffer_ = str; + data_ = buffer_; + RebuildOffsets(); +} + +ProtoMessage::ProtoMessage(std::string_view serialized_proto) + : data_(serialized_proto) { + // Not owned. 
+ RebuildOffsets(); +} + +void ProtoMessage::RebuildOffsets() { + // Builds offsets, e.g. mapping from wire file id to list of field offsets in + // data_. + offsets_.clear(); + const char* const begin = data_.data(); + const char* iter = data_.data(); + const char* const end = data_.data() + data_.size(); + while (iter < end) { + uint64_t field_id = ReadVarInt(&iter, end); + auto offset = iter; + switch (field_id & 0x7) { + case 0: + // Varint field, so read one more varint. + ReadVarInt(&iter, end); + break; + case 1: + // Fixed64, skip 8 bytes. + iter += 8; + break; + case 2: { + // String/submessage. Varint length and then buffer of that length. + size_t size = ReadVarInt(&iter, end); + iter += size; + break; + } + case 5: + // Fixed32, skip 4 bytes. + iter += 4; + break; + default: + throw Exception("The file seems to be unparseable."); + } + offsets_[field_id].push_back({static_cast(offset - begin), + static_cast(iter - offset)}); + } + if (iter != end) { + throw Exception("The file is truncated."); + } +} + +void ProtoMessage::operator=(ProtoMessage&& other) { + buffer_ = std::move(other.buffer_); + offsets_ = std::move(other.offsets_); + if (!buffer_.empty()) { + // If owned, make data_ point to a new buffer_ (the underlying data was + // probably moved though, so probably data_ == other.data_. + data_ = buffer_; + } else { + // Not owned, copy buffer. + data_ = std::move(other.data_); + } +} + +ProtoMessage::ProtoMessage(ProtoMessage&& other) { + operator=(std::move(other)); +} + +size_t ProtoMessage::WireFieldCount(int wire_field_id) const { + auto iter = offsets_.find(wire_field_id); + if (iter == offsets_.end()) return 0; + return iter->second.size(); +} + +const char* ProtoMessage::GetFieldPtr(int wire_field_id, size_t index) const { + auto iter = offsets_.find(wire_field_id); + if (iter == offsets_.end()) return nullptr; + if (index == kLast) return data_.data() + iter->second.back().offset; + return data_.data() + iter->second.at(index).offset; +} + +std::uint64_t ProtoMessage::GetVarintVal(int wire_field_id, + size_t index) const { + auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return 0; + return ReadVarInt(&x, data_.data() + data_.size()); +} + +float ProtoMessage::GetFloatVal(int wire_field_id, size_t index) const { + auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return 0.0f; + float res; + std::memcpy(&res, x, sizeof(res)); + return res; +} +double ProtoMessage::GetDoubleVal(int wire_field_id, size_t index) const { + auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return 0.0; + double res; + std::memcpy(&res, x, sizeof(res)); + return res; +} +std::uint32_t ProtoMessage::GetFixed32Val(int wire_field_id, + size_t index) const { + // WARNING: Doesn't support big-endian. + auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return 0; + std::uint32_t res; + std::memcpy(&res, x, sizeof(res)); + return res; +} +std::uint64_t ProtoMessage::GetFixed64Val(int wire_field_id, + size_t index) const { + // WARNING: Doesn't support big-endian. 
+ auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return 0; + std::uint64_t res; + std::memcpy(&res, x, sizeof(res)); + return res; +} +std::string_view ProtoMessage::GetBytesVal(int wire_field_id, + size_t index) const { + auto x = GetFieldPtr(wire_field_id, index); + if (x == nullptr) return {}; + size_t size = ReadVarInt(&x, data_.data() + data_.size()); + return std::string_view(x, size); +} + +ProtoMessage::Builder::Builder(const ProtoMessage& msg) { + for (const auto& iter : msg.offsets_) { + auto& bucket = fields_[iter.first]; + for (const auto& entry : iter.second) { + bucket.emplace_back(msg.data_.data() + entry.offset, entry.size); + } + } +} + +void ProtoMessage::Builder::WireFieldSetVarint(int wire_field_id, + std::uint64_t value) { + fields_[wire_field_id] = {EncodeVarInt(value)}; +} + +ProtoMessage::ProtoMessage(const ProtoMessage::Builder& builder) { + buffer_ = builder.AsString(); + data_ = buffer_; + RebuildOffsets(); +} + +std::string ProtoMessage::Builder::AsString() const { + std::string res; + for (const auto& iter : fields_) { + for (const auto& entry : iter.second) { + res += EncodeVarInt(iter.first); + res += entry; + } + } + return res; +} + +void ProtoMessage::Builder::WireFieldSetMessage(int wire_field_id, + const ProtoMessage& msg) { + fields_[wire_field_id] = {EncodeVarInt(msg.data_.size()) + + std::string(msg.data_)}; +} + +} // namespace lczero \ No newline at end of file diff --git a/src/utils/protomessage.h b/src/utils/protomessage.h new file mode 100644 index 0000000000..62135bd85e --- /dev/null +++ b/src/utils/protomessage.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Undef g++ macros to ged rid of warnings. +#ifdef minor +#undef minor +#endif +#ifdef major +#undef major +#endif + +namespace lczero { + +// Kind of bit_cast from C++20, but can convert from uint64_t to smaller types. +template +T kind_of_bit_cast(std::uint64_t from) { + T to; + std::memcpy(&to, &from, sizeof(to)); + return to; +} + +// Iterator for repeated proto fields. 
+template +class ProtoIterator { + public: + class Iter { + public: + Iter(std::function func, size_t idx) + : accessor_(func), idx_(idx) {} + void operator++() { ++idx_; } + T operator*() { return accessor_(idx_); } + bool operator!=(const Iter& other) { return idx_ != other.idx_; } + + private: + const std::function accessor_; + size_t idx_; + }; + + ProtoIterator(size_t count, std::function accessor) + : count_(count), accessor_(accessor) {} + + Iter begin() const { return Iter(accessor_, 0); } + Iter end() const { return Iter(accessor_, count_); } + + private: + const size_t count_; + const std::function accessor_; +}; + +class ProtoMessage { + public: + void ParseFromString(const std::string&); + + class Builder { + public: + Builder(); + std::string AsString() const; + + protected: + void operator=(const ProtoMessage& msg); + Builder(const ProtoMessage&); + void WireFieldClear(int wire_field_id); + void WireFieldSetVarint(int wire_field_id, std::uint64_t varint); + void WireFieldSetMessage(int wire_field_id, const ProtoMessage& val); + + private: + using Bits = std::vector; + using Fields = std::map; + Fields fields_; + }; + + protected: + ProtoMessage() = default; + ProtoMessage(const ProtoMessage& other); + ProtoMessage(ProtoMessage&& other); + void operator=(const ProtoMessage& other); + void operator=(ProtoMessage&& other); + ProtoMessage(const Builder&); + void operator=(const Builder&); + static constexpr size_t kLast = std::numeric_limits::max(); + + ProtoMessage(std::string_view serialized_proto); + size_t WireFieldCount(int wire_field_id) const; + std::uint64_t GetVarintVal(int wire_field_id, size_t index) const; + float GetFloatVal(int wire_field_id, size_t index) const; + double GetDoubleVal(int wire_field_id, size_t index) const; + std::uint32_t GetFixed32Val(int wire_field_id, size_t index) const; + std::uint64_t GetFixed64Val(int wire_field_id, size_t index) const; + std::string_view GetBytesVal(int wire_field_id, size_t index) const; + + private: + void RebuildOffsets(); + const char* GetFieldPtr(int wire_field_id, size_t index) const; + + struct FieldPos { + size_t offset; + size_t size; + }; + using Offsets = std::vector; + using FieldOffsets = std::map; + + // Map from wire field_id to list of offsets and sizes inside data_. + FieldOffsets offsets_; + // When the class owns the proto, buffer_ contains it. + std::string buffer_; + // String slice of the proto. If owned, points to buffer_. If not owned, + // points to some external location. 
+ std::string_view data_; +}; + +} // namespace lczero \ No newline at end of file diff --git a/src/utils/weights_adapter.cc b/src/utils/weights_adapter.cc index ea54fa381a..fdd5a52c60 100644 --- a/src/utils/weights_adapter.cc +++ b/src/utils/weights_adapter.cc @@ -33,7 +33,7 @@ float LayerAdapter::Iterator::ExtractValue(const uint16_t* ptr, return *ptr / static_cast(0xffff) * adapter->range_ + adapter->min_; } -LayerAdapter::LayerAdapter(const pblczero::Weights_Layer& layer) +LayerAdapter::LayerAdapter(const pblczero::Weights::Layer& layer) : data_(reinterpret_cast(layer.params().data())), size_(layer.params().size() / sizeof(uint16_t)), min_(layer.min_val()), diff --git a/src/utils/weights_adapter.h b/src/utils/weights_adapter.h index e174f5cc49..960022943e 100644 --- a/src/utils/weights_adapter.h +++ b/src/utils/weights_adapter.h @@ -27,6 +27,7 @@ #include #include + #include "proto/net.pb.h" namespace lczero { @@ -71,7 +72,7 @@ class LayerAdapter { const uint16_t* data_ = nullptr; }; - LayerAdapter(const pblczero::Weights_Layer& layer); + LayerAdapter(const pblczero::Weights::Layer& layer); std::vector as_vector() const; size_t size() const { return size_; } float operator[](size_t idx) const { return begin()[idx]; } From 461f324eb4d4b49c38dcc6daf2fef14168a967e1 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 23 Feb 2020 00:48:28 +0200 Subject: [PATCH 027/151] reduce msvc warnings (#1082) --- meson.build | 4 ++++ src/chess/uciloop.cc | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 81e6f76532..484219c0b0 100644 --- a/meson.build +++ b/meson.build @@ -28,6 +28,10 @@ if cc.get_id() == 'clang' or cc.get_id() == 'gcc' add_project_arguments('-march=native', language : 'cpp') endif endif +if cc.get_id() == 'msvc' + # Silence some zlib warnings. + add_global_arguments('/wd4131', '/wd4267', '/wd4127', '/wd4244', '/wd4245', language : 'c') +endif if host_machine.system() == 'windows' add_project_arguments('-DNOMINMAX', language : 'cpp') endif diff --git a/src/chess/uciloop.cc b/src/chess/uciloop.cc index 8737ea9cc7..0e7a502849 100644 --- a/src/chess/uciloop.cc +++ b/src/chess/uciloop.cc @@ -114,7 +114,7 @@ int GetNumeric(const std::unordered_map& params, throw Exception("expected value after " + key); } return std::stoi(str); - } catch (std::invalid_argument& e) { + } catch (std::invalid_argument&) { throw Exception("invalid value " + str); } } From 3ff07fea993bca7eca972aeb3a70563a98728f9a Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sun, 23 Feb 2020 00:15:15 +0100 Subject: [PATCH 028/151] Adjust parameters. (#1088) --- src/mcts/params.cc | 51 +++++++++++++++++++++++++++----------- src/mcts/params.h | 20 ++++++++++----- src/mcts/search.cc | 7 +++--- src/selfplay/tournament.cc | 1 + 4 files changed, 55 insertions(+), 24 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 0705e8d283..bb357be2db 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -62,15 +62,26 @@ const OptionId SearchParams::kCpuctId{ "cpuct_init constant from \"UCT search\" algorithm. 
Higher values promote " "more exploration/wider search, lower values promote more " "confidence/deeper search."}; -const OptionId SearchParams::kCpuctAtRootOffsetId{ - "cpuct-root-offset", "CPuctRootOffset", - "cpuct_init value adjustment for the root node."}; +const OptionId SearchParams::kCpuctAtRootId{ + "cpuct-at-root", "CPuctAtRoot", + "cpuct_init constant from \"UCT search\" algorithm, for root node."}; const OptionId SearchParams::kCpuctBaseId{ "cpuct-base", "CPuctBase", "cpuct_base constant from \"UCT search\" algorithm. Lower value means " "higher growth of Cpuct as number of node visits grows."}; +const OptionId SearchParams::kCpuctBaseAtRootId{ + "cpuct-base-at-root", "CPuctBaseAtRoot", + "cpuct_base constant from \"UCT search\" algorithm, for root node."}; const OptionId SearchParams::kCpuctFactorId{ "cpuct-factor", "CPuctFactor", "Multiplier for the cpuct growth formula."}; +const OptionId SearchParams::kCpuctFactorAtRootId{ + "cpuct-factor-at-root", "CPuctFactorAtRoot", + "Multiplier for the cpuct growth formula at root."}; +const OptionId SearchParams::kRootHasOwnCpuctParamsId{ + "root-has-own-cpuct-params", "RootHasOwnCpuctParams", + "If enabled, cpuct parameters for root node are taken from *AtRoot " + "parameters. Otherwise, they are the same as for the rest of nodes. " + "Temporary flag for transition to a new version."}; const OptionId SearchParams::kTemperatureId{ "temperature", "Temperature", "Tau value from softmax formula for the first move. If equal to 0, the " @@ -222,10 +233,13 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMiniBatchSizeId, 1, 1024) = 256; options->Add(kMaxPrefetchBatchId, 0, 1024) = 32; options->Add(kLogitQId) = false; - options->Add(kCpuctId, 0.0f, 100.0f) = 3.0f; - options->Add(kCpuctAtRootOffsetId, -100.0f, 100.0f) = 0.0f; - options->Add(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f; - options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f; + options->Add(kCpuctId, 0.0f, 100.0f) = 2.147f; + options->Add(kCpuctAtRootId, 0.0f, 100.0f) = 2.147f; + options->Add(kCpuctBaseId, 1.0f, 1000000000.0f) = 18368.0f; + options->Add(kCpuctBaseAtRootId, 1.0f, 1000000000.0f) = 18368.0f; + options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 2.815f; + options->Add(kCpuctFactorAtRootId, 0.0f, 1000.0f) = 2.815f; + options->Add(kRootHasOwnCpuctParamsId) = true; options->Add(kTemperatureId, 0.0f, 100.0f) = 0.0f; options->Add(kTempDecayMovesId, 0, 100) = 0; options->Add(kTemperatureCutoffMoveId, 0, 1000) = 0; @@ -240,12 +254,12 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kLogLiveStatsId) = false; std::vector fpu_strategy = {"reduction", "absolute"}; options->Add(kFpuStrategyId, fpu_strategy) = "reduction"; - options->Add(kFpuValueId, -100.0f, 100.0f) = 1.2f; + options->Add(kFpuValueId, -100.0f, 100.0f) = 0.443f; fpu_strategy.push_back("same"); options->Add(kFpuStrategyAtRootId, fpu_strategy) = "same"; options->Add(kFpuValueAtRootId, -100.0f, 100.0f) = 1.0f; options->Add(kCacheHistoryLengthId, 0, 7) = 0; - options->Add(kPolicySoftmaxTempId, 0.1f, 10.0f) = 2.2f; + options->Add(kPolicySoftmaxTempId, 0.1f, 10.0f) = 1.607f; options->Add(kMaxCollisionEventsId, 1, 1024) = 32; options->Add(kMaxCollisionVisitsId, 1, 1000000) = 9999; options->Add(kOutOfOrderEvalId) = true; @@ -264,7 +278,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; - 
options->Add(kMaxConcurrentSearchersId, 0, 128) = 0; + options->Add(kMaxConcurrentSearchersId, 0, 128) = 1; options->Add(kDrawScoreSidetomoveId, -100, 100) = 0; options->Add(kDrawScoreOpponentId, -100, 100) = 0; options->Add(kDrawScoreWhiteId, -100, 100) = 0; @@ -274,15 +288,27 @@ void SearchParams::Populate(OptionsParser* options) { options->HideOption(kNoiseAlphaId); options->HideOption(kLogLiveStatsId); options->HideOption(kDisplayCacheUsageId); + options->HideOption(kRootHasOwnCpuctParamsId); } SearchParams::SearchParams(const OptionsDict& options) : options_(options), kLogitQ(options.Get(kLogitQId.GetId())), kCpuct(options.Get(kCpuctId.GetId())), - kCpuctAtRootOffset(options.Get(kCpuctAtRootOffsetId.GetId())), + kCpuctAtRoot( + options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) + ? kCpuctAtRootId.GetId() + : kCpuctId.GetId())), kCpuctBase(options.Get(kCpuctBaseId.GetId())), + kCpuctBaseAtRoot( + options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) + ? kCpuctBaseAtRootId.GetId() + : kCpuctBaseId.GetId())), kCpuctFactor(options.Get(kCpuctFactorId.GetId())), + kCpuctFactorAtRoot( + options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) + ? kCpuctFactorAtRootId.GetId() + : kCpuctFactorId.GetId())), kNoiseEpsilon(options.Get(kNoiseId.GetId()) ? 0.25f : options.Get(kNoiseEpsilonId.GetId())), @@ -318,9 +344,6 @@ SearchParams::SearchParams(const OptionsDict& options) 100.0f}, kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f} { - if (kCpuct + kCpuctAtRootOffset < 0.0f) { - throw Exception("CPuct + CPuctRootOffset must be >= 0."); - } if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > 1.0f) { diff --git a/src/mcts/params.h b/src/mcts/params.h index 299874e3e2..d53f85b369 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -47,10 +47,13 @@ class SearchParams { return options_.Get(kMaxPrefetchBatchId.GetId()); } bool GetLogitQ() const { return kLogitQ; } - float GetCpuct() const { return kCpuct; } - float GetCpuctOffsetAtRoot() const { return kCpuctAtRootOffset; } - float GetCpuctBase() const { return kCpuctBase; } - float GetCpuctFactor() const { return kCpuctFactor; } + float GetCpuct(bool at_root) const { return at_root ? kCpuctAtRoot : kCpuct; } + float GetCpuctBase(bool at_root) const { + return at_root ? kCpuctBaseAtRoot : kCpuctBase; + } + float GetCpuctFactor(bool at_root) const { + return at_root ? kCpuctFactorAtRoot : kCpuctFactor; + } float GetTemperature() const { return options_.Get(kTemperatureId.GetId()); } @@ -112,9 +115,12 @@ class SearchParams { static const OptionId kMaxPrefetchBatchId; static const OptionId kLogitQId; static const OptionId kCpuctId; - static const OptionId kCpuctAtRootOffsetId; + static const OptionId kCpuctAtRootId; static const OptionId kCpuctBaseId; + static const OptionId kCpuctBaseAtRootId; static const OptionId kCpuctFactorId; + static const OptionId kCpuctFactorAtRootId; + static const OptionId kRootHasOwnCpuctParamsId; static const OptionId kTemperatureId; static const OptionId kTempDecayMovesId; static const OptionId kTemperatureCutoffMoveId; @@ -159,9 +165,11 @@ class SearchParams { // trivial search optimizations. 
const bool kLogitQ; const float kCpuct; - const float kCpuctAtRootOffset; + const float kCpuctAtRoot; const float kCpuctBase; + const float kCpuctBaseAtRoot; const float kCpuctFactor; + const float kCpuctFactorAtRoot; const float kNoiseEpsilon; const float kNoiseAlpha; const bool kFpuAbsolute; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 4f182258f6..c10f177597 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -226,10 +226,9 @@ inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node, inline float ComputeCpuct(const SearchParams& params, uint32_t N, bool is_root_node) { - const float init = - params.GetCpuct() + (is_root_node ? params.GetCpuctOffsetAtRoot() : 0.0f); - const float k = params.GetCpuctFactor(); - const float base = params.GetCpuctBase(); + const float init = params.GetCpuct(is_root_node); + const float k = params.GetCpuctFactor(is_root_node); + const float base = params.GetCpuctBase(is_root_node); return init + (k ? k * FastLog((N + base) / base) : 0.0f); } } // namespace diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index c08da5f82a..2c603800f2 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -127,6 +127,7 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { "multiplexing"); defaults->Set(SearchParams::kStickyEndgamesId.GetId(), false); defaults->Set(SearchParams::kLogitQId.GetId(), false); + defaults->Set(SearchParams::kRootHasOwnCpuctParamsId.GetId(), false); } SelfPlayTournament::SelfPlayTournament( From 05b35c6f7a380b873fbc8349bafaff6c153d129d Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sun, 23 Feb 2020 10:55:16 +0100 Subject: [PATCH 029/151] Update changelog in preparation to v0.24.0-rc1 (#1089) * Update changelog in preparation to v0.24.0-rc1 * Address review comments. --- changelog.txt | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index 0a1cb50ced..c556411c8f 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,39 @@ -v0.23.0 (2019-12-01) +v0.24.0-rc1 (2020-02-23) +~~~~~~~~~~~ + +* Introduced DirectX12 backend. +* Optimized Cpuct/FPU parameters are now default. +* There is now a separate set of CPuct parameters for the root node. +* Support of running selfplay games from an opening book. +* It's possible to adjust draw score from 0 to something else. +* There is a new --max-concurrent-seachers parameter (default is 1) which + helps with thread congestion at the beginning of the search. +* Cache fullness is not reported in UCI info line by default anymore. +* Removed libproto dependency. + +v0.23.3 (2020-02-18) +~~~~~~~ + +* Fix a bug in time management which sometimes led to insta-moves in long time + control. + +v0.23.2 (2019-12-31) +~~~~~~~ + +* Fixed a bug where odd length openings had reversed training data results in + selfplay. +* Fixed a bug where zero length training games could be generated due to + discard pile containing positions that were already considered end of game. +* Add cudnn-auto backend. + +v0.23.1 (2019-12-03) +~~~~~~~ + +* Fixed a bug with Lc0 crashing sometimes during match phase of training game + generation. +* Release packages now include CUDNN version without DLLs bundled. 
+ +v0.23.0 (2019-12-01) ~~~~~~~ * Fixed the order of BLAS options so that Eigen is lower priority, to match From 85970691f6a8a45e8c78c22a6f657870da686f91 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sun, 23 Feb 2020 12:39:46 +0100 Subject: [PATCH 030/151] Master is v0.25.0-dev now (#1090) --- src/version.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.inc b/src/version.inc index 351665c5ab..04b7196f08 100644 --- a/src/version.inc +++ b/src/version.inc @@ -1,4 +1,4 @@ #define LC0_VERSION_MAJOR 0 -#define LC0_VERSION_MINOR 24 +#define LC0_VERSION_MINOR 25 #define LC0_VERSION_PATCH 0 #define LC0_VERSION_POSTFIX "dev" From cfc6df520d636f01992bd9e541b46aa32d7f3f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Mon, 24 Feb 2020 14:58:28 -0500 Subject: [PATCH 031/151] meson, only add -march=native if compiler supports it. (#1083) --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 484219c0b0..7f8c2db233 100644 --- a/meson.build +++ b/meson.build @@ -25,7 +25,7 @@ if cc.get_id() == 'clang' endif if cc.get_id() == 'clang' or cc.get_id() == 'gcc' if get_option('buildtype') == 'release' - add_project_arguments('-march=native', language : 'cpp') + add_project_arguments(cc.get_supported_arguments(['-march=native']), language : 'cpp') endif endif if cc.get_id() == 'msvc' From 31ed442140457a5da78b1c289885d166d8131bf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Mon, 24 Feb 2020 16:51:12 -0500 Subject: [PATCH 032/151] meson, don't link pthread when host is Android. (#1087) * let meson take care of threads dependency --- meson.build | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 7f8c2db233..7c6865775c 100644 --- a/meson.build +++ b/meson.build @@ -145,6 +145,7 @@ files += [ ] includes += include_directories('src') +deps += dependency('threads') ############################################################################# ## Platform specific files @@ -153,9 +154,6 @@ if host_machine.system() == 'windows' files += 'src/utils/filesystem.win32.cc' else files += 'src/utils/filesystem.posix.cc' - deps += [ - cc.find_library('pthread'), - ] endif From f2a4a5634b59a797af812bb617c2aa87d9690eaf Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Tue, 25 Feb 2020 00:41:59 +0200 Subject: [PATCH 033/151] stricter appveyor artifacts wildcard (#1093) --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 4d71ca584d..69fe266981 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -98,7 +98,7 @@ after_build: artifacts: - path: build/lc0.exe name: lc0-$(NAME) - - path: /*.zip/ + - path: /lc0*.zip/ name: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME)-zip - path: build/lc0.pdb name: lc0-debug-symbols From bb465288b732c8706287f8614384041095929879 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 26 Feb 2020 02:53:12 +0200 Subject: [PATCH 034/151] fix check_dx.bat (#1096) --- scripts/check_dx.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_dx.bat b/scripts/check_dx.bat index c594361bb9..fee708729a 100644 --- a/scripts/check_dx.bat +++ b/scripts/check_dx.bat @@ -1,5 +1,5 @@ @ECHO OFF ECHO Sanity checking the dx12 driver. 
-lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx,blas %* +lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx12,blas %* PAUSE From dcd0d89fefcfff212ff61d168b3b2898c7f9da94 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 26 Feb 2020 11:59:27 +0200 Subject: [PATCH 035/151] rename dx12 artifact to include windows10 (#1097) --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 69fe266981..8e28c4fafa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -95,6 +95,7 @@ after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%PKG_FOLDER%\cuda\NVIDIA_SLA_cuDNN_Support.txt" dist\CUDNN.txt - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true type dist\README-cuda.txt |more /P > dist\README.txt - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\README.txt .\dist\CUDA.txt .\dist\CUDNN.txt +- cmd: IF EXIST lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip ren lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows10-gpu-dx12.zip artifacts: - path: build/lc0.exe name: lc0-$(NAME) From 587a3cd1ed75debc06cb1af76937028753219ac9 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 26 Feb 2020 12:29:51 +0200 Subject: [PATCH 036/151] move checkdir.py into scripts (#1098) --- meson.build | 4 ++-- checkdir.py => scripts/checkdir.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename checkdir.py => scripts/checkdir.py (80%) diff --git a/meson.build b/meson.build index 7c6865775c..4a8639a956 100644 --- a/meson.build +++ b/meson.build @@ -55,7 +55,7 @@ gen = generator(compile_proto, output: ['@BASENAME@.pb.h'], # Handle submodules. git = find_program('git', required: false) -if run_command('checkdir.py', 'libs/lczero-common/proto').returncode() != 0 +if run_command('scripts/checkdir.py', 'libs/lczero-common/proto').returncode() != 0 if git.found() if run_command(git, 'status').returncode() == 0 message('updating git submodule libs/lczero-common') @@ -398,7 +398,7 @@ if get_option('build_backends') if get_option('cudnn') and cu_blas.found() and cu_dnn.found() and cu_dart.found() and nvcc.found() deps += [cu_blas, cu_dnn, cu_dart] foreach d : get_option('cudnn_include') - if run_command('checkdir.py', d).returncode() == 0 + if run_command('scripts/checkdir.py', d).returncode() == 0 includes += include_directories(d) endif endforeach diff --git a/checkdir.py b/scripts/checkdir.py similarity index 80% rename from checkdir.py rename to scripts/checkdir.py index f144289ae3..4dd8001151 100644 --- a/checkdir.py +++ b/scripts/checkdir.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 import sys import os From c6fb19aa6226693b89e9e2d0a5eee0eb43219e2f Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 29 Feb 2020 16:19:42 +0100 Subject: [PATCH 037/151] Update issue templates (#1100) * Update issue templates * Address revuew comments. 
--- .github/ISSUE_TEMPLATE/bug_report.md | 65 ++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..a6fdae12d0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,65 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**WARNING: Please only use GitHub issues for BUG REPORTS and FEATURE REQUESTS** +Lengthy algorithm discussions etc are fine too, but also consider creating a wiki page or a page on website. + +Here is what to do with other types of questions: +1. Build problems: ask in #help channel in our discord: https://discord.gg/pKujYxD +2. Configuration questions: ask in #help channel in our discord: https://discord.gg/pKujYxD +3. Running problems: ask in #help channel in our discord: https://discord.gg/pKujYxD + + +If you are filing a bug report, please fill the fields below. +Otherwise, feel free to remove this text and type a free-form issue as usual. + +BUG REPORT + +**Describe the bug** +A clear and concise description of what the bug is. + +**Steps to Reproduce** +1. +2. +3. +4. +Expected behavior: +Observed behavior: + +**Lc0 version** +Include Lc0 version/operating system/backend type. + +**Lc0 parameters** +Command line, if not default. +Include screenshot of configuration window, if using through GUI. + +**Hardware** +* Number and model of GPUs that you use. +* Amount of RAM in the system +* Other specs (CPU etc) if it may be relevant + +**Lc0 logs** +Please attach Lc0 logs. Here is how to produce them (e.g. for D:\logfile.txt): + +Set the following UCI option: +**Logfile:** D:\\logfile.txt +OR +pass this as a command line argument: +`--logfile=D:\logfile.txt` +OR +Create **lc0.config** file in the same directory as your **lc0.exe** is located, with the following contents: +``` +logfile=D:\logfile.txt +``` + +After running Lc0, **D:\logfile.txt** should appear. + + +**Chess GUI logs** +If there is a problem with particular GUI (cutechess/arena/etc), also attach logs of that program. 
From ef0e277e75cf79eead3ee99fc40fba69300b774b Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sat, 29 Feb 2020 23:19:25 +0200 Subject: [PATCH 038/151] add a default network to the release zip files (#1099) --- appveyor.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 8e28c4fafa..01c1f7fbfa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,7 +42,8 @@ install: - cmd: pip3 install --upgrade meson==0.51.2 - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - cmd: set PKG_FOLDER="C:\cache" -- cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://training.lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet +- cmd: IF NOT EXIST c:\cache mkdir c:\cache +- cmd: IF NOT EXIST c:\cache\591226.pb.gz appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy - cmd: IF %GTEST%==true IF NOT EXIST KQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK.rtb{w,z} @@ -70,7 +71,7 @@ build_script: - cmd: IF %PGO%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll - cmd: IF %PGO%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll - cmd: IF %PGO%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll -- cmd: IF %PGO%==true lc0 benchmark --weights=c:\cache\testnet --backend=random --movetime=10000 +- cmd: IF %PGO%==true lc0 benchmark --weights=c:\cache\591226.pb.gz --backend=random --movetime=10000 - cmd: cd .. - cmd: IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" after_build: @@ -79,6 +80,7 @@ after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe - cmd: IF %APPVEYOR_REPO_TAG%==true type COPYING |more /P > dist\COPYING - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\COPYING +- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip c:\cache\591226.pb.gz - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%-nodll.zip - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-openblas 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll From e329b76eb068cff55b64f2ffd28ffa49ba98b13c Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 1 Mar 2020 09:36:37 +1100 Subject: [PATCH 039/151] Add a new mode to benchmark the backend specifically. (#1101) * Add a new mode to benchmark the backend specifically. * Add throughput reporting. 
* bbench -> backendbench * Add mode registration for backendbench --- meson.build | 1 + src/benchmark/backendbench.cc | 94 +++++++++++++++++++++++++++++++++++ src/benchmark/backendbench.h | 39 +++++++++++++++ src/main.cc | 6 +++ 4 files changed, 140 insertions(+) create mode 100644 src/benchmark/backendbench.cc create mode 100644 src/benchmark/backendbench.h diff --git a/meson.build b/meson.build index 4a8639a956..aeaaa2cb34 100644 --- a/meson.build +++ b/meson.build @@ -105,6 +105,7 @@ message('Using build identifier "' + build_identifier + '".') files += [ 'src/engine.cc', 'src/version.cc', + 'src/benchmark/backendbench.cc', 'src/benchmark/benchmark.cc', 'src/chess/bitboard.cc', 'src/chess/board.cc', diff --git a/src/benchmark/backendbench.cc b/src/benchmark/backendbench.cc new file mode 100644 index 0000000000..6f13a11d46 --- /dev/null +++ b/src/benchmark/backendbench.cc @@ -0,0 +1,94 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "benchmark/backendbench.h" + +#include "chess/board.h" +#include "mcts/node.h" +#include "neural/factory.h" +#include "utils/optionsparser.h" + +namespace lczero { +namespace { +const int kDefaultThreads = 1; + +const OptionId kThreadsOptionId{"threads", "Threads", + "Number of (CPU) worker threads to use.", 't'}; +const OptionId kBatchesId{"batches", "", + "Number of batches to run as a benchmark."}; +const OptionId kMaxBatchSizeId{"max-batch-size", "", + "Maximum batch size to benchmark."}; +const OptionId kFenId{"fen", "", "Benchmark initial position FEN."}; +} // namespace + +void BackendBenchmark::Run() { + OptionsParser options; + NetworkFactory::PopulateOptions(&options); + options.Add(kThreadsOptionId, 1, 128) = kDefaultThreads; + + options.Add(kBatchesId, 1, 999999999) = 100; + options.Add(kMaxBatchSizeId, 1, 1024) = 256; + options.Add(kFenId) = ChessBoard::kStartposFen; + + if (!options.ProcessAllFlags()) return; + + try { + auto option_dict = options.GetOptionsDict(); + + auto network = NetworkFactory::LoadNetwork(option_dict); + + NodeTree tree; + tree.ResetToPosition(option_dict.Get(kFenId.GetId()), {}); + const int batches = option_dict.Get(kBatchesId.GetId()); + + for (int i = 1; i <= option_dict.Get(kMaxBatchSizeId.GetId()); i++) { + const auto start = std::chrono::steady_clock::now(); + // TODO: support threads not equal to 1 to be able to more sensibly test + // multiplexing backend. 
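The single-threaded timing loop added by this patch continues below: it runs `batches` forward passes at every batch size from 1 up to --max-batch-size and reports average latency and throughput. kThreadsOptionId is parsed but, per the TODO above, not used yet. A rough sketch of a threaded variant, reusing the surrounding locals (network, option_dict, tree, batches, i) and assuming the backend tolerates concurrent NewComputation()/ComputeBlocking() calls; the worker lambda and the even batch split are illustrative only, not part of the patch, and would additionally need <thread> and <vector>:

const int threads = option_dict.Get<int>(kThreadsOptionId.GetId());
auto worker = [&](int batches_per_thread) {
  // Each thread drives its own computations over its share of the batches.
  for (int j = 0; j < batches_per_thread; j++) {
    auto computation = network->NewComputation();
    for (int k = 0; k < i; k++) {
      computation->AddInput(EncodePositionForNN(
          network->GetCapabilities().input_format, tree.GetPositionHistory(),
          8, FillEmptyHistory::ALWAYS));
    }
    computation->ComputeBlocking();
  }
};
std::vector<std::thread> pool;
for (int t = 0; t < threads; t++) {
  pool.emplace_back(worker, batches / threads);  // remainder ignored for brevity
}
for (auto& t : pool) t.join();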
+ for (int j = 0; j < batches; j++) { + // Put i copies of tree root node into computation and compute. + auto computation = network->NewComputation(); + for (int k = 0; k < i; k++) { + computation->AddInput(EncodePositionForNN( + network->GetCapabilities().input_format, + tree.GetPositionHistory(), 8, FillEmptyHistory::ALWAYS)); + } + computation->ComputeBlocking(); + } + + const auto end = std::chrono::steady_clock::now(); + std::chrono::duration time = end - start; + std::cout << "Benchmark batch size " << i + << " with inference average time " + << time.count() / batches * 1000 << "ms - throughput " + << i * batches / time.count() << " nps." << std::endl; + } + } catch (Exception& ex) { + std::cerr << ex.what() << std::endl; + } +} +} // namespace lczero diff --git a/src/benchmark/backendbench.h b/src/benchmark/backendbench.h new file mode 100644 index 0000000000..b468fa00f0 --- /dev/null +++ b/src/benchmark/backendbench.h @@ -0,0 +1,39 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +namespace lczero { + +class BackendBenchmark { + public: + BackendBenchmark() = default; + + void Run(); +}; + +} // namespace lczero diff --git a/src/main.cc b/src/main.cc index 584f0f1a11..319e275bd7 100644 --- a/src/main.cc +++ b/src/main.cc @@ -26,6 +26,7 @@ */ #include "benchmark/benchmark.h" +#include "benchmark/backendbench.h" #include "chess/board.h" #include "engine.h" #include "selfplay/loop.h" @@ -50,6 +51,7 @@ int main(int argc, const char** argv) { CommandLine::RegisterMode("uci", "(default) Act as UCI engine"); CommandLine::RegisterMode("selfplay", "Play games with itself"); CommandLine::RegisterMode("benchmark", "Quick benchmark"); + CommandLine::RegisterMode("backendbench", "Quick benchmark of backend only"); if (CommandLine::ConsumeCommand("selfplay")) { // Selfplay mode. @@ -59,6 +61,10 @@ int main(int argc, const char** argv) { // Benchmark mode. Benchmark benchmark; benchmark.Run(); + } else if (CommandLine::ConsumeCommand("backendbench")) { + // Backend Benchmark mode. + BackendBenchmark benchmark; + benchmark.Run(); } else { // Consuming optional "uci" mode. 
CommandLine::ConsumeCommand("uci"); From 4792ce027e314682e302df36409a766cf158999c Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 1 Mar 2020 12:20:30 +0200 Subject: [PATCH 040/151] set net time (#1102) --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 01c1f7fbfa..bcd9f5ebb4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -44,6 +44,7 @@ install: - cmd: set PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache - cmd: IF NOT EXIST c:\cache\591226.pb.gz appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz +- cmd: touch -t 201801010000.00 c:\cache\591226.pb.gz - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy - cmd: IF %GTEST%==true IF NOT EXIST KQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK.rtb{w,z} From 8c71db3a1e55767fdf12518ff0a5075a3d20e807 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 1 Mar 2020 22:18:21 +0200 Subject: [PATCH 041/151] Add android to the default builds (#1095) * try to build for android * move some appveyor logic to external scripts * release test * cleanup * rename android binary * add armv7a * simplify * restore deployment authentication token * build with openblas --- appveyor.yml | 70 ++++++++++++++----------------- cross-files/aarch64-linux-android | 28 +++++++++++++ cross-files/armv7a-linux-android | 27 ++++++++++++ scripts/appveyor_win_build.cmd | 15 +++++++ scripts/appveyor_win_package.cmd | 23 ++++++++++ 5 files changed, 125 insertions(+), 38 deletions(-) create mode 100644 cross-files/aarch64-linux-android create mode 100644 cross-files/armv7a-linux-android create mode 100644 scripts/appveyor_win_build.cmd create mode 100644 scripts/appveyor_win_package.cmd diff --git a/appveyor.yml b/appveyor.yml index bcd9f5ebb4..360a778729 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,6 +10,8 @@ environment: - NAME: gpu-opencl - NAME: cpu-dnnl - NAME: cpu-openblas + - NAME: android-aarch64 + - NAME: android-armv7a clone_folder: c:\projects\lc0 install: - cmd: set CUDA=false @@ -17,6 +19,9 @@ install: - cmd: set OPENCL=false - cmd: set BLAS=false - cmd: set GTEST=false +- cmd: set ANDROID=false +- cmd: IF %NAME%==android-aarch64 set ANDROID=true +- cmd: IF %NAME%==android-armv7a set ANDROID=true - cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true - cmd: IF %NAME%==gpu-dx12 set DX=true - cmd: IF %NAME%==gpu-opencl set OPENCL=true @@ -39,11 +44,21 @@ install: - cmd: IF %CUDA%==true IF NOT EXIST C:\cache\cuda appveyor DownloadFile http://developer.download.nvidia.com/compute/redist/cudnn/v7.4.2/cudnn-10.0-windows10-x64-v7.4.2.24.zip - cmd: IF %CUDA%==true IF NOT EXIST C:\cache\cuda 7z x cudnn-10.0-windows10-x64-v7.4.2.24.zip -oC:\cache - cmd: set PATH=C:\Python36;C:\Python36\scripts;%PATH% -- cmd: pip3 install --upgrade meson==0.51.2 +- cmd: pip3 install --upgrade meson - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 +- cmd: IF %NAME%==android-aarch64 C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir \android-standalone-64 +- cmd: IF %NAME%==android-aarch64 set PATH=C:\android-standalone-64\bin;%PATH% +- cmd: IF %NAME%==android-aarch64 sed 
"s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile +- cmd: IF %NAME%==android-aarch64 IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-aarch64.zip +- cmd: IF %NAME%==android-aarch64 IF NOT EXIST C:\cache\OpenBLAS 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS +- cmd: IF %NAME%==android-armv7a C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir \android-standalone-32 +- cmd: IF %NAME%==android-armv7a set PATH=C:\android-standalone-32\bin;%PATH% +- cmd: IF %NAME%==android-armv7a sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile +- cmd: IF %NAME%==android-armv7a IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-armv7a.zip +- cmd: IF %NAME%==android-armv7a IF NOT EXIST C:\cache\OpenBLAS 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS - cmd: set PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache -- cmd: IF NOT EXIST c:\cache\591226.pb.gz appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz +- cmd: IF NOT EXIST c:\cache\591226.pb.gz IF %ANDROID%==false appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz - cmd: touch -t 201801010000.00 c:\cache\591226.pb.gz - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy @@ -60,48 +75,21 @@ before_build: - cmd: SET BUILD_BLAS=%BLAS% - cmd: IF %OPENCL%==true SET BUILD_BLAS=true - cmd: IF %DX%==true SET BUILD_BLAS=true -- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static +- cmd: IF %ANDROID%==false meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static +- cmd: IF %ANDROID%==true meson build --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\%NAME%\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\%NAME%\lib" -Ddefault_library=static --cross-file crossfile build_script: -- cmd: SET PGO=false -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==false 
SET PGO=true -- cmd: IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" -- cmd: IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmd: IF %ANDROID%==false call scripts\appveyor_win_build.cmd - cmd: cd build -- cmd: IF %NAME%==cpu-openblas copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll -- cmd: IF %NAME%==cpu-dnnl copy C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll -- cmd: IF %PGO%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll -- cmd: IF %PGO%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll -- cmd: IF %PGO%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll -- cmd: IF %PGO%==true lc0 benchmark --weights=c:\cache\591226.pb.gz --backend=random --movetime=10000 -- cmd: cd .. -- cmd: IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmd: IF %ANDROID%==true ninja +- cmd: IF %ANDROID%==true ren lc0 lc0-%NAME% +- cmd: cd C:\projects\lc0 after_build: -- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe -- cmd: IF %APPVEYOR_REPO_TAG%==true appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows" -- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe -- cmd: IF %APPVEYOR_REPO_TAG%==true type COPYING |more /P > dist\COPYING -- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\COPYING -- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip c:\cache\591226.pb.gz -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%-nodll.zip -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-openblas 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%CUDA_PATH%\bin\cudart64_100.dll" "%CUDA_PATH%\bin\cublas64_100.dll" -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%PKG_FOLDER%\cuda\bin\cudnn64_7.dll" -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl copy "%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp\LICENSE" dist\DNNL-LICENSE -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\DNNL-LICENSE -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true type scripts\check_opencl.bat |more /P > dist\check_opencl.bat -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a 
lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_opencl.bat -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==true type scripts\check_dx.bat |more /P > dist\check_dx.bat -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %DX%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_dx.bat -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%CUDA_PATH%\EULA.txt" dist\CUDA.txt -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%PKG_FOLDER%\cuda\NVIDIA_SLA_cuDNN_Support.txt" dist\CUDNN.txt -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true type dist\README-cuda.txt |more /P > dist\README.txt -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\README.txt .\dist\CUDA.txt .\dist\CUDNN.txt -- cmd: IF EXIST lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip ren lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows10-gpu-dx12.zip +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %ANDROID%==false call scripts\appveyor_win_package.cmd artifacts: - path: build/lc0.exe name: lc0-$(NAME) + - path: build/lc0-$(NAME) + name: lc0-$(NAME) - path: /lc0*.zip/ name: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME)-zip - path: build/lc0.pdb @@ -113,6 +101,12 @@ deploy: secure: USFAdwQKTXqOXQjCYQfzWvzRpUhvqJLBkN4hbOg+j876vDxGZHt9bMYayb5evePp on: appveyor_repo_tag: true + - provider: GitHub + artifact: /lc0-android.*/ + auth_token: + secure: USFAdwQKTXqOXQjCYQfzWvzRpUhvqJLBkN4hbOg+j876vDxGZHt9bMYayb5evePp + on: + appveyor_repo_tag: true test_script: - cmd: cd build - cmd: IF %GTEST%==true xcopy /s /i C:\cache\syzygy syzygy diff --git a/cross-files/aarch64-linux-android b/cross-files/aarch64-linux-android new file mode 100644 index 0000000000..afb3596b7d --- /dev/null +++ b/cross-files/aarch64-linux-android @@ -0,0 +1,28 @@ + +# Tested with Android NDK r18, default toolchain +# Targeting API level 21 + +# Make the standalone toolchain +# cd android-ndk-r18b/build/tools/ +# ./make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir android-standalone-64 + +# Set the toolchain path on your environment +# export PATH="$HOME/.local/share/android-sdk/android-toolchains/android-standalone-64/bin:$PATH" + +[host_machine] +system = 'android' +cpu_family = 'arm' +cpu = 'aarch64' +endian = 'little' + +[properties] +cpp_link_args = ['-llog', '-static-libstdc++'] + +[binaries] +c = 'aarch64-linux-android-clang' +cpp = 'aarch64-linux-android-clang++' +ar = 'aarch64-linux-android-ar' +strip = 'aarch64-linux-android-strip' +ld = 'aarch64-linux-android-ld' +ranlib = 'aarch64-linux-android-ranlib' +as = 'aarch64-linux-android-as' diff --git a/cross-files/armv7a-linux-android b/cross-files/armv7a-linux-android new file mode 100644 index 0000000000..ae01719bed --- /dev/null +++ b/cross-files/armv7a-linux-android @@ -0,0 +1,27 @@ + +# Tested with Android NDK r18, standalone toolchain +# Targeting API level 24 +# +# First create the standalone toolchain: +# ./make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir android-standalone-32 +# +# Then set the toolchain path on your environment: +# export PATH="$HOME/.local/share/android-sdk/android-toolchains/android-standalone-32/bin:$PATH" + +[host_machine] +system = 'android' +cpu_family = 'arm' +cpu = 'armv7a' +endian = 'little' + +[properties] +cpp_link_args = ['-llog', '-static-libstdc++'] + +[binaries] +c = 'arm-linux-androideabi-clang' +cpp = 'arm-linux-androideabi-clang++' +ar = 'arm-linux-androideabi-ar' +strip = 
'arm-linux-androideabi-strip' +ld = 'arm-linux-androideabi-ld' +ranlib = 'arm-linux-androideabi-ranlib' +as = 'arm-linux-androideabi-as' diff --git a/scripts/appveyor_win_build.cmd b/scripts/appveyor_win_build.cmd new file mode 100644 index 0000000000..14eae16c07 --- /dev/null +++ b/scripts/appveyor_win_build.cmd @@ -0,0 +1,15 @@ +SET PGO=false +IF %APPVEYOR_REPO_TAG%==true IF %DX%==false SET PGO=true +IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +cd build +IF %NAME%==cpu-openblas copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll +IF %NAME%==cpu-dnnl copy C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll +IF %PGO%==true ( + IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll + IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll + IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll + lc0 benchmark --weights=c:\cache\591226.pb.gz --backend=random --movetime=10000 +) +cd .. +IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" diff --git a/scripts/appveyor_win_package.cmd b/scripts/appveyor_win_package.cmd new file mode 100644 index 0000000000..16819a3638 --- /dev/null +++ b/scripts/appveyor_win_package.cmd @@ -0,0 +1,23 @@ +7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe +appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows" +7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe +type COPYING |more /P > dist\COPYING +7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\COPYING +7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip c:\cache\591226.pb.gz +IF %CUDA%==true copy lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%-nodll.zip +IF %NAME%==cpu-openblas 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll +IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll +IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll +IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%CUDA_PATH%\bin\cudart64_100.dll" "%CUDA_PATH%\bin\cublas64_100.dll" +IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%PKG_FOLDER%\cuda\bin\cudnn64_7.dll" +IF %NAME%==cpu-dnnl copy "%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp\LICENSE" dist\DNNL-LICENSE +IF %NAME%==cpu-dnnl 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\DNNL-LICENSE +IF %OPENCL%==true type scripts\check_opencl.bat |more /P > dist\check_opencl.bat +IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_opencl.bat +IF %DX%==true type scripts\check_dx.bat |more /P > dist\check_dx.bat +IF %DX%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\check_dx.bat +IF %CUDA%==true copy "%CUDA_PATH%\EULA.txt" dist\CUDA.txt +IF %CUDA%==true copy 
"%PKG_FOLDER%\cuda\NVIDIA_SLA_cuDNN_Support.txt" dist\CUDNN.txt +IF %CUDA%==true type dist\README-cuda.txt |more /P > dist\README.txt +IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip .\dist\README.txt .\dist\CUDA.txt .\dist\CUDNN.txt +IF EXIST lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip ren lc0-%APPVEYOR_REPO_TAG_NAME%-windows-gpu-dx12.zip lc0-%APPVEYOR_REPO_TAG_NAME%-windows10-gpu-dx12.zip From 787d5de49541876facda2cde5407458ac6a387b7 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 1 Mar 2020 22:59:24 +0200 Subject: [PATCH 042/151] preserve_path_from is needed to avoid spurious rebuilds (#1103) --- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index aeaaa2cb34..7a9f749bb6 100644 --- a/meson.build +++ b/meson.build @@ -73,7 +73,8 @@ endif pb_files = [ 'src/utils/protomessage.cc', - gen.process('libs/lczero-common/proto/net.proto') + gen.process('libs/lczero-common/proto/net.proto', + preserve_path_from : meson.current_source_dir() + '/libs/lczero-common/') ] files += pb_files From 08bbb1ff24573ef5094519382d80f7b1db721208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Wed, 4 Mar 2020 16:08:20 +0200 Subject: [PATCH 043/151] Moves left head (#961) * Moves left head * Only use moves_left when over score threshold * Configurable threshold for moves_left * Modify moves_left logic * Add moves_left scale parameter * Use moves left logic only with supported weight files * Default value for moves left * dx backend doesn't support moves left head * Fix review comments * Keep track of plies left in tree search Use the estimate for better centering the moves left score bonus. * Moves left search logic simplification * Simplify moves left logic * Add comment for game_ends in MakeTerminal * clang-format * Fix missing argument in UglyFunctionToSilenceNvccWarning * Hopefully fix dx backend compile error Can't test this on Linux. * Fix review comments * Better moves left for terminals and TB nodes Propagate moves left for certain nodes and assign moves left for TB nodes based on parent node. 
--- libs/lczero-common | 2 +- src/mcts/node.cc | 6 +- src/mcts/node.h | 10 ++- src/mcts/params.cc | 22 ++++++ src/mcts/params.h | 9 +++ src/mcts/search.cc | 56 ++++++++++++-- src/mcts/search.h | 13 +++- src/neural/blas/network_blas.cc | 7 +- src/neural/cache.cc | 7 ++ src/neural/cache.h | 5 +- src/neural/cuda/network_cudnn.cc | 113 ++++++++++++++++++++++++---- src/neural/dx/network_dx.cc | 3 +- src/neural/dx/network_dx.h | 4 + src/neural/network.h | 2 + src/neural/network_check.cc | 4 + src/neural/network_demux.cc | 6 ++ src/neural/network_legacy.cc | 7 +- src/neural/network_legacy.h | 7 ++ src/neural/network_mux.cc | 4 + src/neural/network_random.cc | 8 +- src/neural/opencl/network_opencl.cc | 7 +- 21 files changed, 264 insertions(+), 38 deletions(-) diff --git a/libs/lczero-common b/libs/lczero-common index bea03f8911..5b8667e4ab 160000 --- a/libs/lczero-common +++ b/libs/lczero-common @@ -1 +1 @@ -Subproject commit bea03f891184366c6ba17d401dacb662a1a0e4b7 +Subproject commit 5b8667e4ab51e18b2ea26ac221723d6dd8f95533 diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 29962843d2..a9490c9188 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -217,8 +217,9 @@ std::string Node::DebugString() const { return oss.str(); } -void Node::MakeTerminal(GameResult result) { +void Node::MakeTerminal(GameResult result, float plies_left) { is_terminal_ = true; + m_ = plies_left; if (result == GameResult::DRAW) { wl_ = 0.0f; d_ = 1.0f; @@ -265,10 +266,11 @@ void Node::CancelScoreUpdate(int multivisit) { best_child_cached_ = nullptr; } -void Node::FinalizeScoreUpdate(float v, float d, int multivisit) { +void Node::FinalizeScoreUpdate(float v, float d, float m, int multivisit) { // Recompute Q. wl_ += multivisit * (v - wl_) / (n_ + multivisit); d_ += multivisit * (d - d_) / (n_ + multivisit); + m_ += multivisit * (m - m_) / (n_ + multivisit); // If first visit, update parent's sum of policies visited at least once. if (n_ == 0 && parent_ != nullptr) { diff --git a/src/mcts/node.h b/src/mcts/node.h index cc82028b3e..0ef87b2ee9 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -158,13 +158,14 @@ class Node { // for terminal nodes. float GetWL() const { return wl_; } float GetD() const { return d_; } + float GetM() const { return m_; } // Returns whether the node is known to be draw/lose/win. bool IsTerminal() const { return is_terminal_; } uint16_t GetNumEdges() const { return edges_.size(); } // Makes the node terminal and sets it's score. - void MakeTerminal(GameResult result); + void MakeTerminal(GameResult result, float plies_left = 0.0f); // Makes the node not terminal and updates its visits. void MakeNotTerminal(); @@ -180,7 +181,7 @@ class Node { // * Q (weighted average of all V in a subtree) // * N (+=1) // * N-in-flight (-=1) - void FinalizeScoreUpdate(float v, float d, int multivisit); + void FinalizeScoreUpdate(float v, float d, float m, int multivisit); // When search decides to treat one visit as several (in case of collisions // or visiting terminal nodes several times), it amplifies the visit by // incrementing n_in_flight. @@ -276,6 +277,8 @@ class Node { // Averaged draw probability. Works similarly to WL, except that D is not // flipped depending on the side to move. float d_ = 0.0f; + // Estimated remaining plies. + float m_ = 0.0f; // Sum of policy priors which have had at least one playout. float visited_policy_ = 0.0f; // How many completed visits this node had. @@ -352,6 +355,9 @@ class EdgeAndNode { float GetD() const { return (node_ && node_->GetN() > 0) ? 
node_->GetD() : 0.0f; } + float GetM(float default_m) const { + return (node_ && node_->GetN() > 0) ? node_->GetM() : default_m; + } // N-related getters, from Node (if exists). uint32_t GetN() const { return node_ ? node_->GetN() : 0; } int GetNStarted() const { return node_ ? node_->GetNStarted() : 0; } diff --git a/src/mcts/params.cc b/src/mcts/params.cc index bb357be2db..307d2b7fde 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -204,6 +204,22 @@ const OptionId SearchParams::kHistoryFillId{ "one. During the first moves of the game such historical positions don't " "exist, but they can be synthesized. This parameter defines when to " "synthesize them (always, never, or only at non-standard fen position)."}; +const OptionId SearchParams::kMovesLeftFactorId{ + "moves-left-factor", "MovesLeftFactor", + "Bonus to add to the score of a node based on how much shorter/longer " + "it makes when winning/losing."}; +const OptionId SearchParams::kMovesLeftThresholdId{ + "moves-left-threshold", "MovesLeftThreshold", + "Absolute value of node Q needs to exceed this value before shorter wins " + "or longer losses are considered."}; +const OptionId SearchParams::kMovesLeftScaleId{ + "moves-left-scale", "MovesLeftScale", + "Controls how the bonus for shorter wins or longer losses is adjusted " + "based on how many moves the move is estimated to shorten/lengthen the " + "game. The move shortening/lengthening the game by this amount of plies " + "or more compared to the best node, gets the full MovesLeftFactor bonus " + "added. Moves shortening/lengthening by less amount of moves have bonus " + "scaled linearly."}; const OptionId SearchParams::kShortSightednessId{ "short-sightedness", "ShortSightedness", "Used to focus more on short term gains over long term."}; @@ -276,6 +292,9 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kScoreTypeId, score_type) = "centipawn"; std::vector history_fill_opt{"no", "fen_only", "always"}; options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; + options->Add(kMovesLeftFactorId, 0.0f, 1.0f) = 0.0f; + options->Add(kMovesLeftThresholdId, 0.0f, 1.0f) = 1.0f; + options->Add(kMovesLeftScaleId, 1.0f, 100.0f) = 10.0f; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; options->Add(kMaxConcurrentSearchersId, 0, 128) = 1; @@ -334,6 +353,9 @@ SearchParams::SearchParams(const OptionsDict& options) kHistoryFill( EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())), + kMovesLeftFactor(options.Get(kMovesLeftFactorId.GetId())), + kMovesLeftThreshold(options.Get(kMovesLeftThresholdId.GetId())), + kMovesLeftScale(options.Get(kMovesLeftScaleId.GetId())), kShortSightedness(options.Get(kShortSightednessId.GetId())), kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), kMaxConcurrentSearchers( diff --git a/src/mcts/params.h b/src/mcts/params.h index d53f85b369..1dae56e8ca 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -103,6 +103,9 @@ class SearchParams { return options_.Get(kScoreTypeId.GetId()); } FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } + float GetMovesLeftFactor() const { return kMovesLeftFactor; } + float GetMovesLeftThreshold() const { return kMovesLeftThreshold; } + float GetMovesLeftScale() const { return kMovesLeftScale; } bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; } int GetMaxConcurrentSearchers() const { return kMaxConcurrentSearchers; } float GetSidetomoveDrawScore() const { 
return kDrawScoreSidetomove; } @@ -147,6 +150,9 @@ class SearchParams { static const OptionId kPerPvCountersId; static const OptionId kScoreTypeId; static const OptionId kHistoryFillId; + static const OptionId kMovesLeftFactorId; + static const OptionId kMovesLeftThresholdId; + static const OptionId kMovesLeftScaleId; static const OptionId kShortSightednessId; static const OptionId kDisplayCacheUsageId; static const OptionId kMaxConcurrentSearchersId; @@ -185,6 +191,9 @@ class SearchParams { const bool kSyzygyFastPlay; const FillEmptyHistory kHistoryFill; const int kMiniBatchSize; + const float kMovesLeftFactor; + const float kMovesLeftThreshold; + const float kMovesLeftScale; const float kShortSightedness; const bool kDisplayCacheUsage; const int kMaxConcurrentSearchers; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index c10f177597..1cd0370103 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -278,6 +278,9 @@ std::vector Search::GetVerboseStats(Node* node) const { oss << "(D: " << std::setw(6) << std::setprecision(3) << edge.GetD() << ") "; + oss << "(M: " << std::setw(4) << std::setprecision(1) << edge.GetM(0.0f) + << ") "; + oss << "(Q: " << std::setw(8) << std::setprecision(5) << edge.GetQ(fpu, draw_score) << ") "; @@ -877,6 +880,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( } best_edge.Reset(); depth++; + // n_in_flight_ is incremented. If the method returns false, then there is // a search collision, and this node is already being expanded. if (!node->TryStartScoreUpdate()) { @@ -915,6 +919,12 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( const float draw_score = (depth % 2 == 0) ? odd_draw_score : even_draw_score; const float fpu = GetFpu(params_, node, is_root_node, draw_score); + + const float node_q = node->GetQ(0.0f); + const bool do_moves_left_adjustment = + moves_left_support_ && + (std::abs(node_q) > params_.GetMovesLeftThreshold()); + for (auto child : node->Edges()) { if (is_root_node) { // If there's no chance to catch up to the current best node with @@ -934,8 +944,18 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( continue; } } + + float M = 0.0f; + if (do_moves_left_adjustment) { + const float m_scale = params_.GetMovesLeftScale(); + const float parent_m = node->GetM(); + const float child_m = child.GetM(parent_m); + M = std::clamp(child_m - parent_m, -m_scale, m_scale) / m_scale * + std::copysign(params_.GetMovesLeftFactor(), node_q); + } + const float Q = child.GetQ(fpu, draw_score, params_.GetLogitQ()); - const float score = child.GetU(puct_mult) + Q; + const float score = child.GetU(puct_mult) + Q + M; if (score > best) { second_best = best; second_best_edge = best_edge; @@ -1028,13 +1048,19 @@ void SearchWorker::ExtendNode(Node* node) { // Only fail state means the WDL is wrong, probe_wdl may produce correct // result with a stat other than OK. if (state != FAIL) { + // TB nodes don't have NN evaluation, assign M from parent node. + float m = 0.0f; + auto parent = node->GetParent(); + if (parent) { + m = std::max(0.0f, parent->GetM() - 1.0f); + } // If the colors seem backwards, check the checkmate check above. if (wdl == WDL_WIN) { - node->MakeTerminal(GameResult::BLACK_WON); + node->MakeTerminal(GameResult::BLACK_WON, m); } else if (wdl == WDL_LOSS) { - node->MakeTerminal(GameResult::WHITE_WON); + node->MakeTerminal(GameResult::WHITE_WON, m); } else { // Cursed wins and blessed losses count as draws. 
- node->MakeTerminal(GameResult::DRAW); + node->MakeTerminal(GameResult::DRAW, m); } search_->tb_hits_.fetch_add(1, std::memory_order_acq_rel); return; @@ -1209,12 +1235,14 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process, // they require any further processing after value retrieval. node_to_process->v = node->GetWL(); node_to_process->d = node->GetD(); + node_to_process->m = node->GetM(); return; } // For NN results, we need to populate policy as well as value. // First the value... node_to_process->v = -computation_->GetQVal(idx_in_computation); node_to_process->d = computation_->GetDVal(idx_in_computation); + node_to_process->m = computation_->GetMVal(idx_in_computation); // ...and secondly, the policy data. // Calculate maximum first. float max_p = -std::numeric_limits::infinity(); @@ -1279,6 +1307,7 @@ void SearchWorker::DoBackupUpdateSingleNode( // Backup V value up to a root. After 1 visit, V = Q. float v = node_to_process.v; float d = node_to_process.d; + float m = node_to_process.m; int depth = 0; for (Node *n = node, *p; n != search_->root_node_->GetParent(); n = p) { p = n->GetParent(); @@ -1288,9 +1317,10 @@ void SearchWorker::DoBackupUpdateSingleNode( if (n->IsTerminal()) { v = n->GetWL(); d = n->GetD(); + m = n->GetM(); } n->FinalizeScoreUpdate(v / (1.0f + params_.GetShortSightedness() * depth), - d, node_to_process.multivisit); + d, m, node_to_process.multivisit); // Nothing left to do without ancestors to update. if (!p) break; @@ -1300,11 +1330,14 @@ void SearchWorker::DoBackupUpdateSingleNode( // A non-winning terminal move needs all other moves to be similar. auto all_losing = true; + float losing_m = 0.0f; if (can_convert && v <= 0.0f) { for (const auto& edge : p->Edges()) { const auto WL = edge.GetWL(); can_convert = can_convert && edge.IsTerminal() && WL <= 0.0f; + if (!can_convert) break; all_losing = all_losing && WL < 0.0f; + losing_m = std::max(losing_m, edge.GetM(0.0f)); } } @@ -1312,14 +1345,21 @@ void SearchWorker::DoBackupUpdateSingleNode( // to a terminal win if all moves are losing; otherwise there's a mix of // draws and losing, so at best it's a draw. if (can_convert) { - p->MakeTerminal(v > 0.0f ? GameResult::BLACK_WON - : all_losing ? GameResult::WHITE_WON - : GameResult::DRAW); + // Doesn't give the correct distance to mate because siblings are not + // considered but more accurate than doing nothing. This shouldn't + // underestimate the distance to mate since at worst we miss shorter + // moves. + float terminal_m = std::max(losing_m, m) + 1.0f; + p->MakeTerminal( + v > 0.0f ? GameResult::BLACK_WON + : all_losing ? GameResult::WHITE_WON : GameResult::DRAW, + terminal_m); } // Q will be flipped for opponent. v = -v; depth++; + m++; // Update the stats. // Best move. diff --git a/src/mcts/search.h b/src/mcts/search.h index 7442b48fc7..6568517522 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -175,7 +175,7 @@ class Search { int64_t total_playouts_ GUARDED_BY(nodes_mutex_) = 0; // Maximum search depth = length of longest path taken in PickNodetoExtend. uint16_t max_depth_ GUARDED_BY(nodes_mutex_) = 0; - // Cummulative depth of all paths taken in PickNodetoExtend. + // Cumulative depth of all paths taken in PickNodetoExtend. 
uint64_t cum_depth_ GUARDED_BY(nodes_mutex_) = 0; std::optional nps_start_time_; std::atomic tb_hits_{0}; @@ -194,7 +194,11 @@ class Search { class SearchWorker { public: SearchWorker(Search* search, const SearchParams& params) - : search_(search), history_(search_->played_history_), params_(params) {} + : search_(search), + history_(search_->played_history_), + params_(params), + moves_left_support_(search_->network_->GetCapabilities().moves_left != + pblczero::NetworkFormat::MOVES_LEFT_NONE) {} // Runs iterations while needed. void RunBlocking() { @@ -257,8 +261,10 @@ class SearchWorker { Node* node; // Value from NN's value head, or -1/0/1 for terminal nodes. float v; - // Draw probability for NN's with WDL value head + // Draw probability for NN's with WDL value head. float d; + // Estimated remaining plies left. + float m; int multivisit = 0; uint16_t depth; bool nn_queried = false; @@ -300,6 +306,7 @@ class SearchWorker { int number_out_of_order_ = 0; const SearchParams& params_; std::unique_ptr precached_node_; + const bool moves_left_support_; IterationStats iteration_stats_; StoppersHints latest_time_manager_hints_; }; diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc index 5fd10baf8d..76f8e8440a 100644 --- a/src/neural/blas/network_blas.cc +++ b/src/neural/blas/network_blas.cc @@ -80,6 +80,10 @@ class BlasComputation : public NetworkComputation { } } + float GetMVal(int /* sample */) const override { + return 0.0f; + } + // Returns P value @move_id of @sample. float GetPVal(int sample, int move_id) const override { return policies_[sample][move_id]; @@ -364,7 +368,8 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) { } BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options) - : capabilities_{file.format().network_format().input()}, + : capabilities_{file.format().network_format().input(), + pblczero::NetworkFormat::MOVES_LEFT_NONE}, weights_(file.weights()) { #ifndef USE_EIGEN blas_cores_ = options.GetOrDefault("blas_cores", 1); diff --git a/src/neural/cache.cc b/src/neural/cache.cc index b6ee6f755a..111413fb45 100644 --- a/src/neural/cache.cc +++ b/src/neural/cache.cc @@ -83,6 +83,7 @@ void CachingComputation::ComputeBlocking() { std::make_unique(item.probabilities_to_cache.size()); req->q = parent_->GetQVal(item.idx_in_parent); req->d = parent_->GetDVal(item.idx_in_parent); + req->m = parent_->GetMVal(item.idx_in_parent); int idx = 0; for (auto x : item.probabilities_to_cache) { req->p[idx++] = @@ -104,6 +105,12 @@ float CachingComputation::GetDVal(int sample) const { return item.lock->d; } +float CachingComputation::GetMVal(int sample) const { + const auto& item = batch_[sample]; + if (item.idx_in_parent >= 0) return parent_->GetMVal(item.idx_in_parent); + return item.lock->m; +} + float CachingComputation::GetPVal(int sample, int move_id) const { auto& item = batch_[sample]; if (item.idx_in_parent >= 0) diff --git a/src/neural/cache.h b/src/neural/cache.h index 9aaaa570c7..7f1e8ecb10 100644 --- a/src/neural/cache.h +++ b/src/neural/cache.h @@ -37,6 +37,7 @@ struct CachedNNRequest { typedef std::pair IdxAndProb; float q; float d; + float m; // TODO(mooskagh) Don't really need index if using perfect hash. SmallArray p; }; @@ -72,8 +73,10 @@ class CachingComputation { void ComputeBlocking(); // Returns Q value of @sample. float GetQVal(int sample) const; - // Returns probability of draw if NN has WDL value head + // Returns probability of draw if NN has WDL value head. 
float GetDVal(int sample) const; + // Returns estimated remaining moves. + float GetMVal(int sample) const; // Returns P value @move_id of @sample. float GetPVal(int sample, int move_id) const; // Pops last input from the computation. Only allowed for inputs which were diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 94a6d46bd6..4759cd9eed 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -79,7 +79,7 @@ void dumpTensor(void *memory, int elements, char *message, bool fp16 = false) #endif struct InputsOutputs { - InputsOutputs(int maxBatchSize, bool wdl) { + InputsOutputs(int maxBatchSize, bool wdl, bool moves_left) { ReportCUDAErrors(cudaHostAlloc( &input_masks_mem_, maxBatchSize * kInputPlanes * sizeof(uint64_t), cudaHostAllocMapped)); @@ -106,6 +106,13 @@ struct InputsOutputs { cudaHostAllocMapped)); ReportCUDAErrors( cudaHostGetDevicePointer(&op_value_mem_gpu_, op_value_mem_, 0)); + if (moves_left) { + ReportCUDAErrors(cudaHostAlloc(&op_moves_left_mem_, + maxBatchSize * sizeof(float), + cudaHostAllocMapped)); + ReportCUDAErrors(cudaHostGetDevicePointer(&op_moves_left_mem_gpu_, + op_moves_left_mem_, 0)); + } } ~InputsOutputs() { ReportCUDAErrors(cudaFreeHost(input_masks_mem_)); @@ -118,11 +125,13 @@ struct InputsOutputs { float* input_val_mem_; float* op_policy_mem_; float* op_value_mem_; + float* op_moves_left_mem_; // GPU pointers for the above allocations. uint64_t* input_masks_mem_gpu_; float* input_val_mem_gpu_; float* op_value_mem_gpu_; + float* op_moves_left_mem_gpu_; // This is a seperate copy. float* op_policy_mem_gpu_; @@ -134,7 +143,8 @@ class CudnnNetwork; template class CudnnNetworkComputation : public NetworkComputation { public: - CudnnNetworkComputation(CudnnNetwork* network, bool wdl); + CudnnNetworkComputation(CudnnNetwork* network, bool wdl, + bool moves_left); ~CudnnNetworkComputation(); void AddInput(InputPlanes&& input) override { @@ -180,11 +190,19 @@ class CudnnNetworkComputation : public NetworkComputation { return inputs_outputs_->op_policy_mem_[sample * kNumOutputPolicy + move_id]; } + float GetMVal(int sample) const override { + if (moves_left_) { + return inputs_outputs_->op_moves_left_mem_[sample]; + } + return 0.0f; + } + private: // Memory holding inputs, outputs. 
std::unique_ptr inputs_outputs_; int batch_size_; bool wdl_; + bool moves_left_; CudnnNetwork* network_; }; @@ -193,7 +211,8 @@ template class CudnnNetwork : public Network { public: CudnnNetwork(const WeightsFile& file, const OptionsDict& options) - : capabilities_{file.format().network_format().input()} { + : capabilities_{file.format().network_format().input(), + file.format().network_format().moves_left()} { LegacyWeights weights(file.weights()); gpu_id_ = options.GetOrDefault("gpu", 0); @@ -433,6 +452,31 @@ class CudnnNetwork : public Network { } value_out_ = getLastLayer(); + // Moves left head + moves_left_ = file.format().network_format().moves_left() == + pblczero::NetworkFormat::MOVES_LEFT_V1; + if (moves_left_) { + auto convMov = std::make_unique>( + resi_last_, weights.moves_left.biases.size(), 8, 8, 1, kNumFilters, + true, true); + convMov->LoadWeights(&weights.moves_left.weights[0], + &weights.moves_left.biases[0], scratch_mem_); + network_.emplace_back(std::move(convMov)); + + auto FCMov1 = std::make_unique>( + getLastLayer(), weights.ip1_mov_b.size(), 1, 1, true, true); + FCMov1->LoadWeights(&weights.ip1_mov_w[0], &weights.ip1_mov_b[0], + scratch_mem_); + network_.emplace_back(std::move(FCMov1)); + + auto FCMov2 = std::make_unique>(getLastLayer(), 1, 1, 1, + true, true); + FCMov2->LoadWeights(&weights.ip2_mov_w[0], &weights.ip2_mov_b[0], + scratch_mem_); + network_.emplace_back(std::move(FCMov2)); + } + moves_left_out_ = getLastLayer(); + // 3. Allocate GPU memory for running the network: // - three buffers of max size are enough (one to hold input, second to // hold output and third to hold skip connection's input). @@ -489,6 +533,7 @@ class CudnnNetwork : public Network { float* opPol = io->op_policy_mem_gpu_; float* opVal = io->op_value_mem_gpu_; + float* opMov = io->op_moves_left_mem_gpu_; int l = 0; // Input. @@ -540,7 +585,7 @@ class CudnnNetwork : public Network { } else { network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, - cublas_); //policy map layer // POLICY output + cublas_); // policy map layer // POLICY output } } else { network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, @@ -576,37 +621,60 @@ class CudnnNetwork : public Network { cublas_); // value FC1 if (wdl_) { - network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], nullptr, + network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 // VALUE // Value softmax if (fp16) { // TODO: consider fusing the bias-add of FC2 with format conversion. - network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, + network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 - copyTypeConverted(opVal, (half*)(tensor_mem_[0]), + copyTypeConverted(opVal, (half*)(tensor_mem_[1]), 3 * batchSize); // VALUE } else { - network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[2], + network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[0], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 // VALUE } } else { if (fp16) { // TODO: consider fusing the bias-add of FC2 with format conversion. 
- network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], nullptr, + network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 - - copyTypeConverted(opVal, (half*)(tensor_mem_[2]), batchSize); // VALUE + copyTypeConverted(opVal, (half*)(tensor_mem_[0]), batchSize); // VALUE } else { network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 // VALUE } } + + if (moves_left_) { + // Moves left head + network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, + scratch_mem_, scratch_size_, cudnn_, + cublas_); // moves conv + + network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, + scratch_mem_, scratch_size_, cudnn_, + cublas_); // moves FC1 + + // Moves left FC2 + if (fp16) { + // TODO: consider fusing the bias-add of FC2 with format conversion. + network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, + scratch_mem_, scratch_size_, cudnn_, cublas_); + copyTypeConverted(opMov, (half*)(tensor_mem_[0]), batchSize); + } else { + network_[l++]->Eval(batchSize, (DataType*)opMov, tensor_mem_[1], + nullptr, scratch_mem_, scratch_size_, cudnn_, + cublas_); + } + } + ReportCUDAErrors(cudaDeviceSynchronize()); #ifdef DEBUG_RAW_NPS @@ -652,13 +720,15 @@ class CudnnNetwork : public Network { // Set correct gpu id for this computation (as it might have been called // from a different thread). ReportCUDAErrors(cudaSetDevice(gpu_id_)); - return std::make_unique>(this, wdl_); + return std::make_unique>(this, wdl_, + moves_left_); } std::unique_ptr GetInputsOutputs() { std::lock_guard lock(inputs_outputs_lock_); if (free_inputs_outputs_.empty()) { - return std::make_unique(max_batch_size_, wdl_); + return std::make_unique(max_batch_size_, wdl_, + moves_left_); } else { std::unique_ptr resource = std::move(free_inputs_outputs_.front()); @@ -675,7 +745,7 @@ class CudnnNetwork : public Network { // Apparently nvcc doesn't see constructor invocations through make_unique. // This function invokes constructor just to please complier and silence // warning. Is never called (but compiler thinks that it could). 
- void UglyFunctionToSilenceNvccWarning() { InputsOutputs io(0, false); } + void UglyFunctionToSilenceNvccWarning() { InputsOutputs io(0, false, false); } private: const NetworkCapabilities capabilities_; @@ -684,6 +754,7 @@ class CudnnNetwork : public Network { int gpu_id_; int max_batch_size_; bool wdl_; + bool moves_left_; bool nhwc_; // do we want to use nhwc layout (fastest with fp16 with tensor // cores) @@ -701,6 +772,7 @@ class CudnnNetwork : public Network { BaseLayer* resi_last_; BaseLayer* policy_out_; BaseLayer* value_out_; + BaseLayer* moves_left_out_; DataType* tensor_mem_[3]; void* scratch_mem_; @@ -775,8 +847,8 @@ class CudnnNetwork : public Network { template CudnnNetworkComputation::CudnnNetworkComputation( - CudnnNetwork* network, bool wdl) - : wdl_(wdl), network_(network) { + CudnnNetwork* network, bool wdl, bool moves_left) + : wdl_(wdl), moves_left_(moves_left), network_(network) { batch_size_ = 0; inputs_outputs_ = network_->GetInputsOutputs(); } @@ -819,6 +891,15 @@ std::unique_ptr MakeCudnnNetwork(const WeightsFile& weights, std::to_string(weights.format().network_format().value()) + " is not supported by CuDNN backend."); } + if (weights.format().network_format().moves_left() != + pblczero::NetworkFormat::MOVES_LEFT_NONE && + weights.format().network_format().moves_left() != + pblczero::NetworkFormat::MOVES_LEFT_V1) { + throw Exception( + "Movest left head format " + + std::to_string(weights.format().network_format().moves_left()) + + " is not supported by CuDNN backend."); + } return std::make_unique>(weights, options); } diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc index 30a6108b2b..03c1492d38 100644 --- a/src/neural/dx/network_dx.cc +++ b/src/neural/dx/network_dx.cc @@ -373,7 +373,8 @@ void DxContext::ScheduleUpload(DXAlloc alloc, const void* data, size_t size) { DxNetwork::DxNetwork(const WeightsFile& file, const OptionsDict& options) : dx_context_(options), - capabilities_{file.format().network_format().input()} { + capabilities_{file.format().network_format().input(), + pblczero::NetworkFormat::MOVES_LEFT_NONE} { LegacyWeights weights(file.weights()); has_conv_policy_ = file.format().network_format().policy() == diff --git a/src/neural/dx/network_dx.h b/src/neural/dx/network_dx.h index 98e9bc8fde..db2ee46408 100644 --- a/src/neural/dx/network_dx.h +++ b/src/neural/dx/network_dx.h @@ -121,6 +121,10 @@ class DxNetworkComputation : public NetworkComputation { ->op_policy_mem_final_[sample * kNumOutputPolicy + move_id]; } + float GetMVal(int /* sample */) const override { + return 0.0f; + } + private: // Memory holding inputs, outputs. std::unique_ptr inputs_outputs_; diff --git a/src/neural/network.h b/src/neural/network.h index 3911592bb8..b2173bbe46 100644 --- a/src/neural/network.h +++ b/src/neural/network.h @@ -66,6 +66,7 @@ class NetworkComputation { virtual float GetDVal(int sample) const = 0; // Returns P value @move_id of @sample. virtual float GetPVal(int sample, int move_id) const = 0; + virtual float GetMVal(int sample) const = 0; virtual ~NetworkComputation() {} }; @@ -83,6 +84,7 @@ class NetworkComputation { struct NetworkCapabilities { pblczero::NetworkFormat::InputFormat input_format; + pblczero::NetworkFormat::MovesLeftFormat moves_left; // TODO expose information of whether GetDVal() is usable or always zero. // Combines capabilities by setting the most restrictive ones. 
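As an aside, the context line above only states that capabilities are combined "by setting the most restrictive ones"; the merge itself is outside this hunk. Below is a minimal sketch of what that rule could mean for the new moves_left field. The struct and Merge() names are illustrative assumptions rather than code from this patch; the pblczero enum values are the ones introduced above and come from the project's proto headers.

// Illustrative sketch only: a "most restrictive" merge of the moves-left
// capability. If either computation lacks a moves-left head, the combined
// capability must not advertise one, so callers never rely on GetMVal().
struct CapabilitiesSketch {
  pblczero::NetworkFormat::InputFormat input_format;
  pblczero::NetworkFormat::MovesLeftFormat moves_left;

  void Merge(const CapabilitiesSketch& other) {
    if (moves_left != other.moves_left) {
      moves_left = pblczero::NetworkFormat::MOVES_LEFT_NONE;
    }
  }
};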
May throw diff --git a/src/neural/network_check.cc b/src/neural/network_check.cc index 3834cdf6b2..fa77a0706a 100644 --- a/src/neural/network_check.cc +++ b/src/neural/network_check.cc @@ -97,6 +97,10 @@ class CheckComputation : public NetworkComputation { return work_comp_->GetDVal(sample); } + float GetMVal(int sample) const override { + return work_comp_->GetMVal(sample); + } + float GetPVal(int sample, int move_id) const override { return work_comp_->GetPVal(sample, move_id); } diff --git a/src/neural/network_demux.cc b/src/neural/network_demux.cc index d7aaf674b4..a6c272f093 100644 --- a/src/neural/network_demux.cc +++ b/src/neural/network_demux.cc @@ -58,6 +58,12 @@ class DemuxingComputation : public NetworkComputation { return parents_[idx]->GetDVal(offset); } + float GetMVal(int sample) const override { + int idx = sample / partial_size_; + int offset = sample % partial_size_; + return parents_[idx]->GetMVal(offset); + } + float GetPVal(int sample, int move_id) const override { const int idx = sample / partial_size_; const int offset = sample % partial_size_; diff --git a/src/neural/network_legacy.cc b/src/neural/network_legacy.cc index f34733b9c7..954baa04be 100644 --- a/src/neural/network_legacy.cc +++ b/src/neural/network_legacy.cc @@ -38,7 +38,12 @@ LegacyWeights::LegacyWeights(const pblczero::Weights& weights) ip1_val_w(LayerAdapter(weights.ip1_val_w()).as_vector()), ip1_val_b(LayerAdapter(weights.ip1_val_b()).as_vector()), ip2_val_w(LayerAdapter(weights.ip2_val_w()).as_vector()), - ip2_val_b(LayerAdapter(weights.ip2_val_b()).as_vector()) { + ip2_val_b(LayerAdapter(weights.ip2_val_b()).as_vector()), + moves_left(weights.moves_left()), + ip1_mov_w(LayerAdapter(weights.ip1_mov_w()).as_vector()), + ip1_mov_b(LayerAdapter(weights.ip1_mov_b()).as_vector()), + ip2_mov_w(LayerAdapter(weights.ip2_mov_w()).as_vector()), + ip2_mov_b(LayerAdapter(weights.ip2_mov_b()).as_vector()) { for (const auto& res : weights.residual()) { residual.emplace_back(res); } diff --git a/src/neural/network_legacy.h b/src/neural/network_legacy.h index a12973b443..fa3d34ed6d 100644 --- a/src/neural/network_legacy.h +++ b/src/neural/network_legacy.h @@ -78,6 +78,13 @@ struct LegacyWeights { Vec ip1_val_b; Vec ip2_val_w; Vec ip2_val_b; + + // Moves left head + ConvBlock moves_left; + Vec ip1_mov_w; + Vec ip1_mov_b; + Vec ip2_mov_w; + Vec ip2_mov_b; }; } // namespace lczero diff --git a/src/neural/network_mux.cc b/src/neural/network_mux.cc index be84c52546..3c8b0ff109 100644 --- a/src/neural/network_mux.cc +++ b/src/neural/network_mux.cc @@ -54,6 +54,10 @@ class MuxingComputation : public NetworkComputation { return parent_->GetDVal(sample + idx_in_parent_); } + float GetMVal(int sample) const override { + return parent_->GetMVal(sample + idx_in_parent_); + } + float GetPVal(int sample, int move_id) const override { return parent_->GetPVal(sample + idx_in_parent_, move_id); } diff --git a/src/neural/network_random.cc b/src/neural/network_random.cc index f0542e36e2..d353f07df2 100644 --- a/src/neural/network_random.cc +++ b/src/neural/network_random.cc @@ -78,6 +78,10 @@ class RandomNetworkComputation : public NetworkComputation { return d; } + float GetMVal(int /* sample */) const override { + return 0.0f; + } + float GetPVal(int sample, int move_id) const override { if (uniform_mode_) return 1.0f; @@ -118,7 +122,9 @@ class RandomNetwork : public Network { int seed_ = 0; bool uniform_mode_ = false; NetworkCapabilities capabilities_{ - pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE}; + 
pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, + pblczero::NetworkFormat::MOVES_LEFT_NONE + }; }; } // namespace diff --git a/src/neural/opencl/network_opencl.cc b/src/neural/opencl/network_opencl.cc index 8b8232599b..86589d1be2 100644 --- a/src/neural/opencl/network_opencl.cc +++ b/src/neural/opencl/network_opencl.cc @@ -162,6 +162,10 @@ class OpenCLComputation : public NetworkComputation { } } + float GetMVal(int /* sample */) const override { + return 0.0f; + } + // Returns P value @move_id of @sample. float GetPVal(int sample, int move_id) const override { return policies_[sample][move_id]; @@ -200,7 +204,8 @@ class OpenCLNetwork : public Network { virtual ~OpenCLNetwork(){}; OpenCLNetwork(const WeightsFile& file, const OptionsDict& options) - : capabilities_{file.format().network_format().input()}, + : capabilities_{file.format().network_format().input(), + pblczero::NetworkFormat::MOVES_LEFT_NONE}, weights_(file), params_(), opencl_(), From f6306b350a59be774d2194b0146a3f7657243729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Wed, 4 Mar 2020 17:53:45 +0200 Subject: [PATCH 044/151] Increase 32-bit node size in assert (#1108) --- src/mcts/node.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/node.h b/src/mcts/node.h index 0ef87b2ee9..8ed79d2ed3 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -317,7 +317,7 @@ class Node { // A basic sanity check. This must be adjusted when Node members are adjusted. #if defined(__i386__) || (defined(__arm__) && !defined(__aarch64__)) -static_assert(sizeof(Node) == 52, "Unexpected size of Node for 32bit compile"); +static_assert(sizeof(Node) == 56, "Unexpected size of Node for 32bit compile"); #else static_assert(sizeof(Node) == 80, "Unexpected size of Node"); #endif From 24667f61f8835b0e09a5d51b7e599c601798f147 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sat, 7 Mar 2020 15:06:20 +1100 Subject: [PATCH 045/151] Add option to only let smart pruning to stop after at least k batches. (#1113) * Add option to only prune after at least k batches. * Make help text clearer. * Fix increment location. * Review feedback. --- src/mcts/search.cc | 2 ++ src/mcts/search.h | 1 + src/mcts/stoppers/factory.cc | 12 ++++++++++-- src/mcts/stoppers/stoppers.cc | 10 +++++++--- src/mcts/stoppers/stoppers.h | 3 ++- src/mcts/stoppers/timemgr.h | 1 + 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 1cd0370103..7b1ba99603 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -613,6 +613,7 @@ void Search::PopulateCommonIterationStats(IterationStats* stats) { } stats->total_nodes = total_playouts_ + initial_visits_; stats->nodes_since_movestart = total_playouts_; + stats->batches_since_movestart = total_batches_; stats->average_depth = cum_depth_ / (total_playouts_ ? 
total_playouts_ : 1); stats->edge_n.clear(); for (const auto& edge : root_node_->Edges()) { @@ -1286,6 +1287,7 @@ void SearchWorker::DoBackupUpdate() { for (const NodeToProcess& node_to_process : minibatch_) { DoBackupUpdateSingleNode(node_to_process); } + search_->total_batches_ += 1; } void SearchWorker::DoBackupUpdateSingleNode( diff --git a/src/mcts/search.h b/src/mcts/search.h index 6568517522..ae6aa03d0a 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -173,6 +173,7 @@ class Search { Edge* last_outputted_info_edge_ GUARDED_BY(nodes_mutex_) = nullptr; ThinkingInfo last_outputted_uci_info_ GUARDED_BY(nodes_mutex_); int64_t total_playouts_ GUARDED_BY(nodes_mutex_) = 0; + int64_t total_batches_ GUARDED_BY(nodes_mutex_) = 0; // Maximum search depth = length of longest path taken in PickNodetoExtend. uint16_t max_depth_ GUARDED_BY(nodes_mutex_) = 0; // Cumulative depth of all paths taken in PickNodetoExtend. diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index da312e25c8..57ab702f5f 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -29,6 +29,7 @@ #include +#include "factory.h" #include "mcts/stoppers/stoppers.h" namespace lczero { @@ -91,6 +92,11 @@ const OptionId kSmartPruningFactorId{ "promising moves from being considered even earlier. Values less than 1 " "causes hopeless moves to still have some attention. When set to 0, smart " "pruning is deactivated."}; +const OptionId kMinimumSmartPruningBatchesId{ + "smart-pruning-minimum-batches", "SmartPruningMinimumBatches", + "Only allow smart pruning to stop search after at least this many batches " + "have been evaluated. It may be useful to have this value greater than the " + "number of search threads in use."}; } // namespace @@ -99,6 +105,7 @@ void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options) { options->Add(kMinimumKLDGainPerNodeId, 0.0f, 1.0f) = 0.0f; options->Add(kSmartPruningFactorId, 0.0f, 10.0f) = (for_what == RunType::kUci ? 1.33f : 0.00f); + options->Add(kMinimumSmartPruningBatchesId, 0, 10000) = 0; if (for_what == RunType::kUci) { options->Add(kRamLimitMbId, 0, 100000000) = 0; @@ -129,8 +136,9 @@ void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, const auto smart_pruning_factor = options.Get(kSmartPruningFactorId.GetId()); if (smart_pruning_factor > 0.0f) { - stopper->AddStopper( - std::make_unique(smart_pruning_factor)); + stopper->AddStopper(std::make_unique( + smart_pruning_factor, + options.Get(kMinimumSmartPruningBatchesId.GetId()))); } } diff --git a/src/mcts/stoppers/stoppers.cc b/src/mcts/stoppers/stoppers.cc index 6783557ef9..6c3a9cf08b 100644 --- a/src/mcts/stoppers/stoppers.cc +++ b/src/mcts/stoppers/stoppers.cc @@ -177,8 +177,10 @@ const int kSmartPruningToleranceMs = 200; const int kSmartPruningToleranceNodes = 300; } // namespace -SmartPruningStopper::SmartPruningStopper(float smart_pruning_factor) - : smart_pruning_factor_(smart_pruning_factor) {} +SmartPruningStopper::SmartPruningStopper(float smart_pruning_factor, + int64_t minimum_batches) + : smart_pruning_factor_(smart_pruning_factor), + minimum_batches_(minimum_batches) {} bool SmartPruningStopper::ShouldStop(const IterationStats& stats, StoppersHints* hints) { @@ -211,6 +213,7 @@ bool SmartPruningStopper::ShouldStop(const IterationStats& stats, // May overflow if (nps/smart_pruning_factor) > 180 000 000, but that's not // very realistic. 
hints->UpdateEstimatedRemainingRemainingPlayouts(remaining_playouts); + if (stats.batches_since_movestart < minimum_batches_) return false; uint32_t largest_n = 0; uint32_t second_largest_n = 0; @@ -227,7 +230,8 @@ bool SmartPruningStopper::ShouldStop(const IterationStats& stats, LOGFILE << remaining_playouts << " playouts remaining. Best move has " << largest_n << " visits, second best -- " << second_largest_n << ". Difference is " << (largest_n - second_largest_n) - << ", so stopping the search."; + << ", so stopping the search after " + << stats.batches_since_movestart << " batches."; return true; } diff --git a/src/mcts/stoppers/stoppers.h b/src/mcts/stoppers/stoppers.h index 76c89704bb..e0d6e6fd16 100644 --- a/src/mcts/stoppers/stoppers.h +++ b/src/mcts/stoppers/stoppers.h @@ -124,11 +124,12 @@ class KldGainStopper : public SearchStopper { // best move to potentially become the best one, stop the search. class SmartPruningStopper : public SearchStopper { public: - SmartPruningStopper(float smart_pruning_factor); + SmartPruningStopper(float smart_pruning_factor, int64_t minimum_batches); bool ShouldStop(const IterationStats&, StoppersHints*) override; private: const double smart_pruning_factor_; + const int64_t minimum_batches_; Mutex mutex_; std::optional first_eval_time_ GUARDED_BY(mutex_); }; diff --git a/src/mcts/stoppers/timemgr.h b/src/mcts/stoppers/timemgr.h index 3bcbd0126d..6db412b793 100644 --- a/src/mcts/stoppers/timemgr.h +++ b/src/mcts/stoppers/timemgr.h @@ -41,6 +41,7 @@ struct IterationStats { int64_t time_since_movestart = 0; int64_t total_nodes = 0; int64_t nodes_since_movestart = 0; + int64_t batches_since_movestart = 0; int average_depth = 0; std::vector edge_n; }; From d08572a0eadc20f635946140735086208961ebed Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Sat, 7 Mar 2020 11:22:57 -0800 Subject: [PATCH 046/151] Prefer shorter winning mates and report mate score. (#1111) --- src/chess/callbacks.h | 2 + src/chess/uciloop.cc | 1 + src/engine.cc | 1 + src/mcts/node.cc | 15 ++++---- src/mcts/node.h | 13 ++++--- src/mcts/search.cc | 90 +++++++++++++++++++++++++++++++++++-------- 6 files changed, 94 insertions(+), 28 deletions(-) diff --git a/src/chess/callbacks.h b/src/chess/callbacks.h index 8d4a098de4..d7e62d1cd0 100644 --- a/src/chess/callbacks.h +++ b/src/chess/callbacks.h @@ -67,6 +67,8 @@ struct ThinkingInfo { int nps = -1; // Hash fullness * 1000 int hashfull = -1; + // Moves to mate. + std::optional mate; // Win in centipawns. std::optional score; // Win/Draw/Lose probability * 1000. 
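Once the new mate field is filled in, the UCI output gains a "score mate" token (see the uciloop.cc change below). For a forced win the engine sees three moves ahead, the emitted line would look roughly as follows; every number here is made up for illustration, only the token order up to the score follows the code below, and the trailing fields are elided.

info depth 10 seldepth 17 time 842 nodes 21450 score mate 3 ... pv ...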
diff --git a/src/chess/uciloop.cc b/src/chess/uciloop.cc index 0e7a502849..c4f11603dd 100644 --- a/src/chess/uciloop.cc +++ b/src/chess/uciloop.cc @@ -248,6 +248,7 @@ void UciLoop::SendInfo(const std::vector& infos) { if (info.seldepth >= 0) res += " seldepth " + std::to_string(info.seldepth); if (info.time >= 0) res += " time " + std::to_string(info.time); if (info.nodes >= 0) res += " nodes " + std::to_string(info.nodes); + if (info.mate) res += " score mate " + std::to_string(*info.mate); if (info.score) res += " score cp " + std::to_string(*info.score); if (info.wdl) { res += " wdl " + std::to_string(info.wdl->w) + " " + diff --git a/src/engine.cc b/src/engine.cc index 9ad3bf9a16..0d2eefccec 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -190,6 +190,7 @@ class PonderResponseTransformer : public TransformingUciResponder { for (const auto& info : *infos) { if (info.multipv <= 1) { ponder_info = info; + if (ponder_info.mate) ponder_info.mate = -*ponder_info.mate; if (ponder_info.score) ponder_info.score = -*ponder_info.score; if (ponder_info.depth > 1) ponder_info.depth--; if (ponder_info.seldepth > 1) ponder_info.seldepth--; diff --git a/src/mcts/node.cc b/src/mcts/node.cc index a9490c9188..52fcde300b 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -210,15 +210,16 @@ Edge* Node::GetOwnEdge() const { return GetParent()->GetEdgeToNode(this); } std::string Node::DebugString() const { std::ostringstream oss; - oss << " Term:" << is_terminal_ << " This:" << this << " Parent:" << parent_ - << " Index:" << index_ << " Child:" << child_.get() - << " Sibling:" << sibling_.get() << " WL:" << wl_ << " N:" << n_ - << " N_:" << n_in_flight_ << " Edges:" << edges_.size(); + oss << " Term:" << static_cast(terminal_type_) << " This:" << this + << " Parent:" << parent_ << " Index:" << index_ + << " Child:" << child_.get() << " Sibling:" << sibling_.get() + << " WL:" << wl_ << " N:" << n_ << " N_:" << n_in_flight_ + << " Edges:" << edges_.size(); return oss.str(); } -void Node::MakeTerminal(GameResult result, float plies_left) { - is_terminal_ = true; +void Node::MakeTerminal(GameResult result, float plies_left, Terminal type) { + terminal_type_ = type; m_ = plies_left; if (result == GameResult::DRAW) { wl_ = 0.0f; @@ -233,7 +234,7 @@ void Node::MakeTerminal(GameResult result, float plies_left) { } void Node::MakeNotTerminal() { - is_terminal_ = false; + terminal_type_ = Terminal::NonTerminal; n_ = 0; // If we have edges, we've been extended (1 visit), so include children too. diff --git a/src/mcts/node.h b/src/mcts/node.h index 8ed79d2ed3..e1a51c39c5 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -130,6 +130,8 @@ class Node { using Iterator = Edge_Iterator; using ConstIterator = Edge_Iterator; + enum class Terminal : uint8_t { NonTerminal, Terminal, Tablebase }; + // Takes pointer to a parent node and own index in a parent. Node(Node* parent, uint16_t index) : parent_(parent), index_(index) {} @@ -161,11 +163,13 @@ class Node { float GetM() const { return m_; } // Returns whether the node is known to be draw/lose/win. - bool IsTerminal() const { return is_terminal_; } + bool IsTerminal() const { return terminal_type_ != Terminal::NonTerminal; } + bool IsTbTerminal() const { return terminal_type_ == Terminal::Tablebase; } uint16_t GetNumEdges() const { return edges_.size(); } // Makes the node terminal and sets it's score. 
- void MakeTerminal(GameResult result, float plies_left = 0.0f); + void MakeTerminal(GameResult result, float plies_left = 0.0f, + Terminal type = Terminal::Terminal); // Makes the node not terminal and updates its visits. void MakeNotTerminal(); @@ -297,7 +301,7 @@ class Node { // 1 byte fields. // Whether or not this node end game (with a winning of either sides or draw). - bool is_terminal_ = false; + Terminal terminal_type_ = Terminal::NonTerminal; // TODO(mooskagh) Unfriend NodeTree. friend class NodeTree; @@ -336,8 +340,6 @@ class EdgeAndNode { bool operator!=(const EdgeAndNode& other) const { return edge_ != other.edge_; } - // Arbitrary ordering just to make it possible to use in tuples. - bool operator<(const EdgeAndNode& other) const { return edge_ < other.edge_; } bool HasNode() const { return node_ != nullptr; } Edge* edge() const { return edge_; } Node* node() const { return node_; } @@ -365,6 +367,7 @@ class EdgeAndNode { // Whether the node is known to be terminal. bool IsTerminal() const { return node_ ? node_->IsTerminal() : false; } + bool IsTbTerminal() const { return node_ ? node_->IsTbTerminal() : false; } // Edge related getters. float GetP() const { return edge_->GetP(); } diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 7b1ba99603..a005c43e2c 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -138,7 +138,11 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { const auto d = edge.GetD(); const int w = static_cast(std::round(500.0 * (1.0 + wl - d))); const auto q = edge.GetQ(default_q, draw_score); - if (score_type == "centipawn_with_drawscore") { + if (edge.IsTerminal() && wl != 0.0f) { + uci_info.mate = std::copysign( + std::round(edge.GetM(0.0f)) / 2 + (edge.IsTbTerminal() ? 101 : 1), + wl); + } else if (score_type == "centipawn_with_drawscore") { uci_info.score = 295 * q / (1 - 0.976953126 * std::pow(q, 14)); } else if (score_type == "centipawn") { uci_info.score = 295 * wl / (1 - 0.976953126 * std::pow(q, 14)); @@ -478,32 +482,81 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, PopulateRootMoveLimit(&root_limit); } // Best child is selected using the following criteria: - // * Is terminal win, e.g., checkmate. + // * Prefer shorter terminal wins / avoid shorter terminal losses. // * Largest number of playouts. // * If two nodes have equal number: // * If that number is 0, the one with larger prior wins. // * If that number is larger than 0, the one with larger eval wins. - using El = std::tuple; - std::vector edges; + std::vector edges; for (auto edge : parent->Edges()) { if (parent == root_node_ && !root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) == root_limit.end()) { continue; } - const auto WL = edge.GetWL(); - edges.emplace_back(edge.IsTerminal() && WL > 0.0f, edge.GetN(), WL, - edge.GetP(), edge); + edges.push_back(edge); } const auto middle = (static_cast(edges.size()) > count) ? edges.begin() + count : edges.end(); - std::partial_sort(edges.begin(), middle, edges.end(), std::greater()); + std::partial_sort( + edges.begin(), middle, edges.end(), [](const auto& a, const auto& b) { + // The function returns "true" when a is preferred to b. + + // Lists edge types from less desirable to more desirable. + enum EdgeRank { + kTerminalLoss, + kTablebaseLoss, + kNonTerminal, // Non terminal or terminal draw. 
+ kTablebaseWin, + kTerminalWin, + }; + + auto GetEdgeRank = [](const EdgeAndNode& edge) { + const auto wl = edge.GetWL(); + if (!edge.IsTerminal() || !wl) return kNonTerminal; + if (edge.IsTbTerminal()) { + return wl < 0.0 ? kTablebaseLoss : kTablebaseWin; + } + return wl < 0.0 ? kTerminalLoss : kTerminalWin; + }; + + // If moves have different outcomes, prefer better outcome. + const auto a_rank = GetEdgeRank(a); + const auto b_rank = GetEdgeRank(b); + if (a_rank != b_rank) return a_rank > b_rank; + + // If both are terminal draws, try to make it shorter. + if (a_rank == kNonTerminal && a.IsTerminal() && b.IsTerminal()) { + if (a.IsTbTerminal() != b.IsTbTerminal()) { + // Prefer non-tablebase draws. + return a.IsTbTerminal() < b.IsTbTerminal(); + } + // Prefer shorter draws. + return a.GetM(0.0f) < b.GetM(0.0f); + } + + // Neither is terminal, use standard rule. + if (a_rank == kNonTerminal) { + // Prefer largest playouts then eval then prior. + if (a.GetN() != b.GetN()) return a.GetN() > b.GetN(); + if (a.GetWL() != b.GetWL()) return a.GetWL() > b.GetWL(); + return a.GetP() > b.GetP(); + } - std::vector res; - std::transform(edges.begin(), middle, std::back_inserter(res), - [](const El& x) { return std::get<4>(x); }); - return res; + // Both variants are winning, prefer shortest win. + if (a_rank > kNonTerminal) { + return a.GetM(0.0f) < b.GetM(0.0f); + } + + // Both variants are losing, prefer longest losses. + return a.GetM(0.0f) > b.GetM(0.0f); + }); + + if (count < edges.size()) { + edges.resize(count); + } + return edges; } // Returns a child with most visits. @@ -1057,11 +1110,13 @@ void SearchWorker::ExtendNode(Node* node) { } // If the colors seem backwards, check the checkmate check above. if (wdl == WDL_WIN) { - node->MakeTerminal(GameResult::BLACK_WON, m); + node->MakeTerminal(GameResult::BLACK_WON, m, + Node::Terminal::Tablebase); } else if (wdl == WDL_LOSS) { - node->MakeTerminal(GameResult::WHITE_WON, m); + node->MakeTerminal(GameResult::WHITE_WON, m, + Node::Terminal::Tablebase); } else { // Cursed wins and blessed losses count as draws. - node->MakeTerminal(GameResult::DRAW, m); + node->MakeTerminal(GameResult::DRAW, m, Node::Terminal::Tablebase); } search_->tb_hits_.fetch_add(1, std::memory_order_acq_rel); return; @@ -1332,6 +1387,7 @@ void SearchWorker::DoBackupUpdateSingleNode( // A non-winning terminal move needs all other moves to be similar. auto all_losing = true; + auto found_tb = n->IsTbTerminal(); float losing_m = 0.0f; if (can_convert && v <= 0.0f) { for (const auto& edge : p->Edges()) { @@ -1339,6 +1395,7 @@ void SearchWorker::DoBackupUpdateSingleNode( can_convert = can_convert && edge.IsTerminal() && WL <= 0.0f; if (!can_convert) break; all_losing = all_losing && WL < 0.0f; + found_tb = found_tb || edge.IsTbTerminal(); losing_m = std::max(losing_m, edge.GetM(0.0f)); } } @@ -1355,7 +1412,8 @@ void SearchWorker::DoBackupUpdateSingleNode( p->MakeTerminal( v > 0.0f ? GameResult::BLACK_WON : all_losing ? GameResult::WHITE_WON : GameResult::DRAW, - terminal_m); + terminal_m, + found_tb ? Node::Terminal::Tablebase : Node::Terminal::Terminal); } // Q will be flipped for opponent. 
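The mate score reported above is derived from the terminal edge's moves-left estimate: copysign(round(M) / 2 + 1, WL), with 101 in place of 1 for tablebase terminals so that those show up as mate scores beyond 100 moves. A self-contained sketch of that conversion, pulled out of SendUciInfo purely for illustration (the helper name is made up; the arithmetic mirrors the diff above):

#include <cmath>

// Converts a terminal edge's plies-to-end estimate into a UCI mate score.
// The sign of wl decides between "mate in N" and "mated in N".
int MovesToMateSketch(float wl, float plies_left, bool tablebase) {
  return static_cast<int>(
      std::copysign(std::round(plies_left) / 2 + (tablebase ? 101 : 1), wl));
}

// MovesToMateSketch(+1.0f, 4.0f, false) ==   3  -> "score mate 3"
// MovesToMateSketch(-1.0f, 8.0f, false) ==  -5  -> side to move gets mated in 5
// MovesToMateSketch(+1.0f, 0.0f, true)  == 101  -> tablebase win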
From b4a06906f7106354586968266b62f39b6c4570a9 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sat, 7 Mar 2020 22:44:00 +0200 Subject: [PATCH 047/151] simple network embedding mechanism (#1105) --- meson.build | 4 ++++ meson_options.txt | 5 +++++ src/neural/factory.cc | 8 ++++++++ src/neural/loader.cc | 30 ++++++++++++++++++++++++++++-- src/utils/commandline.cc | 11 +++++++++++ 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 7a9f749bb6..28b9b78745 100644 --- a/meson.build +++ b/meson.build @@ -502,6 +502,10 @@ if not get_option('pext') add_project_arguments('-DNO_PEXT', language : 'cpp') endif +if get_option('embed') + add_project_arguments('-DEMBED', language : 'cpp') +endif + executable('lc0', 'src/main.cc', files, include_directories: includes, dependencies: deps, install: true) diff --git a/meson_options.txt b/meson_options.txt index 9ab6ac514d..a6b06de7e6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -132,3 +132,8 @@ option('gtest', type: 'boolean', value: true, description: 'Build gtest tests') + +option('embed', + type: 'boolean', + value: false, + description: 'Use embedded net by default') diff --git a/src/neural/factory.cc b/src/neural/factory.cc index 690cc3c8a1..f8ff3e6f79 100644 --- a/src/neural/factory.cc +++ b/src/neural/factory.cc @@ -29,6 +29,7 @@ #include "neural/loader.h" #include +#include "utils/commandline.h" #include "utils/logging.h" namespace lczero { @@ -47,6 +48,7 @@ const OptionId NetworkFactory::kBackendOptionsId{ "Exact parameters differ per backend.", 'o'}; const char* kAutoDiscover = ""; +const char* kEmbed = ""; NetworkFactory* NetworkFactory::Get() { static NetworkFactory factory; @@ -59,7 +61,11 @@ NetworkFactory::Register::Register(const std::string& name, FactoryFunc factory, } void NetworkFactory::PopulateOptions(OptionsParser* options) { +#if defined(EMBED) + options->Add(NetworkFactory::kWeightsId) = kEmbed; +#else options->Add(NetworkFactory::kWeightsId) = kAutoDiscover; +#endif const auto backends = NetworkFactory::Get()->GetBackendsList(); options->Add(NetworkFactory::kBackendId, backends) = backends.empty() ? "" : backends[0]; @@ -111,6 +117,8 @@ std::unique_ptr NetworkFactory::LoadNetwork( if (net_path == kAutoDiscover) { net_path = DiscoverWeightsFile(); + } else if (net_path == kEmbed) { + net_path = CommandLine::BinaryName(); } else { CERR << "Loading weights file from: " << net_path; } diff --git a/src/neural/loader.cc b/src/neural/loader.cc index 9e375c8802..71be41297d 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -43,6 +44,12 @@ #include "utils/logging.h" #include "version.h" +#ifdef _WIN32 +#include +#else +#include +#endif + namespace lczero { namespace { @@ -55,8 +62,27 @@ std::string DecompressGzip(const std::string& filename) { int bytes_read = 0; // Read whole file into a buffer. - const gzFile file = gzopen(filename.c_str(), "rb"); - if (!file) throw Exception("Cannot read weights from " + filename); + FILE* fp = fopen(filename.c_str(), "rb"); + if (!fp) { + throw Exception("Cannot read weights from " + filename); + } + if (filename == CommandLine::BinaryName()) { + // The network file should be appended at the end of the lc0 executable, + // followed by the network file size and a "Lc0!" (0x2130634c) magic. 
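The trailer described in the comment above -- the gzipped network appended to the executable, then its size as a 32-bit integer, then the 0x2130634c ("Lc0!") magic -- is what the code continuing below reads back. A minimal sketch of producing such a binary follows, assuming a little-endian host so the raw int32 writes match what the loader reads; the helper name is made up, and a later patch in this series does the same job with a perl one-liner in its Android packaging script.

#include <cstdint>
#include <fstream>
#include <iterator>
#include <string>

// Appends a gzipped network plus the 8-byte trailer that DecompressGzip
// looks for when the weights path equals the binary name.
void AppendNetworkSketch(const std::string& exe_path,
                         const std::string& net_path) {
  std::ifstream in(net_path, std::ios::binary);
  const std::string blob((std::istreambuf_iterator<char>(in)),
                         std::istreambuf_iterator<char>());
  std::ofstream out(exe_path, std::ios::binary | std::ios::app);
  out.write(blob.data(), blob.size());
  const int32_t size = static_cast<int32_t>(blob.size());
  const int32_t magic = 0x2130634c;  // Reads as "Lc0!" in little-endian order.
  out.write(reinterpret_cast<const char*>(&size), sizeof(size));
  out.write(reinterpret_cast<const char*>(&magic), sizeof(magic));
}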
+ int32_t size, magic; + if (fseek(fp, -8, SEEK_END) || fread(&size, 4, 1, fp) != 1 || + fread(&magic, 4, 1, fp) != 1 || magic != 0x2130634c) { + fclose(fp); + throw Exception("No embedded file detected."); + } + fseek(fp, -size - 8, SEEK_END); + } + fflush(fp); + gzFile file = gzdopen(dup(fileno(fp)), "rb"); + fclose(fp); + if (!file) { + throw Exception("Cannot process file " + filename); + } while (true) { const int sz = gzread(file, &buffer[bytes_read], buffer.size() - bytes_read); diff --git a/src/utils/commandline.cc b/src/utils/commandline.cc index 7d0bc765f8..22df960462 100644 --- a/src/utils/commandline.cc +++ b/src/utils/commandline.cc @@ -35,7 +35,18 @@ std::vector CommandLine::arguments_; std::vector> CommandLine::modes_; void CommandLine::Init(int argc, const char** argv) { +#ifdef _WIN32 + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) { + binary_ = pgmptr; + } else { + binary_ = argv[0]; + } +#else binary_ = argv[0]; +#endif arguments_.clear(); std::ostringstream params; for (int i = 1; i < argc; ++i) { From e153fa17518f2b075bc26c7a958cd61bcb2b182a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Sun, 8 Mar 2020 22:04:35 +0200 Subject: [PATCH 048/151] Add max out of order evals parameter (#1115) * Add max out of order evals parameter * Set default to max out of order evals in selfplay --- src/mcts/params.cc | 7 ++++++- src/mcts/params.h | 4 ++++ src/mcts/search.cc | 2 +- src/selfplay/tournament.cc | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 307d2b7fde..c505f6df9d 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -176,6 +176,9 @@ const OptionId SearchParams::kOutOfOrderEvalId{ "in the cache or is terminal, evaluate it right away without sending the " "batch to the NN. 
When off, this may only happen with the very first node " "of a batch; when on, this can happen with any node."}; +const OptionId SearchParams::kMaxOutOfOrderEvalsId{ + "max-out-of-order-evals", "MaxOutOfOrderEvals", + "Maximum number of out of order evals during gathering of a batch."}; const OptionId SearchParams::kStickyEndgamesId{ "sticky-endgames", "StickyEndgames", "When an end of game position is found during search, allow the eval of " @@ -279,6 +282,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMaxCollisionEventsId, 1, 1024) = 32; options->Add(kMaxCollisionVisitsId, 1, 1000000) = 9999; options->Add(kOutOfOrderEvalId) = true; + options->Add(kMaxOutOfOrderEvalsId, 1, 10000) = 256; options->Add(kStickyEndgamesId) = true; options->Add(kSyzygyFastPlayId) = true; options->Add(kMultiPvId, 1, 500) = 1; @@ -365,7 +369,8 @@ SearchParams::SearchParams(const OptionsDict& options) kDrawScoreOpponent{options.Get(kDrawScoreOpponentId.GetId()) / 100.0f}, kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, - kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f} { + kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f}, + kMaxOutOfOrderEvals(options.Get(kMaxOutOfOrderEvalsId.GetId())) { if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > 1.0f) { diff --git a/src/mcts/params.h b/src/mcts/params.h index 1dae56e8ca..1634f9a0dc 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -113,6 +113,8 @@ class SearchParams { float GetWhiteDrawDelta() const { return kDrawScoreWhite; } float GetBlackDrawDelta() const { return kDrawScoreBlack; } + int GetMaxOutOfOrderEvals() const { return kMaxOutOfOrderEvals; } + // Search parameter IDs. static const OptionId kMiniBatchSizeId; static const OptionId kMaxPrefetchBatchId; @@ -160,6 +162,7 @@ class SearchParams { static const OptionId kDrawScoreOpponentId; static const OptionId kDrawScoreWhiteId; static const OptionId kDrawScoreBlackId; + static const OptionId kMaxOutOfOrderEvalsId; private: const OptionsDict& options_; @@ -201,6 +204,7 @@ class SearchParams { const float kDrawScoreOpponent; const float kDrawScoreWhite; const float kDrawScoreBlack; + const int kMaxOutOfOrderEvals; }; } // namespace lczero diff --git a/src/mcts/search.cc b/src/mcts/search.cc index a005c43e2c..ea9e91a7fe 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -825,7 +825,7 @@ void SearchWorker::GatherMinibatch() { // If we had too many (kMiniBatchSize) nodes out of order, also interrupt the // iteration so that search can exit. while (minibatch_size < params_.GetMiniBatchSize() && - number_out_of_order_ < params_.GetMiniBatchSize()) { + number_out_of_order_ < params_.GetMaxOutOfOrderEvals()) { // If there's something to process without touching slow neural net, do it. if (minibatch_size > 0 && computation_->GetCacheMisses() == 0) return; // Pick next node to extend. 
diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 2c603800f2..df7882cc73 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -112,6 +112,7 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { auto defaults = options->GetMutableDefaultsOptions(); defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 32); + defaults->Set(SearchParams::kMaxOutOfOrderEvalsId.GetId(), 32); defaults->Set(SearchParams::kCpuctId.GetId(), 1.2f); defaults->Set(SearchParams::kCpuctFactorId.GetId(), 0.0f); defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 1.0f); From cbf618d0a615d2d26c71fb9a1c026b8cad4b5a97 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sun, 8 Mar 2020 22:19:00 +0200 Subject: [PATCH 049/151] make android apk (#1116) Co-authored-by: borg323 --- appveyor.yml | 57 +++++++++++++++------------- scripts/appveyor_android_build.cmd | 5 +++ scripts/appveyor_android_package.cmd | 17 +++++++++ 3 files changed, 53 insertions(+), 26 deletions(-) create mode 100644 scripts/appveyor_android_build.cmd create mode 100644 scripts/appveyor_android_package.cmd diff --git a/appveyor.yml b/appveyor.yml index 360a778729..31e552a996 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -10,8 +10,7 @@ environment: - NAME: gpu-opencl - NAME: cpu-dnnl - NAME: cpu-openblas - - NAME: android-aarch64 - - NAME: android-armv7a + - NAME: android clone_folder: c:\projects\lc0 install: - cmd: set CUDA=false @@ -20,8 +19,7 @@ install: - cmd: set BLAS=false - cmd: set GTEST=false - cmd: set ANDROID=false -- cmd: IF %NAME%==android-aarch64 set ANDROID=true -- cmd: IF %NAME%==android-armv7a set ANDROID=true +- cmd: IF %NAME%==android set ANDROID=true - cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true - cmd: IF %NAME%==gpu-dx12 set DX=true - cmd: IF %NAME%==gpu-opencl set OPENCL=true @@ -46,19 +44,19 @@ install: - cmd: set PATH=C:\Python36;C:\Python36\scripts;%PATH% - cmd: pip3 install --upgrade meson - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 -- cmd: IF %NAME%==android-aarch64 C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir \android-standalone-64 -- cmd: IF %NAME%==android-aarch64 set PATH=C:\android-standalone-64\bin;%PATH% -- cmd: IF %NAME%==android-aarch64 sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile -- cmd: IF %NAME%==android-aarch64 IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-aarch64.zip -- cmd: IF %NAME%==android-aarch64 IF NOT EXIST C:\cache\OpenBLAS 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS -- cmd: IF %NAME%==android-armv7a C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir \android-standalone-32 -- cmd: IF %NAME%==android-armv7a set PATH=C:\android-standalone-32\bin;%PATH% -- cmd: IF %NAME%==android-armv7a sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile -- cmd: IF %NAME%==android-armv7a IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-armv7a.zip -- cmd: IF %NAME%==android-armv7a IF NOT EXIST C:\cache\OpenBLAS 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS +- cmd: IF %NAME%==android 
C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir \android-standalone-64 +- cmd: IF %NAME%==android set PATH=C:\android-standalone-64\bin;%PATH% +- cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64 +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-aarch64.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS +- cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir \android-standalone-32 +- cmd: IF %NAME%==android set PATH=C:\android-standalone-32\bin;%PATH% +- cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-armv7a.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS - cmd: set PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache -- cmd: IF NOT EXIST c:\cache\591226.pb.gz IF %ANDROID%==false appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz +- cmd: IF NOT EXIST c:\cache\591226.pb.gz appveyor DownloadFile http://training.lczero.org/get_network?sha=47e3f899519dc1bc95496a457b77730fce7b0b89b6187af5c01ecbbd02e88398 -Filename c:\cache\591226.pb.gz - cmd: touch -t 201801010000.00 c:\cache\591226.pb.gz - cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy - cmd: IF %GTEST%==true cd C:\cache\syzygy @@ -75,25 +73,32 @@ before_build: - cmd: SET BUILD_BLAS=%BLAS% - cmd: IF %OPENCL%==true SET BUILD_BLAS=true - cmd: IF %DX%==true SET BUILD_BLAS=true +- cmd: SET EMBED=false +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %ANDROID%==true SET EMBED=true - cmd: IF %ANDROID%==false meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static -- cmd: IF %ANDROID%==true meson build --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\%NAME%\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\%NAME%\lib" -Ddefault_library=static --cross-file crossfile +- cmd: IF %ANDROID%==true meson arm64-v8a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-aarch64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-aarch64\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-aarch64 +- cmd: IF %ANDROID%==true meson armeabi-v7a --buildtype release -Dgtest=false 
-Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-armv7a\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-armv7a\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-armv7a build_script: - cmd: IF %ANDROID%==false call scripts\appveyor_win_build.cmd -- cmd: cd build -- cmd: IF %ANDROID%==true ninja -- cmd: IF %ANDROID%==true ren lc0 lc0-%NAME% +- cmd: IF %ANDROID%==true call scripts\appveyor_android_build.cmd - cmd: cd C:\projects\lc0 after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true IF %ANDROID%==false call scripts\appveyor_win_package.cmd +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %ANDROID%==true call scripts\appveyor_android_package.cmd +- cmd: cd C:\projects\lc0 artifacts: - path: build/lc0.exe name: lc0-$(NAME) - - path: build/lc0-$(NAME) - name: lc0-$(NAME) + - path: arm64-v8a/lc0 + name: lc0-android-arm64-v8a + - path: armeabi-v7a/lc0 + name: lc0-android-armeabi-v7a - path: /lc0*.zip/ name: lc0-$(APPVEYOR_REPO_TAG_NAME)-windows-$(NAME)-zip - path: build/lc0.pdb name: lc0-debug-symbols + - path: /lc0*.apk/ + name: lc0-$(APPVEYOR_REPO_TAG_NAME)-android-apk deploy: - provider: GitHub artifact: /.*\.zip/ @@ -102,17 +107,17 @@ deploy: on: appveyor_repo_tag: true - provider: GitHub - artifact: /lc0-android.*/ + artifact: /.*\.apk/ auth_token: secure: USFAdwQKTXqOXQjCYQfzWvzRpUhvqJLBkN4hbOg+j876vDxGZHt9bMYayb5evePp on: appveyor_repo_tag: true test_script: -- cmd: cd build +- cmd: IF %GTEST%==true cd build - cmd: IF %GTEST%==true xcopy /s /i C:\cache\syzygy syzygy - cmd: IF %GTEST%==true meson test --print-errorlogs -- cmd: cd .. +- cmd: cd C:\projects\lc0 on_finish: -- cmd: cd C:\projects\lc0\build +- cmd: IF %GTEST%==true cd C:\projects\lc0\build - cmd: IF %GTEST%==true for %%a in (*.xml) do curl -F file=@%%a https://ci.appveyor.com/api/testresults/junit/%APPVEYOR_JOB_ID% -- cmd: cd .. +- cmd: cd C:\projects\lc0 diff --git a/scripts/appveyor_android_build.cmd b/scripts/appveyor_android_build.cmd new file mode 100644 index 0000000000..444d245be2 --- /dev/null +++ b/scripts/appveyor_android_build.cmd @@ -0,0 +1,5 @@ +cd arm64-v8a +ninja +cd C:\projects\lc0 +cd armeabi-v7a +ninja diff --git a/scripts/appveyor_android_package.cmd b/scripts/appveyor_android_package.cmd new file mode 100644 index 0000000000..76279ba1a4 --- /dev/null +++ b/scripts/appveyor_android_package.cmd @@ -0,0 +1,17 @@ +git clone https://github.com/lealgo/chessenginesupport-androidlib.git --branch lc0 --single-branch oex +cd oex +git checkout 949de43f0c0c6339c0e66a6711d24987a67b29d8 +cd .. 
+perl -e "printf '%%sLc0!', pack('V', -s 'c:/cache/591226.pb.gz')" >tail.bin +copy /y /b arm64-v8a\lc0+c:\cache\591226.pb.gz+tail.bin oex\LeelaChessEngine\leelaChessEngine\src\main\jniLibs\arm64-v8a\liblc0.so +copy /y /b armeabi-v7a\lc0+c:\cache\591226.pb.gz+tail.bin oex\LeelaChessEngine\leelaChessEngine\src\main\jniLibs\armeabi-v7a\liblc0.so +set ANDROID_HOME=C:\android-sdk-windows +appveyor DownloadFile https://dl.google.com/android/repository/sdk-tools-windows-3859397.zip +7z x sdk-tools-windows-3859397.zip -oC:\android-sdk-windows > nul +yes | C:\android-sdk-windows\tools\bin\sdkmanager.bat --licenses +cd oex\LeelaChessEngine +rem sed -i "s/591226/%NET%/" leelaChessEngine/src/main/res/values/strings.xml +sed -i "/versionCode/ s/1/%APPVEYOR_BUILD_NUMBER%/" leelaChessEngine/src/main/AndroidManifest.xml +sed -i "s/0.25dev/%APPVEYOR_REPO_TAG_NAME%/" leelaChessEngine/src/main/AndroidManifest.xml +call gradlew.bat assemble +copy leelaChessEngine\build\outputs\apk\debug\leelaChessEngine-debug.apk ..\..\lc0-%APPVEYOR_REPO_TAG_NAME%-android.apk From 2c0754b4ba2ef0c483a4b77b475cb4ccf55e3a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Tue, 10 Mar 2020 15:54:04 -0400 Subject: [PATCH 050/151] Lower Android API level requirements for armv7-a build. (#1118) --- appveyor.yml | 8 ++++---- cross-files/armv7a-linux-android | 11 +++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 31e552a996..cb586545eb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -47,12 +47,12 @@ install: - cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir \android-standalone-64 - cmd: IF %NAME%==android set PATH=C:\android-standalone-64\bin;%PATH% - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64 -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-aarch64.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS -- cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir \android-standalone-32 +- cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 21 --stl libc++ --install-dir \android-standalone-32 - cmd: IF %NAME%==android set PATH=C:\android-standalone-32\bin;%PATH% - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a -- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8/openblas-android-armv7a.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS - cmd: set 
PKG_FOLDER="C:\cache" - cmd: IF NOT EXIST c:\cache mkdir c:\cache @@ -65,7 +65,7 @@ install: - cmd: IF %GTEST%==true IF NOT EXIST KQvKQ.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK{P,N,R,B,Q}.rtb{w,z} - cmd: cd C:\projects\lc0 cache: - - C:\cache + - C:\cache -> appveyor.yml - 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0' - C:\projects\lc0\subprojects\packagecache before_build: diff --git a/cross-files/armv7a-linux-android b/cross-files/armv7a-linux-android index ae01719bed..131c840036 100644 --- a/cross-files/armv7a-linux-android +++ b/cross-files/armv7a-linux-android @@ -1,9 +1,15 @@ # Tested with Android NDK r18, standalone toolchain -# Targeting API level 24 +# Targeting API level 21 +# +# When targeting API levels < 24 the build fails unless _FILE_OFFSET_BITS is unset. +# Meson passes _FILE_OFFSET_BITS=64 but recent NDK toolchains have issues building +# for 32-bit ABIs when such macro it set. Relevant links: +# https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md +# https://github.com/mesonbuild/meson/pull/2996#issuecomment-384045808 # # First create the standalone toolchain: -# ./make_standalone_toolchain.py --arch arm --api 24 --stl libc++ --install-dir android-standalone-32 +# ./make_standalone_toolchain.py --arch arm --api 21 --stl libc++ --install-dir android-standalone-32 # # Then set the toolchain path on your environment: # export PATH="$HOME/.local/share/android-sdk/android-toolchains/android-standalone-32/bin:$PATH" @@ -15,6 +21,7 @@ cpu = 'armv7a' endian = 'little' [properties] +cpp_args = ['-U_FILE_OFFSET_BITS'] cpp_link_args = ['-llog', '-static-libstdc++'] [binaries] From 2654071cf89bf3cfb4b5501e98179b97b250a47e Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 11 Mar 2020 13:01:29 +0200 Subject: [PATCH 051/151] Fix hangs with very early stop Co-authored-by: borg323 Co-authored-by: Tilps --- src/mcts/search.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index ea9e91a7fe..8264a9c3bb 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -365,7 +365,7 @@ void Search::MaybeTriggerStop(const IterationStats& stats, // Already responded bestmove, nothing to do here. if (bestmove_is_sent_) return; // Don't stop when the root node is not yet expanded. - if (total_playouts_ == 0) return; + if (total_playouts_ + initial_visits_ == 0) return; if (!stop_.load(std::memory_order_acquire) || !ok_to_respond_bestmove_) { if (stopper_->ShouldStop(stats, hints)) FireStopInternal(); @@ -756,8 +756,12 @@ void SearchWorker::ExecuteOneIteration() { if (params_.GetMaxConcurrentSearchers() != 0) { while (true) { // If search is stop, we've not gathered or done anything and we don't - // want to, so we can safely skip all below. - if (search_->stop_.load(std::memory_order_acquire)) return; + // want to, so we can safely skip all below. But make sure we have done + // at least one iteration. 
+ if (search_->stop_.load(std::memory_order_acquire) && + search_->GetTotalPlayouts() + search_->initial_visits_ > 0) { + return; + } int available = search_->pending_searchers_.load(std::memory_order_acquire); if (available > 0 && From c46b51e1d673bea0c13376ba6a4641d6e44d6f60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Wed, 11 Mar 2020 17:58:34 +0200 Subject: [PATCH 052/151] Max out of order evals relative to the maximum batch size (#1119) --- src/mcts/params.cc | 11 +++++++---- src/mcts/search.cc | 4 ++-- src/selfplay/tournament.cc | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index c505f6df9d..d4bbbdb02d 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -177,8 +177,9 @@ const OptionId SearchParams::kOutOfOrderEvalId{ "batch to the NN. When off, this may only happen with the very first node " "of a batch; when on, this can happen with any node."}; const OptionId SearchParams::kMaxOutOfOrderEvalsId{ - "max-out-of-order-evals", "MaxOutOfOrderEvals", - "Maximum number of out of order evals during gathering of a batch."}; + "max-out-of-order-evals-factor", "MaxOutOfOrderEvalsFactor", + "Maximum number of out of order evals during gathering of a batch is " + "calculated by multiplying the maximum batch size by this number."}; const OptionId SearchParams::kStickyEndgamesId{ "sticky-endgames", "StickyEndgames", "When an end of game position is found during search, allow the eval of " @@ -282,7 +283,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMaxCollisionEventsId, 1, 1024) = 32; options->Add(kMaxCollisionVisitsId, 1, 1000000) = 9999; options->Add(kOutOfOrderEvalId) = true; - options->Add(kMaxOutOfOrderEvalsId, 1, 10000) = 256; + options->Add(kMaxOutOfOrderEvalsId, 0.0f, 100.0f) = 1.0f; options->Add(kStickyEndgamesId) = true; options->Add(kSyzygyFastPlayId) = true; options->Add(kMultiPvId, 1, 500) = 1; @@ -370,7 +371,9 @@ SearchParams::SearchParams(const OptionsDict& options) 100.0f}, kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f}, - kMaxOutOfOrderEvals(options.Get(kMaxOutOfOrderEvalsId.GetId())) { + kMaxOutOfOrderEvals( + std::max(1, int(options.Get(kMaxOutOfOrderEvalsId.GetId()) * + options.Get(kMiniBatchSizeId.GetId())))) { if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > 1.0f) { diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 8264a9c3bb..b84fabdeea 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -826,8 +826,8 @@ void SearchWorker::GatherMinibatch() { number_out_of_order_ = 0; // Gather nodes to process in the current batch. - // If we had too many (kMiniBatchSize) nodes out of order, also interrupt the - // iteration so that search can exit. + // If we had too many nodes out of order, also interrupt the iteration so + // that search can exit. while (minibatch_size < params_.GetMiniBatchSize() && number_out_of_order_ < params_.GetMaxOutOfOrderEvals()) { // If there's something to process without touching slow neural net, do it. 
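To make the new scaling concrete: the cap is now simply the factor multiplied by the minibatch size, floored at 1. With the factor at its new default of 1.0, a minibatch size of 256 allows up to 256 out-of-order evals per gathered batch, a factor of 0.5 with the same minibatch would allow 128, and the selfplay minibatch of 32 gives 32. The 256 figure is only an example batch size used for the arithmetic, not a value taken from this patch.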
diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index df7882cc73..2c603800f2 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -112,7 +112,6 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { auto defaults = options->GetMutableDefaultsOptions(); defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 32); - defaults->Set(SearchParams::kMaxOutOfOrderEvalsId.GetId(), 32); defaults->Set(SearchParams::kCpuctId.GetId(), 1.2f); defaults->Set(SearchParams::kCpuctFactorId.GetId(), 0.0f); defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 1.0f); From 10be03905bd495fb35ab9278eb60a19387255c86 Mon Sep 17 00:00:00 2001 From: Naphthalin <40385638+Naphthalin@users.noreply.github.com> Date: Wed, 11 Mar 2020 19:19:54 +0100 Subject: [PATCH 053/151] Refactor moves left head parameters into factor and slope (#1110) * Refactored moves left head parameters into factor (unchanged) and slope instead of scale. * fixed line length * Changed MovesLeftFactor to MovesLeftMaxEffect Co-authored-by: Naphthalin --- src/mcts/params.cc | 26 ++++++++++++-------------- src/mcts/params.h | 12 ++++++------ src/mcts/search.cc | 7 ++++--- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index d4bbbdb02d..f355ef6ff6 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -208,22 +208,20 @@ const OptionId SearchParams::kHistoryFillId{ "one. During the first moves of the game such historical positions don't " "exist, but they can be synthesized. This parameter defines when to " "synthesize them (always, never, or only at non-standard fen position)."}; -const OptionId SearchParams::kMovesLeftFactorId{ - "moves-left-factor", "MovesLeftFactor", - "Bonus to add to the score of a node based on how much shorter/longer " - "it makes when winning/losing."}; +const OptionId SearchParams::kMovesLeftMaxEffectId{ + "moves-left-max-effect", "MovesLeftMaxEffect", + "Maximum bonus to add to the score of a node based on how much " + "shorter/longer it makes the game when winning/losing."}; const OptionId SearchParams::kMovesLeftThresholdId{ "moves-left-threshold", "MovesLeftThreshold", "Absolute value of node Q needs to exceed this value before shorter wins " "or longer losses are considered."}; -const OptionId SearchParams::kMovesLeftScaleId{ - "moves-left-scale", "MovesLeftScale", +const OptionId SearchParams::kMovesLeftSlopeId{ + "moves-left-slope", "MovesLeftSlope", "Controls how the bonus for shorter wins or longer losses is adjusted " "based on how many moves the move is estimated to shorten/lengthen the " - "game. The move shortening/lengthening the game by this amount of plies " - "or more compared to the best node, gets the full MovesLeftFactor bonus " - "added. Moves shortening/lengthening by less amount of moves have bonus " - "scaled linearly."}; + "game. 
The move difference is multiplied with the slope and capped at " + "MovesLeftMaxEffect."}; const OptionId SearchParams::kShortSightednessId{ "short-sightedness", "ShortSightedness", "Used to focus more on short term gains over long term."}; @@ -297,9 +295,9 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kScoreTypeId, score_type) = "centipawn"; std::vector history_fill_opt{"no", "fen_only", "always"}; options->Add(kHistoryFillId, history_fill_opt) = "fen_only"; - options->Add(kMovesLeftFactorId, 0.0f, 1.0f) = 0.0f; + options->Add(kMovesLeftMaxEffectId, 0.0f, 1.0f) = 0.0f; options->Add(kMovesLeftThresholdId, 0.0f, 1.0f) = 1.0f; - options->Add(kMovesLeftScaleId, 1.0f, 100.0f) = 10.0f; + options->Add(kMovesLeftSlopeId, 0.0f, 1.0f) = 0.001f; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; options->Add(kMaxConcurrentSearchersId, 0, 128) = 1; @@ -358,9 +356,9 @@ SearchParams::SearchParams(const OptionsDict& options) kHistoryFill( EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())), - kMovesLeftFactor(options.Get(kMovesLeftFactorId.GetId())), + kMovesLeftMaxEffect(options.Get(kMovesLeftMaxEffectId.GetId())), kMovesLeftThreshold(options.Get(kMovesLeftThresholdId.GetId())), - kMovesLeftScale(options.Get(kMovesLeftScaleId.GetId())), + kMovesLeftSlope(options.Get(kMovesLeftSlopeId.GetId())), kShortSightedness(options.Get(kShortSightednessId.GetId())), kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), kMaxConcurrentSearchers( diff --git a/src/mcts/params.h b/src/mcts/params.h index 1634f9a0dc..1566bad472 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -103,9 +103,9 @@ class SearchParams { return options_.Get(kScoreTypeId.GetId()); } FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } - float GetMovesLeftFactor() const { return kMovesLeftFactor; } + float GetMovesLeftMaxEffect() const { return kMovesLeftMaxEffect; } float GetMovesLeftThreshold() const { return kMovesLeftThreshold; } - float GetMovesLeftScale() const { return kMovesLeftScale; } + float GetMovesLeftSlope() const { return kMovesLeftSlope; } bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; } int GetMaxConcurrentSearchers() const { return kMaxConcurrentSearchers; } float GetSidetomoveDrawScore() const { return kDrawScoreSidetomove; } @@ -152,9 +152,9 @@ class SearchParams { static const OptionId kPerPvCountersId; static const OptionId kScoreTypeId; static const OptionId kHistoryFillId; - static const OptionId kMovesLeftFactorId; + static const OptionId kMovesLeftMaxEffectId; static const OptionId kMovesLeftThresholdId; - static const OptionId kMovesLeftScaleId; + static const OptionId kMovesLeftSlopeId; static const OptionId kShortSightednessId; static const OptionId kDisplayCacheUsageId; static const OptionId kMaxConcurrentSearchersId; @@ -194,9 +194,9 @@ class SearchParams { const bool kSyzygyFastPlay; const FillEmptyHistory kHistoryFill; const int kMiniBatchSize; - const float kMovesLeftFactor; + const float kMovesLeftMaxEffect; const float kMovesLeftThreshold; - const float kMovesLeftScale; + const float kMovesLeftSlope; const float kShortSightedness; const bool kDisplayCacheUsage; const int kMaxConcurrentSearchers; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index b84fabdeea..6ff1a465fa 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -1005,11 +1005,12 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( float M = 0.0f; if 
(do_moves_left_adjustment) { - const float m_scale = params_.GetMovesLeftScale(); + const float m_slope = params_.GetMovesLeftSlope(); + const float m_cap = params_.GetMovesLeftMaxEffect(); const float parent_m = node->GetM(); const float child_m = child.GetM(parent_m); - M = std::clamp(child_m - parent_m, -m_scale, m_scale) / m_scale * - std::copysign(params_.GetMovesLeftFactor(), node_q); + M = std::clamp(m_slope * (child_m - parent_m), -m_cap, m_cap) * + std::copysign(1.0f, node_q); } const float Q = child.GetQ(fpu, draw_score, params_.GetLogitQ()); From e1b8cf03d2dc878d791cd81f6da4eaaa60281bff Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Fri, 13 Mar 2020 09:51:44 +0200 Subject: [PATCH 054/151] minor changes (#1124) --- src/mcts/params.cc | 7 ++++--- src/mcts/search.cc | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index f355ef6ff6..fccff5eac7 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -369,9 +369,10 @@ SearchParams::SearchParams(const OptionsDict& options) 100.0f}, kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f}, - kMaxOutOfOrderEvals( - std::max(1, int(options.Get(kMaxOutOfOrderEvalsId.GetId()) * - options.Get(kMiniBatchSizeId.GetId())))) { + kMaxOutOfOrderEvals(std::max( + 1, + static_cast(options.Get(kMaxOutOfOrderEvalsId.GetId()) * + options.Get(kMiniBatchSizeId.GetId())))) { if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > 1.0f) { diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 6ff1a465fa..5b743739f8 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -553,7 +553,7 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, return a.GetM(0.0f) > b.GetM(0.0f); }); - if (count < edges.size()) { + if (count < static_cast(edges.size())) { edges.resize(count); } return edges; From 89fcfd12d05daf92f6da4fbdcf1fcccee698cbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Fri, 13 Mar 2020 15:16:03 +0200 Subject: [PATCH 055/151] Moves left head support for blas (#1107) * Moves left head support for blas * Fix review comments --- src/neural/blas/network_blas.cc | 120 ++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 38 deletions(-) diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc index 76f8e8440a..626d136fb9 100644 --- a/src/neural/blas/network_blas.cc +++ b/src/neural/blas/network_blas.cc @@ -47,7 +47,8 @@ namespace { class BlasComputation : public NetworkComputation { public: BlasComputation(const LegacyWeights& weights, const size_t max_batch_size, - const bool wdl, const bool conv_policy, const int blas_cores); + const bool wdl, const bool moves_left, const bool conv_policy, + const int blas_cores); virtual ~BlasComputation() {} @@ -80,8 +81,12 @@ class BlasComputation : public NetworkComputation { } } - float GetMVal(int /* sample */) const override { - return 0.0f; + float GetMVal(int sample) const override { + if (moves_left_) { + return m_values_[sample]; + } else { + return 0.0f; + } } // Returns P value @move_id of @sample. 
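  // For illustration (not part of this patch): a minimal sketch of how the
  // moves-left head output is consumed, assuming the NetworkComputation
  // plumbing shown above.
  //
  //   const float m = computation->GetMVal(i);  // plies-left estimate for
  //                                             // sample i; 0.0f when the
  //                                             // net has no moves-left head
  //
  // Search (patch 053 above) then turns the per-child difference of this
  // estimate into a bounded bonus:
  //
  //   M = clamp(MovesLeftSlope * (child_m - parent_m),
  //             -MovesLeftMaxEffect, MovesLeftMaxEffect);
  //   // ...and the result is multiplied by the sign of the node's Q
  //   // via std::copysign.
  //
  // E.g. with an assumed slope of 0.005 and max effect of 0.05, a 4-ply
  // difference contributes +/-0.02, and any difference of 10 or more plies
  // saturates at the +/-0.05 cap.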
@@ -105,7 +110,9 @@ class BlasComputation : public NetworkComputation { std::vector planes_; std::vector> policies_; std::vector q_values_; + std::vector m_values_; bool wdl_; + bool moves_left_; bool conv_policy_; }; @@ -116,7 +123,8 @@ class BlasNetwork : public Network { std::unique_ptr NewComputation() override { return std::make_unique(weights_, max_batch_size_, wdl_, - conv_policy_, blas_cores_); + moves_left_, conv_policy_, + blas_cores_); } const NetworkCapabilities& GetCapabilities() const override { @@ -131,18 +139,21 @@ class BlasNetwork : public Network { LegacyWeights weights_; size_t max_batch_size_; bool wdl_; + bool moves_left_; bool conv_policy_; int blas_cores_; }; BlasComputation::BlasComputation(const LegacyWeights& weights, const size_t max_batch_size, const bool wdl, - const bool conv_policy, const int blas_cores) + const bool moves_left, const bool conv_policy, + const int blas_cores) : weights_(weights), max_batch_size_(max_batch_size), policies_(0), q_values_(0), wdl_(wdl), + moves_left_(moves_left), conv_policy_(conv_policy) { #ifdef USE_DNNL omp_set_num_threads(blas_cores); @@ -155,9 +166,11 @@ BlasComputation::BlasComputation(const LegacyWeights& weights, void BlasComputation::ComputeBlocking() { // Retrieve network key dimensions from the weights structure. const auto num_value_channels = weights_.ip1_val_b.size(); + const auto num_moves_channels = weights_.ip1_mov_b.size(); const auto num_value_input_planes = weights_.value.biases.size(); const auto num_policy_input_planes = weights_.policy.biases.size(); - const auto num_output_policy = kPolicyOutputs; + const auto num_moves_input_planes = weights_.moves_left.biases.size(); + const auto num_output_policy = static_cast(kPolicyOutputs); const auto output_channels = weights_.input.biases.size(); // max_channels is the maximum number of input channels of any @@ -188,8 +201,9 @@ void BlasComputation::ComputeBlocking() { */ // Allocate data for the whole batch. - std::vector output_val(largest_batch_size * num_value_channels); - std::vector output_pol(largest_batch_size * num_output_policy); + size_t max_fc_channels = std::max( + num_value_channels, std::max(num_output_policy, num_moves_channels)); + std::vector output_fc(largest_batch_size * max_fc_channels); std::vector res_buffer1(largest_batch_size * max_channels * kSquares); std::vector res_buffer2(largest_batch_size * output_channels * @@ -200,10 +214,11 @@ void BlasComputation::ComputeBlocking() { WinogradConvolution3 convolve3(largest_batch_size, max_channels, max_output_channels); - std::vector policy_buffer(largest_batch_size * - num_policy_input_planes * kSquares); - std::vector value_buffer(largest_batch_size * num_value_input_planes * - kSquares); + size_t max_head_planes = + std::max(num_policy_input_planes, + std::max(num_value_input_planes, num_moves_input_planes)); + std::vector head_buffer(largest_batch_size * max_head_planes * + kSquares); // These ones will rotate during the computation. 
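  // For illustration (not part of this patch): head_buffer above is sized for
  // the largest of the three heads, so a single scratch buffer serves the
  // policy, value and moves-left convolutions alike. E.g. for a net with
  // (assumed) 32 policy, 32 value and 8 moves-left input planes,
  // max_head_planes = 32 and head_buffer holds 32 * 64 floats per batch entry.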
float* conv_in = res_buffer1.data(); @@ -272,10 +287,10 @@ void BlasComputation::ComputeBlocking() { convolve3.Forward(batch_size, output_channels, num_policy_input_planes, res, weights_.policy.weights.data(), - policy_buffer.data()); + head_buffer.data()); BiasResidualRelu(batch_size, num_policy_input_planes, - &policy_buffer.data()[0], weights_.policy.biases.data(), + &head_buffer.data()[0], weights_.policy.biases.data(), nullptr, false); // Mapping from convolutional policy to lc0 policy @@ -283,57 +298,57 @@ void BlasComputation::ComputeBlocking() { for (auto i = 0; i < kPolicyUsedPlanes * kSquares; i++) { auto j = kConvPolicyMap[i]; if (j >= 0) { - output_pol[batch * num_output_policy + j] = - policy_buffer[batch * num_policy_input_planes * kSquares + i]; + output_fc[batch * num_output_policy + j] = + head_buffer[batch * num_policy_input_planes * kSquares + i]; } } } } else { - Convolution1::Forward( - batch_size, output_channels, num_policy_input_planes, conv_out, - weights_.policy.weights.data(), policy_buffer.data()); + Convolution1::Forward(batch_size, output_channels, + num_policy_input_planes, conv_out, + weights_.policy.weights.data(), head_buffer.data()); - BiasResidualRelu(batch_size, num_policy_input_planes, &policy_buffer[0], + BiasResidualRelu(batch_size, num_policy_input_planes, &head_buffer[0], weights_.policy.biases.data()); FullyConnectedLayer::Forward1D( batch_size, num_policy_input_planes * kSquares, num_output_policy, - policy_buffer.data(), weights_.ip_pol_w.data(), + head_buffer.data(), weights_.ip_pol_w.data(), weights_.ip_pol_b.data(), false, // Relu Off - output_pol.data()); + output_fc.data()); + } + + for (size_t j = 0; j < batch_size; j++) { + std::vector policy(num_output_policy); + + // Get the moves + policy.assign(output_fc.begin() + j * num_output_policy, + output_fc.begin() + (j + 1) * num_output_policy); + policies_.emplace_back(std::move(policy)); } // Value head Convolution1::Forward(batch_size, output_channels, num_value_input_planes, conv_out, weights_.value.weights.data(), - value_buffer.data()); + head_buffer.data()); - BiasResidualRelu(batch_size, num_value_input_planes, &value_buffer[0], + BiasResidualRelu(batch_size, num_value_input_planes, &head_buffer[0], weights_.value.biases.data()); FullyConnectedLayer::Forward1D( batch_size, num_value_input_planes * kSquares, num_value_channels, - value_buffer.data(), weights_.ip1_val_w.data(), + head_buffer.data(), weights_.ip1_val_w.data(), weights_.ip1_val_b.data(), true, // Relu On - output_val.data()); - - for (size_t j = 0; j < batch_size; j++) { - std::vector policy(num_output_policy); - - // Get the moves - policy.assign(output_pol.begin() + j * num_output_policy, - output_pol.begin() + (j + 1) * num_output_policy); - policies_.emplace_back(std::move(policy)); - } + output_fc.data()); // Now get the score if (wdl_) { std::vector wdl(3 * batch_size); FullyConnectedLayer::Forward1D( - batch_size, num_value_channels, 3, output_val.data(), + batch_size, num_value_channels, 3, output_fc.data(), weights_.ip2_val_w.data(), weights_.ip2_val_b.data(), false, // Relu Off wdl.data()); @@ -350,12 +365,38 @@ void BlasComputation::ComputeBlocking() { for (size_t j = 0; j < batch_size; j++) { double winrate = FullyConnectedLayer::Forward0D( num_value_channels, weights_.ip2_val_w.data(), - &output_val[j * num_value_channels]) + + &output_fc[j * num_value_channels]) + weights_.ip2_val_b[0]; q_values_.emplace_back(std::tanh(winrate)); } } + if (moves_left_) { + Convolution1::Forward(batch_size, output_channels, 
num_moves_input_planes, + conv_out, weights_.moves_left.weights.data(), + head_buffer.data()); + + BiasResidualRelu(batch_size, num_moves_input_planes, &head_buffer[0], + weights_.moves_left.biases.data()); + + FullyConnectedLayer::Forward1D( + batch_size, num_moves_input_planes * kSquares, num_moves_channels, + head_buffer.data(), weights_.ip1_mov_w.data(), + weights_.ip1_mov_b.data(), + true, // Relu On + output_fc.data()); + + std::vector output_moves_left(batch_size); + FullyConnectedLayer::Forward1D( + batch_size, num_moves_channels, 1, output_fc.data(), + weights_.ip2_mov_w.data(), weights_.ip2_mov_b.data(), + true, // Relu On + output_moves_left.data()); + + for (size_t j = 0; j < batch_size; j++) { + m_values_.emplace_back(output_moves_left[j]); + } + } } } @@ -369,7 +410,7 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) { BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options) : capabilities_{file.format().network_format().input(), - pblczero::NetworkFormat::MOVES_LEFT_NONE}, + file.format().network_format().moves_left()}, weights_(file.weights()) { #ifndef USE_EIGEN blas_cores_ = options.GetOrDefault("blas_cores", 1); @@ -380,6 +421,9 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options) wdl_ = file.format().network_format().value() == pblczero::NetworkFormat::VALUE_WDL; + moves_left_ = file.format().network_format().moves_left() == + pblczero::NetworkFormat::MOVES_LEFT_V1; + conv_policy_ = file.format().network_format().policy() == pblczero::NetworkFormat::POLICY_CONVOLUTION; From 4e93d8b9108624dc17bbc35fd4ba9aa99b55914a Mon Sep 17 00:00:00 2001 From: cn4750 Date: Sat, 14 Mar 2020 21:08:05 -0400 Subject: [PATCH 056/151] Update references to the newest release (#1125) Update our readme to use references to the latest 0.24 release instead of the older 0.23 release. --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2d01fe5f0a..2d95514926 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Lc0 is a UCI-compliant chess engine designed to play chess via neural network, s Lc0 can be acquired either via a git clone or an archive download from GitHub. Be aware that there is a required submodule which isn't included in source archives. -For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.23`), which is equivalent to using the latest version tag. +For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.24`), which is equivalent to using the latest version tag. Versioning follows the Semantic Versioning guidelines, with major, minor and patch sections. The training server enforces game quality using the versions output by the client and engine. 
@@ -17,11 +17,11 @@ Versioning follows the Semantic Versioning guidelines, with major, minor and pat Download using git: ``` -git clone -b release/0.23 --recurse-submodules https://github.com/LeelaChessZero/lc0.git +git clone -b release/0.24 --recurse-submodules https://github.com/LeelaChessZero/lc0.git ``` If you prefer to download an archive, you need to also download and place the submodule: - * Download the [.zip](https://api.github.com/repos/LeelaChessZero/lc0/zipball/release/0.23) file ([.tar.gz](https://api.github.com/repos/LeelaChessZero/lc0/tarball/release/0.23) archive is also available) + * Download the [.zip](https://api.github.com/repos/LeelaChessZero/lc0/zipball/release/0.24) file ([.tar.gz](https://api.github.com/repos/LeelaChessZero/lc0/tarball/release/0.24) archive is also available) * Extract * Download https://github.com/LeelaChessZero/lczero-common/archive/master.zip (also available as [.tar.gz](https://github.com/LeelaChessZero/lczero-common/archive/master.tar.gz)) * Move the second archive into the first archive's `libs/lczero-common/` folder and extract From 668b2f2460d13fd35ae7ef6614cc2aaac5510a5a Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 15 Mar 2020 21:17:57 +1100 Subject: [PATCH 057/151] Remove default for logit_q parameter to GetQ (#1129) * Remove default for logit_q parameter to GetQ Also include a fix draw_score being declared bool in GetBestRootChildWithTemperature * Review feedback. * Fix missed comment. --- src/mcts/node.h | 6 +++--- src/mcts/search.cc | 38 ++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/mcts/node.h b/src/mcts/node.h index e1a51c39c5..bc089bf4ac 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -345,7 +345,7 @@ class EdgeAndNode { Node* node() const { return node_; } // Proxy functions for easier access to node/edge. - float GetQ(float default_q, float draw_score, bool logit_q = false) const { + float GetQ(float default_q, float draw_score, bool logit_q) const { return (node_ && node_->GetN() > 0) ? // Scale Q slightly to avoid logit(1) = infinity. @@ -382,8 +382,8 @@ class EdgeAndNode { } int GetVisitsToReachU(float target_score, float numerator, float default_q, - bool logit_q) const { - const auto q = GetQ(default_q, logit_q); + float draw_score, bool logit_q) const { + const auto q = GetQ(default_q, draw_score, logit_q); if (q >= target_score) return std::numeric_limits::max(); const auto n1 = GetNStarted() + 1; return std::max( diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 5b743739f8..3607bd6360 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -137,7 +137,7 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { const auto wl = edge.GetWL(); const auto d = edge.GetD(); const int w = static_cast(std::round(500.0 * (1.0 + wl - d))); - const auto q = edge.GetQ(default_q, draw_score); + const auto q = edge.GetQ(default_q, draw_score, /* logit_q= */ false); if (edge.IsTerminal() && wl != 0.0f) { uci_info.mate = std::copysign( std::round(edge.GetM(0.0f)) / 2 + (edge.IsTbTerminal() ? 
101 : 1), @@ -252,13 +252,16 @@ std::vector Search::GetVerboseStats(Node* node) const { std::vector edges; for (const auto& edge : node->Edges()) edges.push_back(edge); - std::sort(edges.begin(), edges.end(), - [&fpu, &U_coeff, &logit_q](EdgeAndNode a, EdgeAndNode b) { - return std::forward_as_tuple( - a.GetN(), a.GetQ(fpu, logit_q) + a.GetU(U_coeff)) < - std::forward_as_tuple( - b.GetN(), b.GetQ(fpu, logit_q) + b.GetU(U_coeff)); - }); + std::sort( + edges.begin(), edges.end(), + [&fpu, &U_coeff, &logit_q, &draw_score](EdgeAndNode a, EdgeAndNode b) { + return std::forward_as_tuple( + a.GetN(), + a.GetQ(fpu, draw_score, logit_q) + a.GetU(U_coeff)) < + std::forward_as_tuple( + b.GetN(), + b.GetQ(fpu, draw_score, logit_q) + b.GetU(U_coeff)); + }); std::vector infos; for (const auto& edge : edges) { @@ -286,7 +289,7 @@ std::vector Search::GetVerboseStats(Node* node) const { << ") "; oss << "(Q: " << std::setw(8) << std::setprecision(5) - << edge.GetQ(fpu, draw_score) << ") "; + << edge.GetQ(fpu, draw_score, /* logit_q= */ false) << ") "; oss << "(U: " << std::setw(6) << std::setprecision(5) << edge.GetU(U_coeff) << ") "; @@ -569,7 +572,7 @@ EdgeAndNode Search::GetBestChildNoTemperature(Node* parent) const { // count. EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { // Root is at even depth. - const bool draw_score = GetDrawScore(/* is_odd_depth= */ false); + const float draw_score = GetDrawScore(/* is_odd_depth= */ false); MoveList root_limit; PopulateRootMoveLimit(&root_limit); @@ -588,7 +591,7 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { } if (edge.GetN() + offset > max_n) { max_n = edge.GetN() + offset; - max_eval = edge.GetQ(fpu, draw_score); + max_eval = edge.GetQ(fpu, draw_score, /* logit_q= */ false); } } @@ -603,7 +606,7 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { edge.GetMove()) == root_limit.end()) { continue; } - if (edge.GetQ(fpu, draw_score) < min_eval) continue; + if (edge.GetQ(fpu, draw_score, /* logit_q= */ false) < min_eval) continue; sum += std::pow( std::max(0.0f, (static_cast(edge.GetN()) + offset) / max_n), 1 / temperature); @@ -621,7 +624,7 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { edge.GetMove()) == root_limit.end()) { continue; } - if (edge.GetQ(fpu, draw_score) < min_eval) continue; + if (edge.GetQ(fpu, draw_score, /* logit_q= */ false) < min_eval) continue; if (idx-- == 0) return edge; } assert(false); @@ -1028,7 +1031,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( if (second_best_edge) { int estimated_visits_to_change_best = best_edge.GetVisitsToReachU( - second_best, puct_mult, fpu, params_.GetLogitQ()); + second_best, puct_mult, fpu, draw_score, params_.GetLogitQ()); // Only cache for n-2 steps as the estimate created by GetVisitsToReachU // has potential rounding errors and some conservative logic that can push // it up to 2 away from the real value. @@ -1222,7 +1225,9 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget, bool is_odd_depth) { for (auto edge : node->Edges()) { if (edge.GetP() == 0.0f) continue; // Flip the sign of a score to be able to easily sort. - scores.emplace_back(-edge.GetU(puct_mult) - edge.GetQ(fpu, draw_score), + // TODO: should this use logit_q if set?? 
+ scores.emplace_back(-edge.GetU(puct_mult) - + edge.GetQ(fpu, draw_score, /* logit_q= */ false), edge); } @@ -1253,7 +1258,8 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget, bool is_odd_depth) { if (i != scores.size() - 1) { // Sign of the score was flipped for sorting, so flip it back. const float next_score = -scores[i + 1].first; - const float q = edge.GetQ(-fpu, draw_score); + // TODO: As above - should this use logit_q if set? + const float q = edge.GetQ(-fpu, draw_score, /* logit_q= */ false); if (next_score > q) { budget_to_spend = std::min(budget, int(edge.GetP() * puct_mult / (next_score - q) - From 4b92c366af9d987d76018b42c38689ef94b68728 Mon Sep 17 00:00:00 2001 From: Timofey Kondrashov Date: Wed, 18 Mar 2020 17:13:42 -0400 Subject: [PATCH 058/151] make pawn mask code slightly less disruptive (#1133) --- src/chess/board.cc | 8 ++------ src/chess/board.h | 5 +++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/chess/board.cc b/src/chess/board.cc index a33eb65992..7e780fe5b6 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -49,6 +49,8 @@ const char* ChessBoard::kStartposFen = const ChessBoard ChessBoard::kStartposBoard(ChessBoard::kStartposFen); +const BitBoard ChessBoard::kPawnMask = 0x00FFFFFFFFFFFF00ULL; + void ChessBoard::Clear() { std::memset(reinterpret_cast(this), 0, sizeof(ChessBoard)); } @@ -68,8 +70,6 @@ void ChessBoard::Mirror() { } namespace { -static const BitBoard kPawnMask = 0x00FFFFFFFFFFFF00ULL; - static const std::pair kKingMoves[] = { {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1}}; @@ -427,10 +427,6 @@ void InitializeMagicBitboards() { kBishopDirections); } -BitBoard ChessBoard::pawns() const { return pawns_ & kPawnMask; } - -BitBoard ChessBoard::en_passant() const { return pawns_ - pawns(); } - MoveList ChessBoard::GeneratePseudolegalMoves() const { MoveList result; result.reserve(60); diff --git a/src/chess/board.h b/src/chess/board.h index 77bd6e6b9c..1b213aa0f3 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -64,6 +64,7 @@ class ChessBoard { static const char* kStartposFen; static const ChessBoard kStartposBoard; + static const BitBoard kPawnMask; // Sets position from FEN string. // If @no_capture_ply and @moves are not nullptr, they are filled with number @@ -181,8 +182,8 @@ class ChessBoard { BitBoard ours() const { return our_pieces_; } BitBoard theirs() const { return their_pieces_; } - BitBoard pawns() const; - BitBoard en_passant() const; + BitBoard pawns() const { return pawns_ & kPawnMask; } + BitBoard en_passant() const { return pawns_ - kPawnMask; } BitBoard bishops() const { return bishops_ - rooks_; } BitBoard rooks() const { return rooks_ - bishops_; } BitBoard queens() const { return rooks_ & bishops_; } From e4af120aa6c3afc61111d6bcc7a4654bde33bf9f Mon Sep 17 00:00:00 2001 From: Timofey Kondrashov Date: Thu, 19 Mar 2020 15:26:56 -0400 Subject: [PATCH 059/151] Match knights() call more cleanly with other piece calls (#1131) --- src/chess/board.cc | 2 +- src/chess/board.h | 8 +++----- src/chess/pgn.h | 2 +- src/neural/encoder.cc | 4 ++-- src/syzygy/syzygy.cc | 4 ++-- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/chess/board.cc b/src/chess/board.cc index 7e780fe5b6..e72ed2f914 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -1109,7 +1109,7 @@ bool ChessBoard::HasMatingMaterial() const { // K v K, K+B v K, K+N v K. 
return false; } - if (!our_knights().empty() || !their_knights().empty()) { + if (!(knights().empty())) { return true; } diff --git a/src/chess/board.h b/src/chess/board.h index 1b213aa0f3..61f994d1e0 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -187,11 +187,9 @@ class ChessBoard { BitBoard bishops() const { return bishops_ - rooks_; } BitBoard rooks() const { return rooks_ - bishops_; } BitBoard queens() const { return rooks_ & bishops_; } - BitBoard our_knights() const { - return our_pieces_ - pawns() - our_king_ - rooks_ - bishops_; - } - BitBoard their_knights() const { - return their_pieces_ - pawns() - their_king_ - rooks_ - bishops_; + BitBoard knights() const { + return (our_pieces_ | their_pieces_) - pawns() - our_king_ - their_king_ - + rooks_ - bishops_; } BitBoard our_king() const { return 1ull << our_king_.as_int(); } BitBoard their_king() const { return 1ull << their_king_.as_int(); } diff --git a/src/chess/pgn.h b/src/chess/pgn.h index b25f37d98a..40fee31951 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -218,7 +218,7 @@ class PgnReader { } else if (p == 3) { searchBits = (board.bishops() & board.ours()); } else if (p == 4) { - searchBits = board.our_knights(); + searchBits = (board.knights() & board.ours()); } else if (p == 5) { searchBits = (board.rooks() & board.ours()); } diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index 293030a2b2..e5bda1fecf 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -107,14 +107,14 @@ InputPlanes EncodePositionForNN( const int base = i * kPlanesPerBoard; result[base + 0].mask = (board.ours() & board.pawns()).as_int(); - result[base + 1].mask = (board.our_knights()).as_int(); + result[base + 1].mask = (board.ours() & board.knights()).as_int(); result[base + 2].mask = (board.ours() & board.bishops()).as_int(); result[base + 3].mask = (board.ours() & board.rooks()).as_int(); result[base + 4].mask = (board.ours() & board.queens()).as_int(); result[base + 5].mask = (board.our_king()).as_int(); result[base + 6].mask = (board.theirs() & board.pawns()).as_int(); - result[base + 7].mask = (board.their_knights()).as_int(); + result[base + 7].mask = (board.theirs() & board.knights()).as_int(); result[base + 8].mask = (board.theirs() & board.bishops()).as_int(); result[base + 9].mask = (board.theirs() & board.rooks()).as_int(); result[base + 10].mask = (board.theirs() & board.queens()).as_int(); diff --git a/src/syzygy/syzygy.cc b/src/syzygy/syzygy.cc index 18c224ce61..c168cf4d1c 100644 --- a/src/syzygy/syzygy.cc +++ b/src/syzygy/syzygy.cc @@ -194,7 +194,7 @@ int count_pieces(const ChessBoard& pos, int type, bool theirs) { case BISHOP: return (all & pos.bishops()).count_few(); case KNIGHT: - return (theirs ? pos.their_knights() : pos.our_knights()).count_few(); + return (all & pos.knights()).count_few(); case PAWN: return (all & pos.pawns()).count_few(); default: @@ -215,7 +215,7 @@ BitBoard pieces(const ChessBoard& pos, int type, bool theirs) { case BISHOP: return all & pos.bishops(); case KNIGHT: - return theirs ? 
pos.their_knights() : pos.our_knights(); + return all & pos.knights(); case PAWN: return all & pos.pawns(); default: From a4b15d7fc756b8610b95bcd5594adfd22ea74ea0 Mon Sep 17 00:00:00 2001 From: Timofey Kondrashov Date: Thu, 19 Mar 2020 15:28:24 -0400 Subject: [PATCH 060/151] Match kings() call more cleanly with other piece calls (#1132) * Match kings() call more cleanly with other piece calls * make bitboard return the appropriate size --- src/chess/bitboard.h | 3 ++- src/chess/board.h | 5 +++-- src/chess/pgn.h | 2 +- src/neural/encoder.cc | 4 ++-- src/syzygy/syzygy.cc | 5 ++--- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/chess/bitboard.h b/src/chess/bitboard.h index 91d4871e99..3f478240ae 100644 --- a/src/chess/bitboard.h +++ b/src/chess/bitboard.h @@ -49,6 +49,7 @@ class BoardSquare { BoardSquare(const std::string& str, bool black = false) : BoardSquare(black ? '8' - str[1] : str[1] - '1', str[0] - 'a') {} constexpr std::uint8_t as_int() const { return square_; } + constexpr std::uint64_t as_board() const { return 1ULL << square_; } void set(int row, int col) { square_ = row * 8 + col; } // 0-based, bottom to top. @@ -221,7 +222,7 @@ class BitBoard { // Returns bitboard with one bit reset. friend BitBoard operator-(const BitBoard& a, const BoardSquare& b) { - return {a.board_ & ~(1ULL << b.as_int())}; + return {a.board_ & ~b.as_board()}; } // Returns difference (bitwise AND-NOT) of two boards. diff --git a/src/chess/board.h b/src/chess/board.h index 61f994d1e0..f53278f801 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -191,8 +191,9 @@ class ChessBoard { return (our_pieces_ | their_pieces_) - pawns() - our_king_ - their_king_ - rooks_ - bishops_; } - BitBoard our_king() const { return 1ull << our_king_.as_int(); } - BitBoard their_king() const { return 1ull << their_king_.as_int(); } + BitBoard kings() const { + return our_king_.as_board() | their_king_.as_board(); + } const Castlings& castlings() const { return castlings_; } bool flipped() const { return flipped_; } diff --git a/src/chess/pgn.h b/src/chess/pgn.h index 40fee31951..94bce12146 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -212,7 +212,7 @@ class PgnReader { if (p == 0) { searchBits = (board.pawns() & board.ours()); } else if (p == 1) { - searchBits = board.our_king(); + searchBits = (board.kings() & board.ours()); } else if (p == 2) { searchBits = (board.queens() & board.ours()); } else if (p == 3) { diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index e5bda1fecf..e39642713e 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -111,14 +111,14 @@ InputPlanes EncodePositionForNN( result[base + 2].mask = (board.ours() & board.bishops()).as_int(); result[base + 3].mask = (board.ours() & board.rooks()).as_int(); result[base + 4].mask = (board.ours() & board.queens()).as_int(); - result[base + 5].mask = (board.our_king()).as_int(); + result[base + 5].mask = (board.ours() & board.kings()).as_int(); result[base + 6].mask = (board.theirs() & board.pawns()).as_int(); result[base + 7].mask = (board.theirs() & board.knights()).as_int(); result[base + 8].mask = (board.theirs() & board.bishops()).as_int(); result[base + 9].mask = (board.theirs() & board.rooks()).as_int(); result[base + 10].mask = (board.theirs() & board.queens()).as_int(); - result[base + 11].mask = (board.their_king()).as_int(); + result[base + 11].mask = (board.theirs() & board.kings()).as_int(); const int repetitions = position.GetRepetitions(); if (repetitions >= 1) result[base + 12].SetAll(); diff 
--git a/src/syzygy/syzygy.cc b/src/syzygy/syzygy.cc index c168cf4d1c..90a74c77ce 100644 --- a/src/syzygy/syzygy.cc +++ b/src/syzygy/syzygy.cc @@ -207,7 +207,7 @@ BitBoard pieces(const ChessBoard& pos, int type, bool theirs) { const BitBoard all = theirs ? pos.theirs() : pos.ours(); switch (type) { case KING: - return theirs ? pos.their_king() : pos.our_king(); + return all & pos.kings(); case QUEEN: return all & pos.queens(); case ROOK: @@ -1321,8 +1321,7 @@ class SyzygyTablebaseImpl { const Key key = calc_key_from_position(pos); // Test for KvK - if (type == WDL && pos.ours() == pos.our_king() && - pos.theirs() == pos.their_king()) { + if (type == WDL && (pos.ours() | pos.theirs()) == pos.kings()) { return 0; } From cf4cc7af721ee14ace8592fbf30875859e7b0eea Mon Sep 17 00:00:00 2001 From: Daniel Uranga Date: Sat, 21 Mar 2020 20:10:20 -0300 Subject: [PATCH 061/151] Update CircleCI image and script (#1138) --- .circleci/Dockerfile | 10 ++++------ .circleci/config.yml | 33 ++++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/.circleci/Dockerfile b/.circleci/Dockerfile index ddf8f8001a..d4ea041b5c 100644 --- a/.circleci/Dockerfile +++ b/.circleci/Dockerfile @@ -1,11 +1,9 @@ FROM floopcz/tensorflow_cc:ubuntu-shared-cuda +ARG DEBIAN_FRONTEND=noninteractive RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB && sh -c 'echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list' && apt-get update && apt-get install -y intel-mkl-64bit-2018.2-046 -RUN apt-get install -y clang-6.0 ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev nvidia-cuda-dev nvidia-cuda-toolkit libgtest-dev git ssh tar gzip ca-certificates sudo +RUN apt-get install -y clang ninja-build python3-pip nvidia-opencl-dev libopenblas-dev libboost-dev libgtest-dev git ssh tar gzip ca-certificates sudo +RUN apt-get install -y g++-8 +RUN apt-get install -y cuda RUN pip3 install meson RUN ln -s /usr/include/ /usr/include/openblas - -RUN curl -OL https://github.com/google/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip -RUN unzip protoc-3.5.1-linux-x86_64.zip -d protoc3 -RUN sudo mv protoc3/bin/* /usr/local/bin/ -RUN sudo mv protoc3/include/* /usr/local/include/ diff --git a/.circleci/config.yml b/.circleci/config.yml index d55b7b7af1..69f8b8e476 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2 jobs: build: docker: - - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.4 + - image: danieluranga/leela_chess_zero-lc0_ubuntu_builder:0.0.7 steps: - checkout - run: @@ -11,15 +11,34 @@ jobs: git submodule init git submodule update --remote - run: - name: Build clang version - command: CC=clang-6.0 CXX=clang++-6.0 ./build.sh + name: Create Meson build dirs + command: mkdir build-gcc && mkdir build-clang - run: - command: cp build/release/lc0 /tmp/lc0-clang + name: Meson Clang + environment: + CC: clang + CXX: clang++ + command: meson build-clang - run: - name: Build g++ version - command: ./build.sh + name: Meson GCC + environment: + CC: gcc-8 + CXX: g++-8 + command: meson build-gcc - run: - command: cp build/release/lc0 /tmp/lc0-g++ + name: Build Clang + command: | + cd build-clang + ninja + - run: + name: Build GCC + command: | + cd build-gcc + ninja + - run: + command: cp build-clang/lc0 /tmp/lc0-clang + - run: + command: cp build-gcc/lc0 /tmp/lc0-g++ - store_artifacts: path: /tmp/lc0-clang 
destination: lc0-ubuntu-18-04-clang From 277f8abc34f793ae55db0e0301410996964910b5 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 22 Mar 2020 20:40:22 +1100 Subject: [PATCH 062/151] Make lc0 output v5 training data. (#1135) * Make lc0 output v5 training data. * Revert some unneeded formatting changes. * Review feedback. --- src/mcts/node.cc | 42 +++++++++++++++++++++++++++++------------- src/mcts/node.h | 10 ++++++---- src/mcts/search.cc | 8 +++++--- src/mcts/search.h | 9 +++++++-- src/neural/writer.cc | 2 +- src/neural/writer.h | 12 ++++++++---- src/selfplay/game.cc | 27 ++++++++++++++++++--------- src/selfplay/game.h | 2 +- 8 files changed, 75 insertions(+), 37 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 52fcde300b..08755aec98 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -329,14 +329,16 @@ uint64_t ReverseBitsInBytes(uint64_t v) { } } // namespace -V4TrainingData Node::GetV4TrainingData(GameResult game_result, - const PositionHistory& history, - FillEmptyHistory fill_empty_history, - float best_q, float best_d) const { - V4TrainingData result; +V5TrainingData Node::GetV5TrainingData( + GameResult game_result, const PositionHistory& history, + FillEmptyHistory fill_empty_history, + pblczero::NetworkFormat::InputFormat input_format, float best_q, + float best_d, float best_m) const { + V5TrainingData result; // Set version. - result.version = 4; + result.version = 5; + result.input_format = input_format; // Populate probabilities. auto total_n = GetChildrenVisits(); @@ -357,8 +359,7 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result, // Populate planes. InputPlanes planes = - EncodePositionForNN(pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, - history, 8, fill_empty_history); + EncodePositionForNN(input_format, history, 8, fill_empty_history); int plane_idx = 0; for (auto& plane : result.planes) { plane = ReverseBitsInBytes(planes[plane_idx++].mask); @@ -367,14 +368,23 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result, const auto& position = history.Last(); const auto& castlings = position.GetBoard().castlings(); // Populate castlings. - result.castling_us_ooo = castlings.we_can_000() ? 1 : 0; - result.castling_us_oo = castlings.we_can_00() ? 1 : 0; - result.castling_them_ooo = castlings.they_can_000() ? 1 : 0; - result.castling_them_oo = castlings.they_can_00() ? 1 : 0; + // For non-frc trained nets, just send 1 like we used to. + uint8_t queen_side = 1; + uint8_t king_side = 1; + // If frc trained, send the bit mask representing rook position. + if (input_format == pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE) { + queen_side <<= castlings.queenside_rook(); + king_side <<= castlings.kingside_rook(); + } + + result.castling_us_ooo = castlings.we_can_000() ? queen_side : 0; + result.castling_us_oo = castlings.we_can_00() ? king_side : 0; + result.castling_them_ooo = castlings.they_can_000() ? queen_side : 0; + result.castling_them_oo = castlings.they_can_00() ? king_side : 0; // Other params. result.side_to_move = position.IsBlackToMove() ? 1 : 0; - result.move_count = 0; + result.deprecated_move_count = 0; result.rule50_count = position.GetNoCaptureNoPawnPly(); // Game result. @@ -394,6 +404,12 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result, result.root_d = GetD(); result.best_d = best_d; + result.root_m = GetM(); + result.best_m = best_m; + + // Unknown here - will be filled in once the full data has been collected. 
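  // For illustration (not part of this patch): SelfPlayGame::WriteTrainingData
  // further down in this patch seeds the estimate with the final position's
  // best_m plus the number of stored positions minus one, then decrements it
  // by one ply per chunk. E.g. a game with 80 stored positions whose last
  // best_m is 12 writes plies_left = 91, 90, ..., 12.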
+ result.plies_left = 0; + return result; } diff --git a/src/mcts/node.h b/src/mcts/node.h index bc089bf4ac..d01fe072b8 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -38,6 +38,7 @@ #include "chess/position.h" #include "neural/encoder.h" #include "neural/writer.h" +#include "proto/net.pb.h" #include "utils/fastmath.h" #include "utils/mutex.h" @@ -215,10 +216,11 @@ class Node { // in depth parameter, and returns true if it was indeed updated. bool UpdateFullDepth(uint16_t* depth); - V4TrainingData GetV4TrainingData(GameResult result, - const PositionHistory& history, - FillEmptyHistory fill_empty_history, - float best_q, float best_d) const; + V5TrainingData GetV5TrainingData( + GameResult result, const PositionHistory& history, + FillEmptyHistory fill_empty_history, + pblczero::NetworkFormat::InputFormat input_format, float best_q, + float best_d, float best_m) const; // Returns range for iterating over edges. ConstIterator Edges() const; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 3607bd6360..cc2c79d165 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -398,14 +398,16 @@ void Search::MaybeTriggerStop(const IterationStats& stats, // Return the evaluation of the actual best child, regardless of temperature // settings. This differs from GetBestMove, which does obey any temperature // settings. So, somethimes, they may return results of different moves. -std::pair Search::GetBestEval() const { +Search::BestEval Search::GetBestEval() const { SharedMutex::SharedLock lock(nodes_mutex_); Mutex::Lock counters_lock(counters_mutex_); float parent_wl = -root_node_->GetWL(); float parent_d = root_node_->GetD(); - if (!root_node_->HasChildren()) return {parent_wl, parent_d}; + float parent_m = root_node_->GetM(); + if (!root_node_->HasChildren()) return {parent_wl, parent_d, parent_m}; EdgeAndNode best_edge = GetBestChildNoTemperature(root_node_); - return {best_edge.GetWL(), best_edge.GetD()}; + return {best_edge.GetWL(), best_edge.GetD(), + best_edge.GetM(parent_m - 1) + 1}; } std::pair Search::GetBestMove() { diff --git a/src/mcts/search.h b/src/mcts/search.h index ae6aa03d0a..714ee52f33 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -77,11 +77,16 @@ class Search { // Returns best move, from the point of view of white player. And also ponder. // May or may not use temperature, according to the settings. std::pair GetBestMove(); + + struct BestEval { + float wl; + float d; + float ml; + }; // Returns the evaluation of the best move, WITHOUT temperature. This differs // from the above function; with temperature enabled, these two functions may // return results from different possible moves. - // Returns pair {Q, D}. - std::pair GetBestEval() const; + BestEval GetBestEval() const; // Returns the total number of playouts in the search. std::int64_t GetTotalPlayouts() const; // Returns the search parameters. 
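For illustration (not part of the patch itself), the new BestEval fields unpack
into win/draw/loss probabilities with the same arithmetic the selfplay code
below uses; the local names in this sketch are assumptions:

    // Given Search::BestEval eval, with wl in [-1, 1], d in [0, 1], ml in plies:
    const float w = (eval.wl + 1.0f - eval.d) / 2.0f;  // win probability
    const float l = w - eval.wl;                       // loss probability
    // w + eval.d + l == 1, and eval.ml estimates the remaining game length.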
diff --git a/src/neural/writer.cc b/src/neural/writer.cc index b762d3d319..645be1bac0 100644 --- a/src/neural/writer.cc +++ b/src/neural/writer.cc @@ -51,7 +51,7 @@ TrainingDataWriter::TrainingDataWriter(int game_id) { if (!fout_) throw Exception("Cannot create gzip file " + filename_); } -void TrainingDataWriter::WriteChunk(const V4TrainingData& data) { +void TrainingDataWriter::WriteChunk(const V5TrainingData& data) { auto bytes_written = gzwrite(fout_, reinterpret_cast(&data), sizeof(data)); if (bytes_written != sizeof(data)) { diff --git a/src/neural/writer.h b/src/neural/writer.h index a373621267..337209656a 100644 --- a/src/neural/writer.h +++ b/src/neural/writer.h @@ -35,8 +35,9 @@ namespace lczero { #pragma pack(push, 1) -struct V4TrainingData { +struct V5TrainingData { uint32_t version; + uint32_t input_format; float probabilities[1858]; uint64_t planes[104]; uint8_t castling_us_ooo; @@ -45,14 +46,17 @@ struct V4TrainingData { uint8_t castling_them_oo; uint8_t side_to_move; uint8_t rule50_count; - uint8_t move_count; + uint8_t deprecated_move_count; // left in to keep 8 int8 fields. int8_t result; float root_q; float best_q; float root_d; float best_d; + float root_m; // In plies. + float best_m; // In plies. + float plies_left; // This is the training target for MLH. } PACKED_STRUCT; -static_assert(sizeof(V4TrainingData) == 8292, "Wrong struct size"); +static_assert(sizeof(V5TrainingData) == 8308, "Wrong struct size"); #pragma pack(pop) @@ -67,7 +71,7 @@ class TrainingDataWriter { } // Writes a chunk. - void WriteChunk(const V4TrainingData& data); + void WriteChunk(const V5TrainingData& data); // Flushes file and closes it. void Finalize(); diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index f67beaff87..4c8caaec09 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -136,23 +136,26 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, nodes_total_ += search_->GetTotalPlayouts(); if (abort_) break; - auto best_eval = search_->GetBestEval(); + const auto best_eval = search_->GetBestEval(); if (training) { // Append training data. The GameResult is later overwritten. - auto best_wl = best_eval.first; - auto best_d = best_eval.second; - training_data_.push_back(tree_[idx]->GetCurrentHead()->GetV4TrainingData( + const auto best_wl = best_eval.wl; + const auto best_d = best_eval.d; + const auto best_m = best_eval.ml; + training_data_.push_back(tree_[idx]->GetCurrentHead()->GetV5TrainingData( GameResult::UNDECIDED, tree_[idx]->GetPositionHistory(), - search_->GetParams().GetHistoryFill(), best_wl, best_d)); + search_->GetParams().GetHistoryFill(), + pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, best_wl, best_d, + best_m)); } - float eval = best_eval.first; + float eval = best_eval.wl; eval = (eval + 1) / 2; if (eval < min_eval_[idx]) min_eval_[idx] = eval; const int move_number = tree_[0]->GetPositionHistory().GetLength() / 2 + 1; - auto best_w = (best_eval.first + 1.0f - best_eval.second) / 2.0f; - auto best_d = best_eval.second; - auto best_l = best_w - best_eval.first; + auto best_w = (best_eval.wl + 1.0f - best_eval.d) / 2.0f; + auto best_d = best_eval.d; + auto best_l = best_w - best_eval.wl; max_eval_[0] = std::max(max_eval_[0], blacks_move ? best_l : best_w); max_eval_[1] = std::max(max_eval_[1], best_d); max_eval_[2] = std::max(max_eval_[2], blacks_move ? 
best_w : best_l); @@ -271,6 +274,10 @@ void SelfPlayGame::Abort() { } void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const { + if (training_data_.empty()) return; + // Base estimate off of best_m. If needed external processing can use a + // different approach. + float m_estimate = training_data_.back().best_m + training_data_.size() - 1; for (auto chunk : training_data_) { const bool black_to_move = chunk.side_to_move; if (game_result_ == GameResult::WHITE_WON) { @@ -280,6 +287,8 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const { } else { chunk.result = 0; } + chunk.plies_left = m_estimate; + m_estimate -= 1.0f; writer->WriteChunk(chunk); } } diff --git a/src/selfplay/game.h b/src/selfplay/game.h index df72e4cab6..3d3a0c35a5 100644 --- a/src/selfplay/game.h +++ b/src/selfplay/game.h @@ -116,7 +116,7 @@ class SelfPlayGame { std::mutex mutex_; // Training data to send. - std::vector training_data_; + std::vector training_data_; }; } // namespace lczero From 97b673d1054ffdd532d43c7d13f9121b5b0e8210 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 22 Mar 2020 23:24:21 +1100 Subject: [PATCH 063/151] Connect up input format when generating training data to allow v5 FRC. (#1142) * Connect up input format when generating training data. * Fix bug with FRC encoding to input plane. --- src/neural/encoder.cc | 8 ++++---- src/selfplay/game.cc | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index e39642713e..ee1797475c 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -69,12 +69,12 @@ InputPlanes EncodePositionForNN( // h-side (kingside) castling right. const auto& cast = board.castlings(); result[kAuxPlaneBase + 0].mask = - ((cast.we_can_000() ? ChessBoard::A1 : 0) | - (cast.they_can_000() ? ChessBoard::A8 : 0)) + ((cast.we_can_000() ? BoardSquare(ChessBoard::A1).as_board() : 0) | + (cast.they_can_000() ? BoardSquare(ChessBoard::A8).as_board() : 0)) << cast.queenside_rook(); result[kAuxPlaneBase + 1].mask = - ((cast.we_can_00() ? ChessBoard::A1 : 0) | - (cast.they_can_00() ? ChessBoard::A8 : 0)) + ((cast.we_can_00() ? BoardSquare(ChessBoard::A1).as_board() : 0) | + (cast.they_can_00() ? BoardSquare(ChessBoard::A8).as_board() : 0)) << cast.kingside_rook(); break; } diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index 4c8caaec09..641bfcd689 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -142,10 +142,11 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, const auto best_wl = best_eval.wl; const auto best_d = best_eval.d; const auto best_m = best_eval.ml; + const auto input_format = + options_[idx].network->GetCapabilities().input_format; training_data_.push_back(tree_[idx]->GetCurrentHead()->GetV5TrainingData( GameResult::UNDECIDED, tree_[idx]->GetPositionHistory(), - search_->GetParams().GetHistoryFill(), - pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, best_wl, best_d, + search_->GetParams().GetHistoryFill(), input_format, best_wl, best_d, best_m)); } From ea93c2604c55c2a8f4d711f99d318d4e474f8973 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sun, 22 Mar 2020 22:57:25 +0100 Subject: [PATCH 064/151] Change custom protobuf API (#1141) * Add compile_commands.json to .gitignore * Change protobuf mutation API. * Set has_ variable to true when setting non-message fields. * Address review changes (and some other changes): * In ParseFromString() has_* variables were not set. * No unaligned memory access. 
* Big endian support (although I'm not sure about float and double) * Strings are passed as std::string_view rather than const std::string& everywhere (not sure if a good idea) * Non-message values are returned by value rather than by const reference. * Make non-message parameters also passed by value rather than const-reference. --- .gitignore | 19 +-- scripts/compile_proto.py | 257 +++++++++++++++++++++----------------- src/neural/loader.cc | 35 ++---- src/utils/protomessage.cc | 203 +++++++----------------------- src/utils/protomessage.h | 106 +++------------- 5 files changed, 231 insertions(+), 389 deletions(-) diff --git a/.gitignore b/.gitignore index 7e1728d1f4..c9c30f98f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,13 @@ -build/ -testdata/ -LC0VSProj/ -CUDA_NN/ -.DS_Store -xcuserdata -subprojects/* !subprojects/*.wrap -lc0.xcodeproj/ *.swp .clang_complete -src/.vs/ \ No newline at end of file +.DS_Store +build/ +compile_commands.json +CUDA_NN/ +lc0.xcodeproj/ +LC0VSProj/ +src/.vs/ +subprojects/* +testdata/ +xcuserdata \ No newline at end of file diff --git a/scripts/compile_proto.py b/scripts/compile_proto.py index 0c40176b75..c1dda56a88 100755 --- a/scripts/compile_proto.py +++ b/scripts/compile_proto.py @@ -30,6 +30,7 @@ 'bytes': 'std::string_view', } ZIGZAG_TYPES = set(['sint32', 'sint64']) +FLOAT_TYPES = set(['float', 'double']) TYPES = {**VARINT_TYPES, **FIXED32_TYPES, **FIXED64_TYPES, **BYTES_TYPES} @@ -166,6 +167,26 @@ def GetCppType(self): else: return '::'.join(self.name) + def GetVariableCppType(self): + if self.IsBytesType(): + return 'std::string' + else: + return self.GetCppType() + + def IsVarintType(self): + return self.typetype == 'enum' or (self.typetype == 'basic' + and self.name in VARINT_TYPES) + + def IsFixedType(self): + return self.typetype == 'basic' and (self.name in FIXED64_TYPES + or self.name in FIXED32_TYPES) + + def IsBytesType(self): + return self.typetype == 'basic' and self.name in BYTES_TYPES + + def IsFloatType(self): + return self.typetype == 'basic' and self.name in FLOAT_TYPES + def GetWireType(self): if self.typetype == 'basic': if self.name in VARINT_TYPES: @@ -184,28 +205,8 @@ def GetWireType(self): else: raise ValueError('Unknown typetype %s' % self.typetype) - def DecodeFunction(self, wire_id, index): - if self.typetype == 'basic': - if self.name == 'double': - return 'GetDoubleVal(%d, %s)' % (wire_id, index) - if self.name == 'float': - return 'GetFloatVal(%d, %s)' % (wire_id, index) - if self.name in VARINT_TYPES: - return 'GetVarintVal(%d, %s)' % (wire_id, index) - if self.name in FIXED64_TYPES: - return 'GetFixed64Val(%d, %s)' % (wire_id, index) - if self.name in BYTES_TYPES: - return 'GetBytesVal(%d, %s)' % (wire_id, index) - if self.name in FIXED32_TYPES: - return 'GetFixed32Val(%d, %s)' % (wire_id, index) - raise ValueError('Unknown type %s' % self.name) - elif self.typetype == 'enum': - return 'GetVarintVal(%d, %s)' % (wire_id, index) - elif self.typetype == 'message': - return '%s::CreateNotOwned(GetBytesVal(%d, %s))' % ( - self.GetCppType(), wire_id, index) - else: - raise ValueError('Unknown typetype %s' % self.typetype) + def IsMessage(self): + return self.typetype == 'message' def IsIntegralType(self): if self.typetype == 'basic': @@ -242,64 +243,82 @@ def __init__(self, lexer, object_stack): def IsType(self): return False - def Generate(self, w): + def GetParser(self): name = self.name.group(0) - index = 'i' if self.category == 'repeated' else 'kLast' - wire_id = self.number * 8 + self.type.GetWireType() - func_body 
= self.type.DecodeFunction(wire_id, index) + if self.type.IsMessage(): + if self.category == 'repeated': + return 'add_%s()->MergeFromString(val)' % name + else: + return 'mutable_%s()->MergeFromString(val)' % name + cpp_type = self.type.GetCppType() - if self.type.IsZigzag(): - func_body = 'UnZigZag(%s)' % func_body - if self.type.IsIntegralType(): - func_body = 'lczero::kind_of_bit_cast<%s>(%s)' % ( - self.type.GetCppType(), func_body) + val = 'NOT IMPLEMENTED!' + if self.type.IsVarintType(): + val_val = 'UnZigZag(val)' if self.type.IsZigzag() else 'val' + val = 'static_cast<%s>(%s)' % (cpp_type, val_val) + elif self.type.IsFixedType(): + if self.type.IsFloatType(): + val = 'bit_cast<%s>(val)' % cpp_type + else: + val = 'static_cast<%s>(val)' % cpp_type + elif self.type.IsBytesType(): + val = 'val' if self.category == 'repeated': - w.Write('size_t %s_size() const { return WireFieldCount(%d); }' % ( - name, - wire_id, - )) - w.Write('%s %s(size_t i) const { return %s; }' % ( - cpp_type, - name, - func_body, - )) - w.Write('lczero::ProtoIterator<%s> %s() const {' % - (cpp_type, name)) - w.Write(' return lczero::ProtoIterator<%s>(%s_size(), ' - '[this](size_t i) {' % (cpp_type, name)) - w.Write(' return %s;' % func_body) - w.Write(' });') - w.Write('}') + return '%s_.push_back(%s)' % (name, val) + else: + return 'set_%s(%s)' % (name, val) + def GenerateCaseClause(self, w): + w.Write('case %d: %s; break;' % (self.number, self.GetParser())) + + def GenerateClear(self, w): + name = self.name.group(0) + if self.category == 'repeated': + w.Write('%s_.clear();' % name) else: - w.Write('bool has_%s() const { return WireFieldCount(%d) > 0; }' % - ( - name, - wire_id, - )) - w.Write('%s %s() const { return %s; }' % ( - cpp_type, - name, - func_body, - )) - - def GenerateForBuilder(self, w): + w.Write('has_%s_ = false;' % name) + w.Write('%s_ = {};' % name) + + def GenerateFunctions(self, w): name = self.name.group(0) - repeated = self.category == 'repeated' - wire_id = self.number * 8 + self.type.GetWireType() - # w.Write('void clear_%s() { WireFieldClear(%d); }' % (name, wire_id)) - if repeated: - pass + cpp_type = self.type.GetCppType() + if self.category == 'repeated': + if self.type.IsMessage(): + w.Write("%s* add_%s() { return &%s_.emplace_back(); }" % + (cpp_type, name, name)) + w.Write("const std::vector<%s>& %s() const { return %s_; }" % + (cpp_type, name, name)) else: - if self.type.typetype == 'enum': - w.Write('void set_%s(%s val) { WireFieldSetVarint' - '(%d, static_cast(val)); }' % - (name, self.type.GetCppType(), wire_id)) - if self.type.typetype == 'message': - w.Write('void set_%s(const %s& val) { WireFieldSetMessage' - '(%d, val); }' % - (name, self.type.GetCppType(), wire_id)) + w.Write("bool has_%s() const { return has_%s_; }" % (name, name)) + if self.type.IsMessage(): + w.Write("const %s& %s() const { return %s_; }" % + (cpp_type, name, name)) + w.Write("%s* mutable_%s() {" % (cpp_type, name)) + w.Indent() + w.Write('has_%s_ = true;' % (name)) + w.Write('return &%s_;' % name) + w.Unindent() + w.Write("}") + else: + w.Write("%s %s() const { return %s_; }" % + (cpp_type, name, name)) + w.Write("void set_%s(%s val) {" % (name, cpp_type)) + w.Indent() + w.Write("has_%s_ = true;" % name) + w.Write("%s_ = val;" % name) + w.Unindent() + w.Write("}") + + def GenerateVariable(self, w): + name = self.name.group(0) + cpp_type = self.type.GetVariableCppType() + if self.category == 'repeated': + w.Write("std::vector<%s> %s_;" % (cpp_type, name)) + else: + w.Write("bool has_%s_{};" % (name)) 
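            # For illustration (not part of this patch): for a field such as
            # `optional float foo = 1;` the line above emits `bool has_foo_{};`
            # and the next line emits `float foo_{};`; repeated fields instead
            # get a single `std::vector<...>` member.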
+ w.Write("%s %s_{};" % (cpp_type, name)) + return class ProtoEnumParser: @@ -339,8 +358,9 @@ def Generate(self, w): class ProtoMessageParser: - def __init__(self, lexer, object_stack): - self.objects = [] + def __init__(self, lexer, type_stack): + self.types = [] + self.fields = [] lexer.Consume('message') self.name = lexer.Consume('identifier').group(0) lexer.Consume('{') @@ -349,13 +369,13 @@ def __init__(self, lexer, object_stack): if token == '}': break elif token == 'message': - self.objects.append( - ProtoMessageParser(lexer, [self.objects, *object_stack])) + self.types.append( + ProtoMessageParser(lexer, [self.types, *type_stack])) elif token == 'enum': - self.objects.append(ProtoEnumParser(lexer)) + self.types.append(ProtoEnumParser(lexer)) elif token in ['repeated', 'optional', 'required']: - self.objects.append( - ProtoFieldParser(lexer, [self.objects, *object_stack])) + self.fields.append( + ProtoFieldParser(lexer, [self.types, *type_stack])) else: lexer.Error('Expected field or type') lexer.Consume('}') @@ -369,54 +389,62 @@ def GetType(self): def IsType(self): return True - def GetObjects(self): + def GetTypes(self): return self.objects - def GenerateBuilderClass(self, w): - w.Write('class Builder : public lczero::ProtoMessage::Builder {') - w.Write(' public:') + def GetFieldsGruppedByWireType(self): + type_to_fields = {} + for x in self.fields: + type_to_fields.setdefault(x.type.GetWireType(), []).append(x) + return type_to_fields + + def WriteFieldParser(self, w, wire_id, fields): + fname = {0: 'SetVarInt', 1: 'SetInt64', 2: 'SetString', 5: 'SetInt32'} + tname = { + 0: 'std::uint64_t', + 1: 'std::uint64_t', + 2: 'std::string_view', + 5: 'std::uint32_t' + } + w.Write('void %s(int field_id, %s val) override {' % + (fname[wire_id], tname[wire_id])) w.Indent() - w.Write( - 'Builder(const %s& msg) : lczero::ProtoMessage::Builder(msg) {}' % - self.name) - w.Write('%s Build() const { return %s(*this); }' % - (self.name, self.name)) - for x in self.objects: - if not x.IsType(): - x.GenerateForBuilder(w) + w.Write('switch (field_id) {') + w.Indent() + for field in fields: + field.GenerateCaseClause(w) w.Unindent() - w.Write('};') + w.Write('}') + w.Unindent() + w.Write('}') def Generate(self, w): # Protobuf message is a C++ class. w.Write('class %s : public lczero::ProtoMessage {' % self.name) w.Write(' public:') w.Indent() - # Set of standard constructors. - w.Write('%s() = default;' % (self.name)) - w.Write('%s(const %s&) = default;' % (self.name, self.name)) - w.Write('%s(%s&&) = default;' % (self.name, self.name)) - w.Write('%s& operator=(const %s&) = default;' % (self.name, self.name)) - w.Write('%s& operator=(%s&&) = default;' % (self.name, self.name)) - w.Write( - 'static %s CreateNotOwned(std::string_view s) { return %s(s); }' % - (self.name, self.name)) - # Writing fields, submessages and enums. - for x in self.objects: + # Writing submessages and enums. + for x in self.types: x.Generate(w) - self.GenerateBuilderClass(w) - # Set of functions to bind builder with parser classes. 
- w.Write('Builder AsBuilder() const {') - w.Write(' return Builder(*this);') + for x in self.fields: + w.Write('') + x.GenerateFunctions(w) + w.Write('') + w.Write('void Clear() override {') + w.Indent() + for x in self.fields: + x.GenerateClear(w) + w.Unindent() w.Write('}') w.Unindent() + w.Write('') w.Write(' private:') w.Indent() - w.Write('%s(std::string_view str) : lczero::ProtoMessage(str) {}' % - (self.name)) - w.Write( - '%s(const Builder& builder) : lczero::ProtoMessage(builder) {}' % - (self.name)) + for k, v in self.GetFieldsGruppedByWireType().items(): + self.WriteFieldParser(w, k, v) + w.Write('') + for x in self.fields: + x.GenerateVariable(w) w.Unindent() w.Write('};') @@ -482,7 +510,10 @@ def Unindent(self): self.indent -= 2 def Write(self, text): - self.fo.write(' ' * self.indent + text + '\n') + if text: + self.fo.write(' ' * self.indent + text + '\n') + else: + self.fo.write('\n') if __name__ == "__main__": diff --git a/src/neural/loader.cc b/src/neural/loader.cc index 71be41297d..ebccfcbf8a 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -114,36 +114,25 @@ void FixOlderWeightsFile(WeightsFile* file) { return; } - WeightsFile::Builder builder(*file); - - auto format = file->format().AsBuilder(); - auto net_builder = file->format().network_format().AsBuilder(); - + auto* net = file->mutable_format()->mutable_network_format(); if (!file->format().has_network_format()) { // Older protobufs don't have format definition. - net_builder.set_input(nf::INPUT_CLASSICAL_112_PLANE); - net_builder.set_output(nf::OUTPUT_CLASSICAL); - net_builder.set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); - net_builder.set_value(nf::VALUE_CLASSICAL); - net_builder.set_policy(nf::POLICY_CLASSICAL); + net->set_input(nf::INPUT_CLASSICAL_112_PLANE); + net->set_output(nf::OUTPUT_CLASSICAL); + net->set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); + net->set_value(nf::VALUE_CLASSICAL); + net->set_policy(nf::POLICY_CLASSICAL); } else if (network_format == pblczero::NetworkFormat::NETWORK_CLASSICAL) { // Populate policyFormat and valueFormat fields in old protobufs // without these fields. - net_builder.set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); - net_builder.set_value(nf::VALUE_CLASSICAL); - net_builder.set_policy(nf::POLICY_CLASSICAL); + net->set_network(nf::NETWORK_CLASSICAL_WITH_HEADFORMAT); + net->set_value(nf::VALUE_CLASSICAL); + net->set_policy(nf::POLICY_CLASSICAL); } else if (network_format == pblczero::NetworkFormat::NETWORK_SE) { - net_builder.set_network(nf::NETWORK_SE_WITH_HEADFORMAT); - net_builder.set_value(nf::VALUE_CLASSICAL); - net_builder.set_policy(nf::POLICY_CLASSICAL); + net->set_network(nf::NETWORK_SE_WITH_HEADFORMAT); + net->set_value(nf::VALUE_CLASSICAL); + net->set_policy(nf::POLICY_CLASSICAL); } - - // It's only possible to replace the particular field completely. - // So first replace network_format in format. - format.set_network_format(net_builder.Build()); - // Then replace format in WeightsFile. 
- builder.set_format(format.Build()); - *file = builder.Build(); } WeightsFile ParseWeightsProto(const std::string& buffer) { diff --git a/src/utils/protomessage.cc b/src/utils/protomessage.cc index 06ce24cf72..b1a6d5f4e4 100644 --- a/src/utils/protomessage.cc +++ b/src/utils/protomessage.cc @@ -3,13 +3,13 @@ #include "utils/exception.h" namespace lczero { -namespace { -uint64_t ReadVarInt(const char** iter, const char* const end) { +namespace { +uint64_t ReadVarInt(const std::uint8_t** iter, const std::uint8_t* const end) { uint64_t res = 0; uint64_t multiplier = 1; while (*iter < end) { - unsigned char x = **iter; + std::uint8_t x = **iter; ++*iter; res += (x & 0x7f) * multiplier; if ((x & 0x80) == 0) return res; @@ -18,184 +18,75 @@ uint64_t ReadVarInt(const char** iter, const char* const end) { throw Exception("The file seems truncated."); } -std::string EncodeVarInt(std::uint64_t val) { - std::string res; - while (true) { - char c = (val & 0x7f); - val >>= 7; - if (val) c |= 0x80; - res += c; - if (!val) return res; +void CheckOutOfBounds(const std::uint8_t* const iter, size_t size, + const std::uint8_t* const end) { + if (iter + size > end) { + throw Exception("The file is truncated."); } } -} // namespace - -void ProtoMessage::ParseFromString(const std::string& str) { - // Making the buffer "owned", e.g. copy the string contents. - buffer_ = str; - data_ = buffer_; - RebuildOffsets(); +uint64_t ReadFixed(const std::uint8_t** iter, size_t size, + const std::uint8_t* const end) { + CheckOutOfBounds(*iter, size, end); + uint64_t multiplier = 1; + uint64_t result = 0; + for (; size != 0; --size, multiplier *= 256, ++*iter) { + result += multiplier * **iter; + } + return result; } -ProtoMessage::ProtoMessage(std::string_view serialized_proto) - : data_(serialized_proto) { - // Not owned. - RebuildOffsets(); +// // Kept for serialization part. +// std::string EncodeVarInt(std::uint64_t val) { +// std::string res; +// while (true) { +// char c = (val & 0x7f); +// val >>= 7; +// if (val) c |= 0x80; +// res += c; +// if (!val) return res; +// } +// } + +} // namespace + +void ProtoMessage::ParseFromString(std::string_view str) { + Clear(); + return MergeFromString(str); } -void ProtoMessage::RebuildOffsets() { - // Builds offsets, e.g. mapping from wire file id to list of field offsets in - // data_. - offsets_.clear(); - const char* const begin = data_.data(); - const char* iter = data_.data(); - const char* const end = data_.data() + data_.size(); +void ProtoMessage::MergeFromString(std::string_view str) { + const std::uint8_t* iter = reinterpret_cast(str.data()); + const std::uint8_t* const end = iter + str.size(); while (iter < end) { - uint64_t field_id = ReadVarInt(&iter, end); - auto offset = iter; - switch (field_id & 0x7) { + uint64_t wire_field_id = ReadVarInt(&iter, end); + uint64_t field_id = wire_field_id >> 3; + switch (wire_field_id & 0x7) { case 0: // Varint field, so read one more varint. - ReadVarInt(&iter, end); + SetVarInt(field_id, ReadVarInt(&iter, end)); break; case 1: - // Fixed64, skip 8 bytes. - iter += 8; + // Fixed64, read 8 bytes. + SetInt64(field_id, ReadFixed(&iter, 8, end)); break; case 2: { // String/submessage. Varint length and then buffer of that length. size_t size = ReadVarInt(&iter, end); + CheckOutOfBounds(iter, size, end); + SetString(field_id, + std::string_view(reinterpret_cast(iter), size)); iter += size; break; } case 5: - // Fixed32, skip 4 bytes. - iter += 4; + // Fixed32, read 4 bytes. 
+ SetInt32(field_id, ReadFixed(&iter, 4, end)); break; default: throw Exception("The file seems to be unparseable."); } - offsets_[field_id].push_back({static_cast(offset - begin), - static_cast(iter - offset)}); - } - if (iter != end) { - throw Exception("The file is truncated."); - } -} - -void ProtoMessage::operator=(ProtoMessage&& other) { - buffer_ = std::move(other.buffer_); - offsets_ = std::move(other.offsets_); - if (!buffer_.empty()) { - // If owned, make data_ point to a new buffer_ (the underlying data was - // probably moved though, so probably data_ == other.data_. - data_ = buffer_; - } else { - // Not owned, copy buffer. - data_ = std::move(other.data_); } } -ProtoMessage::ProtoMessage(ProtoMessage&& other) { - operator=(std::move(other)); -} - -size_t ProtoMessage::WireFieldCount(int wire_field_id) const { - auto iter = offsets_.find(wire_field_id); - if (iter == offsets_.end()) return 0; - return iter->second.size(); -} - -const char* ProtoMessage::GetFieldPtr(int wire_field_id, size_t index) const { - auto iter = offsets_.find(wire_field_id); - if (iter == offsets_.end()) return nullptr; - if (index == kLast) return data_.data() + iter->second.back().offset; - return data_.data() + iter->second.at(index).offset; -} - -std::uint64_t ProtoMessage::GetVarintVal(int wire_field_id, - size_t index) const { - auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return 0; - return ReadVarInt(&x, data_.data() + data_.size()); -} - -float ProtoMessage::GetFloatVal(int wire_field_id, size_t index) const { - auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return 0.0f; - float res; - std::memcpy(&res, x, sizeof(res)); - return res; -} -double ProtoMessage::GetDoubleVal(int wire_field_id, size_t index) const { - auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return 0.0; - double res; - std::memcpy(&res, x, sizeof(res)); - return res; -} -std::uint32_t ProtoMessage::GetFixed32Val(int wire_field_id, - size_t index) const { - // WARNING: Doesn't support big-endian. - auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return 0; - std::uint32_t res; - std::memcpy(&res, x, sizeof(res)); - return res; -} -std::uint64_t ProtoMessage::GetFixed64Val(int wire_field_id, - size_t index) const { - // WARNING: Doesn't support big-endian. 
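[Editor's illustrative aside, not part of the patch.] The new parser above is push-style: ProtoMessage::MergeFromString() walks the serialized buffer and dispatches each field to the virtual SetVarInt/SetInt64/SetInt32/SetString hooks, which the classes emitted by the updated proto generator override. A minimal hand-written subclass sketch of that pattern, with made-up field numbers and names:

#include <cstdint>
#include <string>
#include <string_view>

#include "utils/protomessage.h"

// Hypothetical message: field 1 is a varint, field 2 is length-delimited.
class ToyMessage : public lczero::ProtoMessage {
 public:
  void Clear() override {
    value_ = 0;
    name_.clear();
  }
  std::uint64_t value() const { return value_; }
  const std::string& name() const { return name_; }

 private:
  // Called by MergeFromString() for wire type 0 (varint) fields.
  void SetVarInt(int field_id, std::uint64_t val) override {
    if (field_id == 1) value_ = val;
  }
  // Called for wire type 2 (length-delimited) fields.
  void SetString(int field_id, std::string_view val) override {
    if (field_id == 2) name_ = std::string(val);
  }

  std::uint64_t value_ = 0;
  std::string name_;
};
// Usage: ToyMessage msg; msg.ParseFromString(buffer);  // Clear(), then merge.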
- auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return 0; - std::uint64_t res; - std::memcpy(&res, x, sizeof(res)); - return res; -} -std::string_view ProtoMessage::GetBytesVal(int wire_field_id, - size_t index) const { - auto x = GetFieldPtr(wire_field_id, index); - if (x == nullptr) return {}; - size_t size = ReadVarInt(&x, data_.data() + data_.size()); - return std::string_view(x, size); -} - -ProtoMessage::Builder::Builder(const ProtoMessage& msg) { - for (const auto& iter : msg.offsets_) { - auto& bucket = fields_[iter.first]; - for (const auto& entry : iter.second) { - bucket.emplace_back(msg.data_.data() + entry.offset, entry.size); - } - } -} - -void ProtoMessage::Builder::WireFieldSetVarint(int wire_field_id, - std::uint64_t value) { - fields_[wire_field_id] = {EncodeVarInt(value)}; -} - -ProtoMessage::ProtoMessage(const ProtoMessage::Builder& builder) { - buffer_ = builder.AsString(); - data_ = buffer_; - RebuildOffsets(); -} - -std::string ProtoMessage::Builder::AsString() const { - std::string res; - for (const auto& iter : fields_) { - for (const auto& entry : iter.second) { - res += EncodeVarInt(iter.first); - res += entry; - } - } - return res; -} - -void ProtoMessage::Builder::WireFieldSetMessage(int wire_field_id, - const ProtoMessage& msg) { - fields_[wire_field_id] = {EncodeVarInt(msg.data_.size()) + - std::string(msg.data_)}; -} - } // namespace lczero \ No newline at end of file diff --git a/src/utils/protomessage.h b/src/utils/protomessage.h index 62135bd85e..d859605a2b 100644 --- a/src/utils/protomessage.h +++ b/src/utils/protomessage.h @@ -18,101 +18,31 @@ namespace lczero { -// Kind of bit_cast from C++20, but can convert from uint64_t to smaller types. -template -T kind_of_bit_cast(std::uint64_t from) { - T to; - std::memcpy(&to, &from, sizeof(to)); - return to; -} - -// Iterator for repeated proto fields. 
-template -class ProtoIterator { - public: - class Iter { - public: - Iter(std::function func, size_t idx) - : accessor_(func), idx_(idx) {} - void operator++() { ++idx_; } - T operator*() { return accessor_(idx_); } - bool operator!=(const Iter& other) { return idx_ != other.idx_; } - - private: - const std::function accessor_; - size_t idx_; - }; - - ProtoIterator(size_t count, std::function accessor) - : count_(count), accessor_(accessor) {} - - Iter begin() const { return Iter(accessor_, 0); } - Iter end() const { return Iter(accessor_, count_); } - - private: - const size_t count_; - const std::function accessor_; -}; - class ProtoMessage { public: - void ParseFromString(const std::string&); - - class Builder { - public: - Builder(); - std::string AsString() const; - - protected: - void operator=(const ProtoMessage& msg); - Builder(const ProtoMessage&); - void WireFieldClear(int wire_field_id); - void WireFieldSetVarint(int wire_field_id, std::uint64_t varint); - void WireFieldSetMessage(int wire_field_id, const ProtoMessage& val); + virtual ~ProtoMessage() {} + virtual void Clear() = 0; - private: - using Bits = std::vector; - using Fields = std::map; - Fields fields_; - }; + void ParseFromString(std::string_view); + void MergeFromString(std::string_view); protected: - ProtoMessage() = default; - ProtoMessage(const ProtoMessage& other); - ProtoMessage(ProtoMessage&& other); - void operator=(const ProtoMessage& other); - void operator=(ProtoMessage&& other); - ProtoMessage(const Builder&); - void operator=(const Builder&); - static constexpr size_t kLast = std::numeric_limits::max(); - - ProtoMessage(std::string_view serialized_proto); - size_t WireFieldCount(int wire_field_id) const; - std::uint64_t GetVarintVal(int wire_field_id, size_t index) const; - float GetFloatVal(int wire_field_id, size_t index) const; - double GetDoubleVal(int wire_field_id, size_t index) const; - std::uint32_t GetFixed32Val(int wire_field_id, size_t index) const; - std::uint64_t GetFixed64Val(int wire_field_id, size_t index) const; - std::string_view GetBytesVal(int wire_field_id, size_t index) const; + template + static To bit_cast(From from) { + if constexpr (std::is_same_v) { + return from; + } else { + To to; + std::memcpy(&to, &from, sizeof(to)); + return to; + } + } private: - void RebuildOffsets(); - const char* GetFieldPtr(int wire_field_id, size_t index) const; - - struct FieldPos { - size_t offset; - size_t size; - }; - using Offsets = std::vector; - using FieldOffsets = std::map; - - // Map from wire field_id to list of offsets and sizes inside data_. - FieldOffsets offsets_; - // When the class owns the proto, buffer_ contains it. - std::string buffer_; - // String slice of the proto. If owned, points to buffer_. If not owned, - // points to some external location. - std::string_view data_; + virtual void SetVarInt(int /* field_id */, uint64_t /* value */) {} + virtual void SetInt64(int /* field_id */, uint64_t /* value */) {} + virtual void SetInt32(int /* field_id */, uint32_t /* value */) {} + virtual void SetString(int /* field_id */, std::string_view /* value */) {} }; } // namespace lczero \ No newline at end of file From d65c7a255d647cd8ea26de6b6aabf63c0f4202ed Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 23 Mar 2020 18:11:19 +1100 Subject: [PATCH 065/151] Moves left head support for DX backend. (#1140) * Moves left head support for dx backend. * Some formatting. * Some fixes... 
* Fix incorrect dump tensor and don't send unneeded scratches that include references to the final residual output that needs to be kept safe. * Fix major bug ;) * Review feedback and cleanup. * Fix as per review feedback. * Formatting. * change to revert to kick off circleci again. * Revert "change to revert to kick off circleci again." This reverts commit 1b4d59d8be511d56c28186bb2bb8ff3d7cfb85ef. * Revert "Formatting." This reverts commit f4bc38b3a96a0df58af3065df756e662eccf2f7d. * Revert "Revert "Formatting."" This reverts commit 16f36abca33f9658a0f096a9fe3a666856156bd8. --- src/neural/dx/network_dx.cc | 127 ++++++++++++++++++++++++++++++++---- src/neural/dx/network_dx.h | 17 ++++- 2 files changed, 129 insertions(+), 15 deletions(-) diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc index 03c1492d38..8aaae82729 100644 --- a/src/neural/dx/network_dx.cc +++ b/src/neural/dx/network_dx.cc @@ -374,7 +374,7 @@ void DxContext::ScheduleUpload(DXAlloc alloc, const void* data, size_t size) { DxNetwork::DxNetwork(const WeightsFile& file, const OptionsDict& options) : dx_context_(options), capabilities_{file.format().network_format().input(), - pblczero::NetworkFormat::MOVES_LEFT_NONE} { + file.format().network_format().moves_left()} { LegacyWeights weights(file.weights()); has_conv_policy_ = file.format().network_format().policy() == @@ -551,8 +551,8 @@ DxNetwork::DxNetwork(const WeightsFile& file, const OptionsDict& options) // 1x1 convolution, val_channels output filters auto convVal = std::make_unique( - fp16_, nullptr, nullptr, &dx_context_, getLastLayer(), val_channels, 8, - 8, 1, kNumFilters, true, true); + fp16_, nullptr, nullptr, &dx_context_, resi_last, val_channels, 8, 8, 1, + kNumFilters, true, true); convVal->LoadWeights(&weights.value.weights[0], &weights.value.biases[0], &dx_context_); network_.emplace_back(std::move(convVal)); @@ -587,6 +587,43 @@ DxNetwork::DxNetwork(const WeightsFile& file, const OptionsDict& options) network_.emplace_back(std::move(FCVal2)); } + // Moves left head + moves_left_ = file.format().network_format().moves_left() == + pblczero::NetworkFormat::MOVES_LEFT_V1; + if (moves_left_) { + // 1x1 convolution, moves_left biases output filters + auto convMov = std::make_unique( + fp16_, nullptr, nullptr, &dx_context_, resi_last, + weights.moves_left.biases.size(), 8, 8, 1, kNumFilters, true, true); + convMov->LoadWeights(&weights.moves_left.weights[0], + &weights.moves_left.biases[0], &dx_context_); + network_.emplace_back(std::move(convMov)); + + // Bias and relu activation. + auto FCMov1 = std::make_unique(fp16_, &dx_context_, getLastLayer(), + (int)weights.ip1_mov_b.size(), 1, 1, + true, true, false); + FCMov1->LoadWeights(&weights.ip1_mov_w[0], &weights.ip1_mov_b[0], + &dx_context_); + network_.emplace_back(std::move(FCMov1)); + + // Fully connected layer with Bias and relu. 
+ auto FCMov2 = std::make_unique(fp16_, &dx_context_, getLastLayer(), + kNumOutputMovesLeftPadded8, 1, 1, + true, true, false); + // Pad up the weights + std::vector tempBias(kNumOutputMovesLeftPadded8); + std::vector tempWeight(kNumOutputMovesLeftPadded8 * + weights.ip2_mov_w.size() / + weights.ip2_mov_b.size()); + memcpy(tempBias.data(), weights.ip2_mov_b.data(), + weights.ip2_mov_b.size() * sizeof(float)); + memcpy(tempWeight.data(), weights.ip2_mov_w.data(), + weights.ip2_mov_w.size() * sizeof(float)); + FCMov2->LoadWeights(tempWeight.data(), tempBias.data(), &dx_context_); + network_.emplace_back(std::move(FCMov2)); + } + dx_context_.FlushAndWait(); // Allocate GPU memory for running the network @@ -745,18 +782,45 @@ void DxNetwork::Eval(InputsOutputsDx* io, int batch_size) { // value FC1. network_[l++]->Eval(batch_size, tensor_mem_[1], tensor_mem_[0], DXAlloc(), - tensor_mem_[2], tensor_mem_[3], cl); + DXAlloc(), DXAlloc(), cl); dx_context_.UavBarrier(cl); dx_context_.DumpTensor("After value fc1", tensor_mem_[1], 128, fp16_); // value FC2. network_[l++]->Eval(batch_size, io->op_value_mem_gpu_, tensor_mem_[1], - DXAlloc(), tensor_mem_[2], tensor_mem_[3], cl); + DXAlloc(), DXAlloc(), DXAlloc(), cl); dx_context_.DumpTensor("After value fc2", io->op_value_mem_gpu_, 8, fp16_); //-----------------------------------///--------------------------------------- + + // Moves left head. + if (moves_left_) { + // Moves left conv. + network_[l++]->Eval(batch_size, tensor_mem_[0], tensor_mem_[2], DXAlloc(), + tensor_mem_[1], tensor_mem_[3], cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After moves left conv", tensor_mem_[0], 1024, + fp16_); + + // Moves left FC1. + network_[l++]->Eval(batch_size, tensor_mem_[1], tensor_mem_[0], DXAlloc(), + DXAlloc(), DXAlloc(), cl); + dx_context_.UavBarrier(cl); + + dx_context_.DumpTensor("After moves left fc1", tensor_mem_[1], 512, fp16_); + + // Moves left FC2. + network_[l++]->Eval(batch_size, io->op_moves_left_mem_gpu_, tensor_mem_[1], + DXAlloc(), DXAlloc(), DXAlloc(), cl); + + dx_context_.DumpTensor("After moves left fc2", io->op_moves_left_mem_gpu_, + 8, fp16_); + } + + //-----------------------------------///--------------------------------------- #ifdef DEBUG_DUMP_PER_LAYER_DATA dx_context_.FlushAndWait(); lock_.unlock(); @@ -795,7 +859,14 @@ void DxNetwork::Eval(InputsOutputsDx* io, int batch_size) { for (int i = 0; i < val_vector_size; i++) io->op_value_mem_final_[n * val_vector_size + i] = FP16toFP32(padded_val_fp16[n * kNumOutputValuePadded8 + i]); - + if (moves_left_) { + // Moves left: + // Un-pad moves left output, converting it to fp32. + dx_half* padded_moves_left_fp16 = (dx_half*)io->op_moves_left_mem_; + for (int n = 0; n < batch_size; n++) + io->op_moves_left_mem_final_[n] = + FP16toFP32(padded_moves_left_fp16[n * kNumOutputMovesLeftPadded8]); + } } else { // Policy: // Un-pad policy output. @@ -812,6 +883,14 @@ void DxNetwork::Eval(InputsOutputsDx* io, int batch_size) { memcpy(io->op_value_mem_final_ + val_vector_size * i, io->op_value_mem_ + kNumOutputValuePadded8 * i, val_vector_size * sizeof(float)); + if (moves_left_) { + // Moves left: + // Un-pad moves left output. + for (int i = 0; i < batch_size; i++) + memcpy(io->op_moves_left_mem_final_ + i, + io->op_moves_left_mem_ + kNumOutputMovesLeftPadded8 * i, + sizeof(float)); + } } // Softmax on value head for wdl enabled networks. 
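[Editor's illustrative aside, not part of the patch.] The moves-left output above is padded to kNumOutputMovesLeftPadded8 (8) entries per position; only the first entry of each group carries the prediction, which the fp16 and fp32 paths copy out one position at a time. A standalone sketch of that un-padding step, with a hypothetical helper name:

#include <cstddef>
#include <vector>

constexpr std::size_t kPaddedStride = 8;  // mirrors kNumOutputMovesLeftPadded8

// Keep the first of every 8 padded floats: one moves-left value per position.
std::vector<float> UnpadMovesLeft(const float* padded, std::size_t batch_size) {
  std::vector<float> result(batch_size);
  for (std::size_t i = 0; i < batch_size; ++i) {
    result[i] = padded[i * kPaddedStride];
  }
  return result;
}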
@@ -845,14 +924,15 @@ DxNetwork::~DxNetwork() { } std::unique_ptr DxNetwork::NewComputation() { - return std::make_unique(this, has_wdl_); + return std::make_unique(this, has_wdl_, moves_left_); } std::unique_ptr DxNetwork::GetInputsOutputs() { std::lock_guard lock(inputs_outputs_lock_); if (free_inputs_outputs_.empty()) { return std::make_unique(max_batch_size_, &dx_context_, - has_wdl_, has_conv_policy_, fp16_); + has_wdl_, moves_left_, + has_conv_policy_, fp16_); } else { std::unique_ptr resource = std::move(free_inputs_outputs_.front()); @@ -867,8 +947,9 @@ void DxNetwork::ReleaseInputsOutputs( free_inputs_outputs_.push_back(std::move(resource)); } -DxNetworkComputation::DxNetworkComputation(DxNetwork* network, bool wdl) - : network_(network), wdl_(wdl) { +DxNetworkComputation::DxNetworkComputation(DxNetwork* network, bool wdl, + bool moves_left) + : network_(network), wdl_(wdl), moves_left_(moves_left) { batch_size_ = 0; inputs_outputs_ = network_->GetInputsOutputs(); } @@ -897,8 +978,11 @@ void DxNetworkComputation::ComputeBlocking() { } InputsOutputsDx::InputsOutputsDx(int maxBatchSize, DxContext* dx_context, - bool wdl, bool policy_map, bool fp16) - : uses_policy_map_(policy_map), needs_reset_(false) { + bool wdl, bool moves_left, bool policy_map, + bool fp16) + : uses_policy_map_(policy_map), + needs_reset_(false), + moves_left_(moves_left) { // CPU accesses on Default heap doesn't work. // GPU accesses on Upload heap works. dx_context->CreateAlloc(maxBatchSize * kInputPlanes * sizeof(uint64_t), @@ -917,6 +1001,12 @@ InputsOutputsDx::InputsOutputsDx(int maxBatchSize, DxContext* dx_context, dx_context->CreateAlloc(maxBatchSize * kNumOutputValuePadded8 * sizeof(float), D3D12_HEAP_TYPE_CUSTOM, op_value_mem_gpu_, fp16); + if (moves_left) { + dx_context->CreateAlloc( + maxBatchSize * kNumOutputMovesLeftPadded8 * sizeof(float), + D3D12_HEAP_TYPE_CUSTOM, op_moves_left_mem_gpu_, fp16); + } + ReportDxErrors(input_masks_mem_gpu_.resource->Map(0, nullptr, (void**)&input_masks_mem_)); @@ -929,12 +1019,18 @@ InputsOutputsDx::InputsOutputsDx(int maxBatchSize, DxContext* dx_context, ReportDxErrors( op_value_mem_gpu_.resource->Map(0, nullptr, (void**)&op_value_mem_)); + if (moves_left) { + ReportDxErrors(op_moves_left_mem_gpu_.resource->Map( + 0, nullptr, (void**)&op_moves_left_mem_)); + } + // When policy map is enabled, GPU writes directly to the final policy output. if (uses_policy_map_) op_policy_mem_final_ = op_policy_mem_; else op_policy_mem_final_ = new float[maxBatchSize * kNumOutputPolicy]; op_value_mem_final_ = new float[maxBatchSize * (wdl ? 
3 : 1)]; + if (moves_left) op_moves_left_mem_final_ = new float[maxBatchSize]; ReportDxErrors(dx_context->getDevice()->CreateCommandAllocator( D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&command_allocator_))); @@ -949,17 +1045,24 @@ InputsOutputsDx::~InputsOutputsDx() { input_val_mem_gpu_.resource->Unmap(0, nullptr); op_policy_mem_gpu_.resource->Unmap(0, nullptr); op_value_mem_gpu_.resource->Unmap(0, nullptr); + if (moves_left_) { + op_moves_left_mem_gpu_.resource->Unmap(0, nullptr); + } input_masks_mem_gpu_.resource->Release(); input_val_mem_gpu_.resource->Release(); op_policy_mem_gpu_.resource->Release(); op_value_mem_gpu_.resource->Release(); + if (moves_left_) { + op_moves_left_mem_gpu_.resource->Release(); + } command_allocator_->Release(); command_list_->Release(); if (!uses_policy_map_) delete[] op_policy_mem_final_; delete[] op_value_mem_final_; + if (moves_left_) delete[] op_moves_left_mem_final_; } std::unique_ptr MakeDxNetwork(const WeightsFile& weights, diff --git a/src/neural/dx/network_dx.h b/src/neural/dx/network_dx.h index db2ee46408..82deccbebd 100644 --- a/src/neural/dx/network_dx.h +++ b/src/neural/dx/network_dx.h @@ -49,9 +49,11 @@ static constexpr int kNumOutputPolicyPadded8 = // Normally 3 when using wdl, and 1 without static constexpr int kNumOutputValuePadded8 = 8; +static constexpr int kNumOutputMovesLeftPadded8 = 8; + struct InputsOutputsDx { InputsOutputsDx(int maxBatchSize, DxContext* dx_context, bool wdl, - bool conv_policy, bool fp16); + bool moves_left, bool conv_policy, bool fp16); ~InputsOutputsDx(); // Wanted to put these in default heap (video memory, mapped to support CPU @@ -65,16 +67,19 @@ struct InputsOutputsDx { // In readback heap (system memory mapped for both CPU and GPU). DXAlloc op_policy_mem_gpu_; DXAlloc op_value_mem_gpu_; + DXAlloc op_moves_left_mem_gpu_; // CPU pointers of the above allocations. uint64_t* input_masks_mem_; float* input_val_mem_; float* op_policy_mem_; float* op_value_mem_; + float* op_moves_left_mem_; // Separate copy, un-padded and always in fp32 float* op_policy_mem_final_; float* op_value_mem_final_; + float* op_moves_left_mem_final_; // For recording GPU commands. ID3D12GraphicsCommandList4* command_list_; @@ -84,11 +89,12 @@ struct InputsOutputsDx { bool needs_reset_; const bool uses_policy_map_; + const bool moves_left_; }; class DxNetworkComputation : public NetworkComputation { public: - DxNetworkComputation(DxNetwork* network, bool wdl); + DxNetworkComputation(DxNetwork* network, bool wdl, bool moves_left); ~DxNetworkComputation(); void AddInput(InputPlanes&& input) override; @@ -121,7 +127,10 @@ class DxNetworkComputation : public NetworkComputation { ->op_policy_mem_final_[sample * kNumOutputPolicy + move_id]; } - float GetMVal(int /* sample */) const override { + float GetMVal(int sample) const override { + if (moves_left_) { + return inputs_outputs_->op_moves_left_mem_final_[sample]; + } return 0.0f; } @@ -130,6 +139,7 @@ class DxNetworkComputation : public NetworkComputation { std::unique_ptr inputs_outputs_; int batch_size_; bool wdl_; + bool moves_left_; DxNetwork* network_; }; @@ -217,6 +227,7 @@ class DxNetwork : public Network { bool has_wdl_; bool has_conv_policy_; bool fp16_; + bool moves_left_; std::vector> network_; BaseLayer* getLastLayer() { return network_.back().get(); } From 34dcb233eb93407f9bebbb9754bf5b830e11ad8c Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Mon, 23 Mar 2020 09:40:54 +0100 Subject: [PATCH 066/151] Revive tensorflow (C++) backend. (#1114) * New tensorflow backend. 
(work in progress, only supports loading from GraphDef for now) * Fill memory with zeros. * Build file changes. * Fix old tensorflow backend. * Rename old tensorflow backend to tensorflow_cc, and make new one tensorflow. * TF supports SE now. And variable dimentions in policy head. * Fix meson.build * More .gitignore files. * Update submodule. * Add MLP stubs. * a0 policy head (untested). * WDL head. * Fix policy head. * Remove unfinished C tensorflow part for now. * Have a way to dump a network in GraphDef format. * s/direction/displacement --- meson.build | 31 +- meson_options.txt | 10 - .../{network_tf.cc => network_tf_cc.cc} | 232 ++++-- src/neural/shared/policy_map.h | 783 +++++++++--------- src/utils/transpose.cc | 55 -- src/utils/transpose.h | 26 +- 6 files changed, 581 insertions(+), 556 deletions(-) rename src/neural/{network_tf.cc => network_tf_cc.cc} (55%) delete mode 100644 src/utils/transpose.cc diff --git a/meson.build b/meson.build index 28b9b78745..c51bc88c74 100644 --- a/meson.build +++ b/meson.build @@ -16,7 +16,7 @@ project('lc0', 'cpp', default_options : ['cpp_std=c++17', 'b_ndebug=if-release', 'warning_level=3'], - meson_version: '>=0.46') + meson_version: '>=0.52') cc = meson.get_compiler('cpp') if cc.get_id() == 'clang' @@ -142,7 +142,6 @@ files += [ 'src/utils/optionsparser.cc', 'src/utils/random.cc', 'src/utils/string.cc', - 'src/utils/transpose.cc', 'src/utils/weights_adapter.cc', ] includes += include_directories('src') @@ -158,8 +157,6 @@ else files += 'src/utils/filesystem.posix.cc' endif - - ############################################################################# ## BACKENDS ############################################################################# @@ -168,31 +165,15 @@ if get_option('build_backends') ## ~~~~~~~~~~ ## Tensorflow ## ~~~~~~~~~~ - # Installed from https://github.com/FloopCZ/tensorflow_cc - tensorflow_include = get_option('tensorflow_include') - tensorflow_libdir = get_option('tensorflow_libdir') tf_dl_lib = cc.find_library('dl', required: false) - tf_tensorflow_lib = cc.find_library('libtensorflow_cc', - dirs: tensorflow_libdir, required: false) - - if get_option('tensorflow') and tf_dl_lib.found() and tf_tensorflow_lib.found() - includes += include_directories( - tensorflow_include, - tensorflow_include[0] + '/bazel-genfiles', - tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads', - tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/absl', - tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/eigen', - tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/gemmlowp', - tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/nsync/public', - tensorflow_include[0] + '/tensorflow/contrib/makefile/gen/protobuf-host/include', - is_system: true - ) - deps += [tf_dl_lib, tf_tensorflow_lib] - files += 'src/neural/network_tf.cc' + tf_tensorflow_cc_lib = dependency('tensorflow_cc', + required: false, include_type:'system') + if get_option('tensorflow') and tf_dl_lib.found() and tf_tensorflow_cc_lib.found() + deps += [tf_dl_lib, tf_tensorflow_cc_lib] + files += 'src/neural/network_tf_cc.cc' has_backends = true endif - ## ~~~~~ ## Blas ## ~~~~~ diff --git a/meson_options.txt b/meson_options.txt index a6b06de7e6..f17b838089 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,8 +1,3 @@ -option('tensorflow_include', - type: 'array', - value: ['/usr/local/include/tensorflow/'], - description: 'Paths to tensorflow include directories') - option('openblas_include', type: 'array', value: 
['/usr/include/openblas/'], @@ -13,11 +8,6 @@ option('opencl_include', value: '/usr/include/', description: 'Path to OpenCL include directory') -option('tensorflow_libdir', - type: 'array', - value: ['/usr/local/lib/tensorflow_cc/'], - description: 'Paths to tensorflow libraries') - option('openblas_libdirs', type: 'array', value: ['/usr/lib/'], diff --git a/src/neural/network_tf.cc b/src/neural/network_tf_cc.cc similarity index 55% rename from src/neural/network_tf.cc rename to src/neural/network_tf_cc.cc index ae546bf16f..a63631c8ee 100644 --- a/src/neural/network_tf.cc +++ b/src/neural/network_tf_cc.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018-2019 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,6 +31,7 @@ #include "neural/factory.h" #include "neural/network_legacy.h" +#include "neural/shared/policy_map.h" #include "utils/bititer.h" #include "utils/optionsdict.h" #include "utils/transpose.h" @@ -58,53 +59,57 @@ Output MakeConst(const Scope& scope, TensorShape shape, return Const(scope, tensor); } -Output MakeVals(const Scope& scope, TensorShape shape, float val) { - auto tensor = Tensor(DataType::DT_FLOAT, shape); - std::fill_n(tensor.flat().data(), tensor.NumElements(), val); +Output MakeIntConst(const Scope& scope, TensorShape shape, + const std::vector& values) { + auto tensor = Tensor(DataType::DT_INT32, shape); + CHECK_EQ(tensor.NumElements(), static_cast(values.size())) + << shape.DebugString(); + memcpy(tensor.flat().data(), values.data(), + values.size() * sizeof(values[0])); return Const(scope, tensor); } -Output Zeros(const Scope& scope, TensorShape shape) { - return MakeVals(scope, shape, 0.0f); -} - -Output Ones(const Scope& scope, TensorShape shape) { - return MakeVals(scope, shape, 1.0f); +template +Output SqueezeAndExcite(const Scope& scope, Input input, int channels, + const LegacyWeights::SEunit& weights) { + const int se_channels = weights.b1.size(); + // NCHW ("NHWC" for CPU case) format reduced to NC. + auto pooled = Mean(scope, input, CPU ? Input({1, 2}) : Input({2, 3})); + auto w1 = MakeConst(scope, {channels, se_channels}, weights.w1); + auto b1 = MakeConst(scope, {se_channels}, weights.b1); + auto fc1 = Add(scope, MatMul(scope, pooled, w1), b1); + auto relu = Relu(scope, fc1); + auto w2 = MakeConst(scope, {se_channels, 2 * channels}, weights.w2); + auto b2 = MakeConst(scope, {2 * channels}, weights.b2); + auto fc2 = Add(scope, MatMul(scope, relu, w2), b2); + auto reshape = Reshape( + scope, fc2, + CPU ? Input({-1, 1, 1, 2 * channels}) : Input({-1, 2 * channels, 1, 1})); + auto outputs = Split(scope, CPU ? 3 : 1, reshape, 2); + auto sigmoid = Sigmoid(scope, outputs[0]); + return Add(scope, Mul(scope, sigmoid, input), outputs[1]); } template Output MakeConvBlock(const Scope& scope, Input input, int channels, int input_channels, int output_channels, const LegacyWeights::ConvBlock& weights, - Input* mixin = nullptr) { + const LegacyWeights::SEunit* const seunit = nullptr, + Input* mixin = nullptr, bool relu = true) { // CPU only supports "NHWC", while for GPU "NCHW" is better. const char* const kDataFormat = CPU ? 
"NHWC" : "NCHW"; - auto w_conv = MakeConst(scope, {channels, channels, input_channels, output_channels}, weights.weights, {3, 2, 0, 1}); - - auto b_conv = MakeConst(scope, {output_channels}, weights.biases); auto conv2d = Conv2D(scope, input, w_conv, {1, 1, 1, 1}, "SAME", Conv2D::DataFormat(kDataFormat).Dilations({1, 1, 1, 1})); - - auto bn_means = MakeConst(scope, {output_channels}, weights.bn_means); - auto means = Sub(scope, bn_means, b_conv); - - auto batch_norm = - FusedBatchNorm( - scope, conv2d, Ones(scope, {output_channels}), - Zeros(scope, {output_channels}), means, - MakeConst(scope, {output_channels}, weights.bn_stddivs), - FusedBatchNorm::DataFormat(kDataFormat) - .IsTraining(false) - .Epsilon(1.0000001e-5f)) // Cuda doesn't support eps <= 1e-5 - .y; - - if (mixin) { - batch_norm = Add(scope, batch_norm, *mixin); - } - return Relu(scope, batch_norm); + auto b_conv = MakeConst(scope, {output_channels}, weights.biases); + Output conv_b = + BiasAdd(scope, conv2d, b_conv, BiasAdd::DataFormat(kDataFormat)); + if (seunit) + conv_b = SqueezeAndExcite(scope, conv_b, output_channels, *seunit); + if (mixin) conv_b = Add(scope, conv_b, *mixin); + return relu ? Relu(scope, conv_b) : conv_b; } template @@ -112,14 +117,15 @@ Output MakeResidualBlock(const Scope& scope, Input input, int channels, const LegacyWeights::Residual& weights) { auto block1 = MakeConvBlock(scope, input, 3, channels, channels, weights.conv1); - auto block2 = MakeConvBlock(scope, block1, 3, channels, channels, - weights.conv2, &input); + auto block2 = + MakeConvBlock(scope, block1, 3, channels, channels, weights.conv2, + weights.has_se ? &weights.se : nullptr, &input); return block2; } template std::pair MakeNetwork(const Scope& scope, Input input, - const LegacyWeights& weights) { + const LegacyWeights& weights, bool wdl) { const int filters = weights.input.weights.size() / kInputPlanes / 9; // Input convolution. @@ -132,18 +138,49 @@ std::pair MakeNetwork(const Scope& scope, Input input, } // Policy head - auto conv_pol = - MakeConvBlock(scope, flow, 1, filters, 32, weights.policy); - if (CPU) { - // conv_pol = Transpose(scope, conv_pol, {0, 3, 1, 2}); + Output policy_head; + if (!weights.policy1.weights.empty()) { + // Conv policy head. + auto conv_pol1 = + MakeConvBlock(scope, flow, 3, filters, filters, weights.policy1); + auto conv_pol = + MakeConvBlock(scope, conv_pol1, 3, filters, 80, weights.policy, + nullptr, nullptr, /* relu= */ false); + + // [1858 -> HWC or CHW] + std::vector policy_map(1858); + for (const auto& mapping : kConvPolicyMap) { + if (mapping == -1) continue; + const auto index = &mapping - kConvPolicyMap; + const auto displacement = index / 64; + const auto square = index % 64; + const auto row = square / 8; + const auto col = square % 8; + if (CPU) { + policy_map[mapping] = ((row * 8) + col) * 80 + displacement; + } else { + policy_map[mapping] = ((displacement * 8) + row) * 8 + col; + } + } + auto mapping = MakeIntConst(scope, {1858}, policy_map); + auto flattened_conv = + Reshape(scope, conv_pol, Const(scope, {-1, 80 * 8 * 8})); + policy_head = GatherV2(scope, flattened_conv, mapping, 1); + } else { + const int policy_conv_size = weights.policy.biases.size(); + auto conv_pol = MakeConvBlock(scope, flow, 1, filters, + policy_conv_size, weights.policy); + conv_pol = + Reshape(scope, conv_pol, Const(scope, {-1, policy_conv_size * 8 * 8})); + auto ip_pol_w = CPU ? 
MakeConst(scope, {8, 8, policy_conv_size, 1858}, + weights.ip_pol_w, {3, 2, 0, 1}) + : MakeConst(scope, {policy_conv_size, 8, 8, 1858}, + weights.ip_pol_w, {3, 0, 1, 2}); + ip_pol_w = Reshape(scope, ip_pol_w, + Const(scope, {policy_conv_size * 8 * 8, 1858})); + auto ip_pol_b = MakeConst(scope, {1858}, weights.ip_pol_b); + policy_head = Add(scope, MatMul(scope, conv_pol, ip_pol_w), ip_pol_b); } - conv_pol = Reshape(scope, conv_pol, Const(scope, {-1, 32 * 8 * 8})); - auto ip_pol_w = - CPU ? MakeConst(scope, {8, 8, 32, 1858}, weights.ip_pol_w, {3, 2, 0, 1}) - : MakeConst(scope, {32, 8, 8, 1858}, weights.ip_pol_w, {3, 0, 1, 2}); - ip_pol_w = Reshape(scope, ip_pol_w, Const(scope, {32 * 8 * 8, 1858})); - auto ip_pol_b = MakeConst(scope, {1858}, weights.ip_pol_b); - auto policy_fc = Add(scope, MatMul(scope, conv_pol, ip_pol_w), ip_pol_b); // Value head auto conv_val = @@ -157,12 +194,20 @@ std::pair MakeNetwork(const Scope& scope, Input input, auto ip1_val_b = MakeConst(scope, {128}, weights.ip1_val_b); auto value_flow = Relu(scope, Add(scope, MatMul(scope, conv_val, ip1_val_w), ip1_val_b)); - auto ip2_val_w = MakeConst(scope, {128, 1}, weights.ip2_val_w); - auto ip2_val_b = MakeConst(scope, {1}, weights.ip2_val_b); - auto value_head = - Tanh(scope, Add(scope, MatMul(scope, value_flow, ip2_val_w), ip2_val_b)); + Output value_head; + if (wdl) { + auto ip2_val_w = MakeConst(scope, {128, 3}, weights.ip2_val_w); + auto ip2_val_b = MakeConst(scope, {3}, weights.ip2_val_b); + auto ip_fc = Add(scope, MatMul(scope, value_flow, ip2_val_w), ip2_val_b); + value_head = Softmax(scope, ip_fc); + } else { + auto ip2_val_w = MakeConst(scope, {128, 1}, weights.ip2_val_w); + auto ip2_val_b = MakeConst(scope, {1}, weights.ip2_val_b); + auto ip_fc = Add(scope, MatMul(scope, value_flow, ip2_val_w), ip2_val_b); + value_head = Tanh(scope, ip_fc); + } - return {policy_fc, value_head}; + return {policy_head, value_head}; } template @@ -171,7 +216,7 @@ class TFNetworkComputation; template class TFNetwork : public Network { public: - TFNetwork(const WeightsFile& file, const OptionsDict& options); + TFNetwork(const WeightsFile& file, const OptionsDict& options, bool wdl); std::unique_ptr NewComputation() override; @@ -179,10 +224,11 @@ class TFNetwork : public Network { std::vector* outputs) const; const NetworkCapabilities& GetCapabilities() const override { - static NetworkCapabilities capabilities; - return capabilities; + return capabilities_; } + bool IsWdl() const { return wdl_; } + private: tensorflow::Scope scope_; std::unique_ptr session_; @@ -190,6 +236,8 @@ class TFNetwork : public Network { std::unique_ptr input_; std::unique_ptr policy_head_; std::unique_ptr value_head_; + const NetworkCapabilities capabilities_; + const bool wdl_; }; template @@ -207,12 +255,26 @@ class TFNetworkComputation : public NetworkComputation { int GetBatchSize() const override { return raw_input_.size(); } float GetQVal(int sample) const override { - return output_[0].template matrix()(sample, 0); + if (network_->IsWdl()) { + const auto w = output_[0].template matrix()(sample, 0); + const auto l = output_[0].template matrix()(sample, 2); + return w - l; + } else { + return output_[0].template matrix()(sample, 0); + } + } + float GetDVal(int sample) const override { + if (network_->IsWdl()) { + const auto d = output_[0].template matrix()(sample, 1); + return d; + } else { + return 0.0f; + } } - float GetDVal(int sample) const override { return 0.0f; } float GetPVal(int sample, int move_id) const override { return output_[1].template 
matrix()(sample, move_id); } + float GetMVal(int) const override { return 0.0f; } private: void PrepareInput(); @@ -271,9 +333,12 @@ void TFNetworkComputation::PrepareInput() { } // namespace template -TFNetwork::TFNetwork(const WeightsFile& file, - const OptionsDict& /*options*/) - : scope_(Scope::NewRootScope()) { +TFNetwork::TFNetwork(const WeightsFile& file, const OptionsDict& options, + bool wdl) + : scope_(Scope::NewRootScope()), + capabilities_{file.format().network_format().input(), + pblczero::NetworkFormat::MOVES_LEFT_NONE}, + wdl_(wdl) { const LegacyWeights weights(file.weights()); tensorflow::SessionOptions session_options; if (CPU) (*session_options.config.mutable_device_count())["GPU"] = 0; @@ -290,12 +355,27 @@ TFNetwork::TFNetwork(const WeightsFile& file, Placeholder::Shape({-1, kInputPlanes, 8, 8})); } - auto output = MakeNetwork(scope_, *input_, weights); + auto output = MakeNetwork(scope_, *input_, weights, wdl); CHECK(scope_.ok()) << scope_.status().ToString(); - policy_head_ = std::make_unique(output.first); value_head_ = std::make_unique(output.second); + if (options.Exists("dump-graphdef") || + options.Exists("dump-graphdef-txt")) { + GraphDef gdef; + CHECK(scope_.ToGraphDef(&gdef).ok()); + if (options.Exists("dump-graphdef")) { + std::ofstream f(options.Get("dump-graphdef").c_str()); + f.exceptions(std::ifstream::failbit); + f << gdef.SerializeAsString(); + } + if (options.Exists("dump-graphdef-txt")) { + std::ofstream f(options.Get("dump-graphdef-txt").c_str()); + f.exceptions(std::ifstream::failbit); + f << gdef.DebugString(); + } + } + // First request to tensorflow is slow (0.6s), so doing an empty request for // preheating. auto fake_request = NewComputation(); @@ -318,33 +398,39 @@ std::unique_ptr TFNetwork::NewComputation() { template std::unique_ptr MakeTFNetwork(const WeightsFile& weights, const OptionsDict& options) { - // Tensorflow backend needs to be updated to use folded batch norms. 
- throw Exception("Tensorflow backend is not supported."); - if (weights.format().network_format().network() != - pblczero::NetworkFormat::NETWORK_CLASSICAL) { + pblczero::NetworkFormat::NETWORK_CLASSICAL_WITH_HEADFORMAT && + weights.format().network_format().network() != + pblczero::NetworkFormat::NETWORK_SE_WITH_HEADFORMAT) { throw Exception( "Network format " + std::to_string(weights.format().network_format().network()) + - " is not supported by Tensorflow backend."); + " is not supported by Tensorflow C++ backend."); } if (weights.format().network_format().policy() != - pblczero::NetworkFormat::POLICY_CLASSICAL) { + pblczero::NetworkFormat::POLICY_CLASSICAL && + weights.format().network_format().policy() != + pblczero::NetworkFormat::POLICY_CONVOLUTION) { throw Exception("Policy format " + std::to_string(weights.format().network_format().policy()) + - " is not supported by Tensorflow backend."); + " is not supported by Tensorflow C++ backend."); } if (weights.format().network_format().value() != - pblczero::NetworkFormat::VALUE_CLASSICAL) { + pblczero::NetworkFormat::VALUE_CLASSICAL && + weights.format().network_format().value() != + pblczero::NetworkFormat::VALUE_WDL) { throw Exception("Value format " + std::to_string(weights.format().network_format().value()) + - " is not supported by Tensorflow backend."); + " is not supported by Tensorflow C++ backend."); } - return std::make_unique>(weights, options); + return std::make_unique>( + weights, options, + weights.format().network_format().value() == + pblczero::NetworkFormat::VALUE_WDL); } -REGISTER_NETWORK("tensorflow-cpu", MakeTFNetwork, 90) -REGISTER_NETWORK("tensorflow", MakeTFNetwork, 80) +REGISTER_NETWORK("tensorflow-cc-cpu", MakeTFNetwork, 90) +REGISTER_NETWORK("tensorflow-cc", MakeTFNetwork, 80) } // namespace } // namespace lczero diff --git a/src/neural/shared/policy_map.h b/src/neural/shared/policy_map.h index 67d0268361..e709e0aa25 100644 --- a/src/neural/shared/policy_map.h +++ b/src/neural/shared/policy_map.h @@ -20,396 +20,397 @@ namespace lczero { -const short kConvPolicyMap[] = {\ - 7, 31, 56, 81, 106, 131, 156, 180, 204, 230, 259, 288, - 317, 346, 374, 400, 425, 453, 485, 518, 551, 584, 615, 642, - 667, 695, 727, 761, 796, 830, 861, 888, 913, 941, 973, 1007, - 1042, 1076, 1107, 1134, 1159, 1187, 1219, 1252, 1285, 1318, 1349, 1376, - 1401, 1428, 1457, 1486, 1515, 1544, 1572, 1597, -1, -1, -1, -1, - -1, -1, -1, -1, 10, 35, 61, 86, 111, 136, 160, 183, - 207, 234, 264, 293, 322, 351, 378, 403, 428, 457, 490, 523, - 556, 589, 619, 645, 670, 699, 732, 766, 801, 835, 865, 891, - 916, 945, 978, 1012, 1047, 1081, 1111, 1137, 1162, 1191, 1224, 1257, - 1290, 1323, 1353, 1379, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 13, 38, 64, 90, - 115, 140, 163, 185, 210, 237, 267, 297, 326, 355, 381, 405, - 431, 460, 493, 527, 560, 593, 622, 647, 673, 702, 735, 770, - 805, 839, 868, 893, 919, 948, 981, 1016, 1051, 1085, 1114, 1139, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 15, 40, 66, 92, 118, 142, 165, 187, 212, 239, 269, 299, - 329, 357, 383, 407, 433, 462, 495, 529, 563, 595, 624, 649, - 675, 704, 737, 772, 808, 841, 870, 895, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 17, 42, 68, 94, 119, 144, 167, 189, - 214, 241, 271, 301, 330, 359, 385, 409, 435, 464, 497, 531, - 564, 597, 626, 651, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 19, 44, 70, 95, - 120, 145, 169, 191, 216, 243, 273, 302, 331, 360, 387, 411, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 21, 46, 71, 96, 121, 146, 170, 193, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 8, 32, 57, 82, 107, 132, 157, -1, - 205, 231, 260, 289, 318, 347, 375, -1, 426, 454, 486, 519, - 552, 585, 616, -1, 668, 696, 728, 762, 797, 831, 862, -1, - 914, 942, 974, 1008, 1043, 1077, 1108, -1, 1160, 1188, 1220, 1253, - 1286, 1319, 1350, -1, 1402, 1429, 1458, 1487, 1516, 1545, 1573, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 12, 37, 63, 88, - 113, 138, -1, -1, 209, 236, 266, 295, 324, 353, -1, -1, - 430, 459, 492, 525, 558, 591, -1, -1, 672, 701, 734, 768, - 803, 837, -1, -1, 918, 947, 980, 1014, 1049, 1083, -1, -1, - 1164, 1193, 1226, 1259, 1292, 1325, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 14, 39, 65, 91, 116, -1, -1, -1, 211, 238, 268, 298, - 327, -1, -1, -1, 432, 461, 494, 528, 561, -1, -1, -1, - 674, 703, 736, 771, 806, -1, -1, -1, 920, 949, 982, 1017, - 1052, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 16, 41, 67, 93, -1, -1, -1, -1, - 213, 240, 270, 300, -1, -1, -1, -1, 434, 463, 496, 530, - -1, -1, -1, -1, 676, 705, 738, 773, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 18, 43, 69, -1, - -1, -1, -1, -1, 215, 242, 272, -1, -1, -1, -1, -1, - 436, 465, 498, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 20, 45, -1, -1, -1, -1, -1, -1, 217, 244, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 0, 24, 49, 75, - 101, 127, 153, -1, 197, 223, 252, 282, 312, 342, 371, -1, - 418, 446, 478, 512, 546, 580, 612, -1, 660, 688, 720, 755, - 791, 826, 858, -1, 906, 934, 966, 1001, 1037, 1072, 1104, -1, - 1152, 1180, 1212, 1246, 1280, 1314, 1346, -1, 1394, 1421, 1450, 1480, - 1510, 1540, 1569, -1, 1614, 1639, 1665, 1691, 1717, 1743, 1768, -1, - 1, 25, 50, 76, 102, 128, -1, -1, 198, 224, 253, 283, - 313, 343, -1, -1, 419, 447, 479, 513, 547, 581, -1, -1, - 661, 689, 721, 756, 792, 827, -1, -1, 907, 935, 967, 1002, - 1038, 1073, -1, -1, 1153, 1181, 1213, 1247, 1281, 1315, -1, -1, - 1395, 1422, 1451, 1481, 1511, 1541, -1, -1, 1615, 1640, 1666, 1692, - 1718, 1744, -1, -1, 2, 26, 51, 77, 103, -1, -1, -1, - 199, 225, 254, 284, 314, -1, -1, -1, 420, 448, 480, 514, - 548, -1, -1, -1, 662, 690, 722, 757, 793, -1, -1, -1, - 908, 936, 968, 1003, 1039, -1, -1, -1, 1154, 1182, 1214, 1248, - 1282, -1, -1, 
-1, 1396, 1423, 1452, 1482, 1512, -1, -1, -1, - 1616, 1641, 1667, 1693, 1719, -1, -1, -1, 3, 27, 52, 78, - -1, -1, -1, -1, 200, 226, 255, 285, -1, -1, -1, -1, - 421, 449, 481, 515, -1, -1, -1, -1, 663, 691, 723, 758, - -1, -1, -1, -1, 909, 937, 969, 1004, -1, -1, -1, -1, - 1155, 1183, 1215, 1249, -1, -1, -1, -1, 1397, 1424, 1453, 1483, - -1, -1, -1, -1, 1617, 1642, 1668, 1694, -1, -1, -1, -1, - 4, 28, 53, -1, -1, -1, -1, -1, 201, 227, 256, -1, - -1, -1, -1, -1, 422, 450, 482, -1, -1, -1, -1, -1, - 664, 692, 724, -1, -1, -1, -1, -1, 910, 938, 970, -1, - -1, -1, -1, -1, 1156, 1184, 1216, -1, -1, -1, -1, -1, - 1398, 1425, 1454, -1, -1, -1, -1, -1, 1618, 1643, 1669, -1, - -1, -1, -1, -1, 5, 29, -1, -1, -1, -1, -1, -1, - 202, 228, -1, -1, -1, -1, -1, -1, 423, 451, -1, -1, - -1, -1, -1, -1, 665, 693, -1, -1, -1, -1, -1, -1, - 911, 939, -1, -1, -1, -1, -1, -1, 1157, 1185, -1, -1, - -1, -1, -1, -1, 1399, 1426, -1, -1, -1, -1, -1, -1, - 1619, 1644, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, - -1, -1, -1, -1, 203, -1, -1, -1, -1, -1, -1, -1, - 424, -1, -1, -1, -1, -1, -1, -1, 666, -1, -1, -1, - -1, -1, -1, -1, 912, -1, -1, -1, -1, -1, -1, -1, - 1158, -1, -1, -1, -1, -1, -1, -1, 1400, -1, -1, -1, - -1, -1, -1, -1, 1620, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 195, 220, 248, 277, - 306, 335, 364, -1, 416, 443, 474, 507, 540, 573, 605, -1, - 658, 685, 716, 750, 785, 819, 851, -1, 904, 931, 962, 996, - 1031, 1065, 1097, -1, 1150, 1177, 1208, 1241, 1274, 1307, 1339, -1, - 1392, 1418, 1446, 1475, 1504, 1533, 1562, -1, 1612, 1636, 1661, 1686, - 1711, 1736, 1761, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 414, 440, 470, 503, - 536, 569, -1, -1, 656, 682, 712, 746, 781, 815, -1, -1, - 902, 928, 958, 992, 1027, 1061, -1, -1, 1148, 1174, 1204, 1237, - 1270, 1303, -1, -1, 1390, 1415, 1442, 1471, 1500, 1529, -1, -1, - 1610, 1633, 1657, 1682, 1707, 1732, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 653, 678, 707, 741, - 776, -1, -1, -1, 899, 924, 953, 987, 1022, -1, -1, -1, - 1145, 1170, 1199, 1232, 1265, -1, -1, -1, 1387, 1411, 1437, 1466, - 1495, -1, -1, -1, 1607, 1629, 1652, 1677, 1702, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 897, 922, 951, 984, - -1, -1, -1, -1, 1143, 1168, 1197, 1229, -1, -1, -1, -1, - 1385, 1409, 1435, 1463, -1, -1, -1, -1, 1605, 1627, 1650, 1674, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1141, 1166, 1195, -1, - -1, -1, -1, -1, 1383, 1407, 1433, -1, -1, -1, -1, -1, - 1603, 1625, 1648, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1381, 1405, -1, -1, - -1, -1, -1, -1, 1601, 1623, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1599, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 194, 219, 247, 276, 305, 334, 363, 390, 415, 442, 473, 506, - 539, 572, 604, 632, 657, 684, 715, 749, 784, 818, 850, 878, - 903, 930, 961, 995, 1030, 1064, 1096, 1124, 1149, 
1176, 1207, 1240, - 1273, 1306, 1338, 1366, 1391, 1417, 1445, 1474, 1503, 1532, 1561, 1587, - 1611, 1635, 1660, 1685, 1710, 1735, 1760, 1784, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 412, 438, 468, 501, 534, 567, 600, 629, 654, 680, 710, 744, - 779, 813, 846, 875, 900, 926, 956, 990, 1025, 1059, 1092, 1121, - 1146, 1172, 1202, 1235, 1268, 1301, 1334, 1363, 1388, 1413, 1440, 1469, - 1498, 1527, 1557, 1584, 1608, 1631, 1655, 1680, 1705, 1730, 1756, 1781, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 652, 677, 706, 740, 775, 810, 843, 872, 898, 923, 952, 986, - 1021, 1056, 1089, 1118, 1144, 1169, 1198, 1231, 1264, 1298, 1331, 1360, - 1386, 1410, 1436, 1465, 1494, 1524, 1554, 1581, 1606, 1628, 1651, 1676, - 1701, 1727, 1753, 1778, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 896, 921, 950, 983, 1019, 1054, 1087, 1116, 1142, 1167, 1196, 1228, - 1262, 1296, 1329, 1358, 1384, 1408, 1434, 1462, 1492, 1522, 1552, 1579, - 1604, 1626, 1649, 1673, 1699, 1725, 1751, 1776, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 1140, 1165, 1194, 1227, 1260, 1294, 1327, 1356, 1382, 1406, 1432, 1461, - 1490, 1520, 1550, 1577, 1602, 1624, 1647, 1672, 1697, 1723, 1749, 1774, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 1380, 1404, 1431, 1460, 1489, 1518, 1548, 1575, 1600, 1622, 1646, 1671, - 1696, 1721, 1747, 1772, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 1598, 1621, 1645, 1670, 1695, 1720, 1745, 1770, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 218, 246, 275, 304, 333, 362, 389, - -1, 441, 472, 505, 538, 571, 603, 631, -1, 683, 714, 748, - 783, 817, 849, 877, -1, 929, 960, 994, 1029, 1063, 1095, 1123, - -1, 1175, 1206, 1239, 1272, 1305, 1337, 1365, -1, 1416, 1444, 1473, - 1502, 1531, 1560, 1586, -1, 1634, 1659, 1684, 1709, 1734, 1759, 1783, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 466, 499, 532, 565, 598, 627, - -1, -1, 708, 742, 777, 811, 844, 873, -1, -1, 954, 988, - 1023, 1057, 1090, 1119, -1, -1, 1200, 1233, 1266, 1299, 1332, 1361, - -1, -1, 1438, 1467, 1496, 1525, 1555, 1582, -1, -1, 1653, 1678, - 1703, 1728, 1754, 1779, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 739, 774, 809, 842, 871, - -1, -1, -1, 985, 1020, 1055, 1088, 1117, -1, -1, -1, 1230, - 1263, 1297, 1330, 1359, -1, -1, -1, 1464, 1493, 1523, 1553, 1580, - -1, -1, -1, 1675, 1700, 1726, 1752, 1777, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1018, 1053, 1086, 1115, - -1, -1, -1, -1, 1261, 1295, 1328, 1357, -1, -1, -1, -1, - 1491, 1521, 1551, 1578, -1, -1, -1, -1, 1698, 1724, 1750, 1775, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 1293, 1326, 1355, - -1, -1, -1, -1, -1, 1519, 
1549, 1576, -1, -1, -1, -1, - -1, 1722, 1748, 1773, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1547, 1574, - -1, -1, -1, -1, -1, -1, 1746, 1771, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1769, - -1, 23, 48, 74, 100, 126, 152, 177, -1, 222, 251, 281, - 311, 341, 370, 397, -1, 445, 477, 511, 545, 579, 611, 639, - -1, 687, 719, 754, 790, 825, 857, 885, -1, 933, 965, 1000, - 1036, 1071, 1103, 1131, -1, 1179, 1211, 1245, 1279, 1313, 1345, 1373, - -1, 1420, 1449, 1479, 1509, 1539, 1568, 1594, -1, 1638, 1664, 1690, - 1716, 1742, 1767, 1791, -1, -1, 47, 73, 99, 125, 151, 176, - -1, -1, 250, 280, 310, 340, 369, 396, -1, -1, 476, 510, - 544, 578, 610, 638, -1, -1, 718, 753, 789, 824, 856, 884, - -1, -1, 964, 999, 1035, 1070, 1102, 1130, -1, -1, 1210, 1244, - 1278, 1312, 1344, 1372, -1, -1, 1448, 1478, 1508, 1538, 1567, 1593, - -1, -1, 1663, 1689, 1715, 1741, 1766, 1790, -1, -1, -1, 72, - 98, 124, 150, 175, -1, -1, -1, 279, 309, 339, 368, 395, - -1, -1, -1, 509, 543, 577, 609, 637, -1, -1, -1, 752, - 788, 823, 855, 883, -1, -1, -1, 998, 1034, 1069, 1101, 1129, - -1, -1, -1, 1243, 1277, 1311, 1343, 1371, -1, -1, -1, 1477, - 1507, 1537, 1566, 1592, -1, -1, -1, 1688, 1714, 1740, 1765, 1789, - -1, -1, -1, -1, 97, 123, 149, 174, -1, -1, -1, -1, - 308, 338, 367, 394, -1, -1, -1, -1, 542, 576, 608, 636, - -1, -1, -1, -1, 787, 822, 854, 882, -1, -1, -1, -1, - 1033, 1068, 1100, 1128, -1, -1, -1, -1, 1276, 1310, 1342, 1370, - -1, -1, -1, -1, 1506, 1536, 1565, 1591, -1, -1, -1, -1, - 1713, 1739, 1764, 1788, -1, -1, -1, -1, -1, 122, 148, 173, - -1, -1, -1, -1, -1, 337, 366, 393, -1, -1, -1, -1, - -1, 575, 607, 635, -1, -1, -1, -1, -1, 821, 853, 881, - -1, -1, -1, -1, -1, 1067, 1099, 1127, -1, -1, -1, -1, - -1, 1309, 1341, 1369, -1, -1, -1, -1, -1, 1535, 1564, 1590, - -1, -1, -1, -1, -1, 1738, 1763, 1787, -1, -1, -1, -1, - -1, -1, 147, 172, -1, -1, -1, -1, -1, -1, 365, 392, - -1, -1, -1, -1, -1, -1, 606, 634, -1, -1, -1, -1, - -1, -1, 852, 880, -1, -1, -1, -1, -1, -1, 1098, 1126, - -1, -1, -1, -1, -1, -1, 1340, 1368, -1, -1, -1, -1, - -1, -1, 1563, 1589, -1, -1, -1, -1, -1, -1, 1762, 1786, - -1, -1, -1, -1, -1, -1, -1, 171, -1, -1, -1, -1, - -1, -1, -1, 391, -1, -1, -1, -1, -1, -1, -1, 633, - -1, -1, -1, -1, -1, -1, -1, 879, -1, -1, -1, -1, - -1, -1, -1, 1125, -1, -1, -1, -1, -1, -1, -1, 1367, - -1, -1, -1, -1, -1, -1, -1, 1588, -1, -1, -1, -1, - -1, -1, -1, 1785, -1, 30, 55, 80, 105, 130, 155, 179, - -1, 229, 258, 287, 316, 345, 373, 399, -1, 452, 484, 517, - 550, 583, 614, 641, -1, 694, 726, 760, 795, 829, 860, 887, - -1, 940, 972, 1006, 1041, 1075, 1106, 1133, -1, 1186, 1218, 1251, - 1284, 1317, 1348, 1375, -1, 1427, 1456, 1485, 1514, 1543, 1571, 1596, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 59, 84, - 109, 134, 158, 181, -1, -1, 262, 291, 320, 349, 376, 401, - -1, -1, 488, 521, 554, 587, 617, 643, -1, -1, 730, 764, - 799, 833, 863, 889, -1, -1, 976, 1010, 1045, 1079, 1109, 1135, - -1, -1, 1222, 1255, 1288, 1321, 1351, 1377, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 89, 114, 139, 162, 184, -1, -1, -1, 296, - 325, 354, 380, 404, -1, -1, -1, 526, 559, 592, 621, 646, - -1, -1, -1, 
769, 804, 838, 867, 892, -1, -1, -1, 1015, - 1050, 1084, 1113, 1138, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 117, 141, 164, 186, - -1, -1, -1, -1, 328, 356, 382, 406, -1, -1, -1, -1, - 562, 594, 623, 648, -1, -1, -1, -1, 807, 840, 869, 894, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 143, 166, 188, -1, -1, -1, -1, -1, 358, 384, 408, - -1, -1, -1, -1, -1, 596, 625, 650, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 168, 190, -1, -1, -1, -1, - -1, -1, 386, 410, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 192, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 11, 36, 62, 87, - 112, 137, 161, -1, 208, 235, 265, 294, 323, 352, 379, -1, - 429, 458, 491, 524, 557, 590, 620, -1, 671, 700, 733, 767, - 802, 836, 866, -1, 917, 946, 979, 1013, 1048, 1082, 1112, -1, - 1163, 1192, 1225, 1258, 1291, 1324, 1354, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 9, 33, 58, 83, 108, 133, -1, -1, 206, 232, 261, 290, - 319, 348, -1, -1, 427, 455, 487, 520, 553, 586, -1, -1, - 669, 697, 729, 763, 798, 832, -1, -1, 915, 943, 975, 1009, - 1044, 1078, -1, -1, 1161, 1189, 1221, 1254, 1287, 1320, -1, -1, - 1403, 1430, 1459, 1488, 1517, 1546, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 196, 221, 249, 278, 307, 336, -1, -1, 417, 444, 475, 508, - 541, 574, -1, -1, 659, 686, 717, 751, 786, 820, -1, -1, - 905, 932, 963, 997, 1032, 1066, -1, -1, 1151, 1178, 1209, 1242, - 1275, 1308, -1, -1, 1393, 1419, 1447, 1476, 1505, 1534, -1, -1, - 1613, 1637, 1662, 1687, 1712, 1737, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 413, 439, 469, 502, 535, 568, 601, -1, 655, 681, 711, 745, - 780, 814, 847, -1, 901, 927, 957, 991, 1026, 1060, 1093, -1, - 1147, 1173, 1203, 1236, 1269, 1302, 1335, -1, 1389, 1414, 1441, 1470, - 1499, 1528, 1558, -1, 1609, 1632, 1656, 1681, 1706, 1731, 1757, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 437, 467, 500, 533, 566, 599, 628, - -1, 679, 709, 743, 778, 812, 845, 874, -1, 925, 955, 989, - 1024, 1058, 1091, 1120, -1, 1171, 1201, 1234, 1267, 1300, 1333, 1362, - -1, 1412, 1439, 1468, 1497, 1526, 1556, 1583, -1, 1630, 1654, 1679, - 1704, 1729, 1755, 1780, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 245, 274, 303, 332, 361, 388, -1, -1, 471, 504, - 537, 570, 602, 630, -1, -1, 713, 747, 782, 816, 848, 876, - -1, -1, 959, 993, 1028, 1062, 1094, 1122, -1, -1, 1205, 1238, - 1271, 1304, 1336, 1364, -1, -1, 1443, 1472, 1501, 1530, 1559, 1585, - -1, -1, 1658, 1683, 1708, 1733, 1758, 1782, -1, -1, 54, 79, - 104, 129, 154, 178, -1, -1, 257, 286, 315, 344, 372, 398, - -1, -1, 483, 516, 549, 582, 613, 640, -1, -1, 725, 759, - 794, 828, 859, 886, -1, -1, 971, 1005, 1040, 1074, 1105, 1132, - -1, -1, 1217, 1250, 1283, 1316, 1347, 1374, -1, -1, 1455, 1484, - 1513, 1542, 1570, 1595, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 
34, 60, 85, 110, 135, 159, 182, -1, 233, 263, 292, - 321, 350, 377, 402, -1, 456, 489, 522, 555, 588, 618, 644, - -1, 698, 731, 765, 800, 834, 864, 890, -1, 944, 977, 1011, - 1046, 1080, 1110, 1136, -1, 1190, 1223, 1256, 1289, 1322, 1352, 1378, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, 1799, 1808, 1817, 1826, 1835, 1844, 1853, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, 1800, 1809, 1818, - 1827, 1836, 1845, 1854, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1798, 1807, 1816, 1825, 1834, 1843, 1852, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 1793, 1802, 1811, 1820, 1829, 1838, 1847, 1856, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1794, 1803, 1812, 1821, - 1830, 1839, 1848, 1857, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 1792, 1801, 1810, 1819, 1828, 1837, 1846, 1855, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 1796, 1805, 1814, 1823, 1832, 1841, 1850, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1797, 1806, 1815, 1824, - 1833, 1842, 1851, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 1795, 1804, 1813, 1822, 1831, 1840, 1849, -1, -1, -1, -1, -1, - -1, -1, -1, -1}; +// 73x8x8. 
+const short kConvPolicyMap[] = { + 7, 31, 56, 81, 106, 131, 156, 180, 204, 230, 259, 288, + 317, 346, 374, 400, 425, 453, 485, 518, 551, 584, 615, 642, + 667, 695, 727, 761, 796, 830, 861, 888, 913, 941, 973, 1007, + 1042, 1076, 1107, 1134, 1159, 1187, 1219, 1252, 1285, 1318, 1349, 1376, + 1401, 1428, 1457, 1486, 1515, 1544, 1572, 1597, -1, -1, -1, -1, + -1, -1, -1, -1, 10, 35, 61, 86, 111, 136, 160, 183, + 207, 234, 264, 293, 322, 351, 378, 403, 428, 457, 490, 523, + 556, 589, 619, 645, 670, 699, 732, 766, 801, 835, 865, 891, + 916, 945, 978, 1012, 1047, 1081, 1111, 1137, 1162, 1191, 1224, 1257, + 1290, 1323, 1353, 1379, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 13, 38, 64, 90, + 115, 140, 163, 185, 210, 237, 267, 297, 326, 355, 381, 405, + 431, 460, 493, 527, 560, 593, 622, 647, 673, 702, 735, 770, + 805, 839, 868, 893, 919, 948, 981, 1016, 1051, 1085, 1114, 1139, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 15, 40, 66, 92, 118, 142, 165, 187, 212, 239, 269, 299, + 329, 357, 383, 407, 433, 462, 495, 529, 563, 595, 624, 649, + 675, 704, 737, 772, 808, 841, 870, 895, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 17, 42, 68, 94, 119, 144, 167, 189, + 214, 241, 271, 301, 330, 359, 385, 409, 435, 464, 497, 531, + 564, 597, 626, 651, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 19, 44, 70, 95, + 120, 145, 169, 191, 216, 243, 273, 302, 331, 360, 387, 411, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 21, 46, 71, 96, 121, 146, 170, 193, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 8, 32, 57, 82, 107, 132, 157, -1, + 205, 231, 260, 289, 318, 347, 375, -1, 426, 454, 486, 519, + 552, 585, 616, -1, 668, 696, 728, 762, 797, 831, 862, -1, + 914, 942, 974, 1008, 1043, 1077, 1108, -1, 1160, 1188, 1220, 1253, + 1286, 1319, 1350, -1, 1402, 1429, 1458, 1487, 1516, 1545, 1573, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 12, 37, 63, 88, + 113, 138, -1, -1, 209, 236, 266, 295, 324, 353, -1, -1, + 430, 459, 492, 525, 558, 591, -1, -1, 672, 701, 734, 768, + 803, 837, -1, -1, 918, 947, 980, 1014, 1049, 1083, -1, -1, + 1164, 1193, 1226, 1259, 1292, 1325, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 14, 39, 65, 91, 116, -1, -1, -1, 211, 238, 268, 298, + 327, -1, -1, -1, 432, 461, 494, 528, 561, -1, -1, -1, + 674, 703, 736, 771, 806, -1, -1, -1, 920, 949, 982, 1017, + 1052, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 16, 41, 67, 93, -1, -1, -1, -1, + 213, 240, 270, 300, -1, -1, -1, -1, 434, 463, 496, 530, + -1, -1, -1, -1, 676, 705, 738, 773, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 18, 43, 69, -1, + -1, -1, -1, -1, 215, 242, 272, -1, -1, -1, -1, -1, + 436, 465, 498, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 20, 45, -1, -1, -1, -1, -1, -1, 217, 244, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0, 24, 49, 75, + 101, 127, 153, -1, 197, 223, 252, 282, 312, 342, 371, -1, + 418, 446, 478, 512, 546, 580, 612, -1, 660, 688, 720, 755, + 791, 826, 858, -1, 906, 934, 966, 1001, 1037, 1072, 1104, -1, + 1152, 1180, 1212, 1246, 1280, 1314, 1346, -1, 1394, 1421, 1450, 1480, + 1510, 1540, 1569, -1, 1614, 1639, 1665, 1691, 1717, 1743, 1768, -1, + 1, 25, 50, 76, 102, 128, -1, -1, 198, 224, 253, 283, + 313, 343, -1, -1, 419, 447, 479, 513, 547, 581, -1, -1, + 661, 689, 721, 756, 792, 827, -1, -1, 907, 935, 967, 1002, + 1038, 1073, -1, -1, 1153, 1181, 1213, 1247, 1281, 1315, -1, -1, + 1395, 1422, 1451, 1481, 1511, 1541, -1, -1, 1615, 1640, 1666, 1692, + 1718, 1744, -1, -1, 2, 26, 51, 77, 103, -1, -1, -1, + 199, 225, 254, 284, 314, -1, -1, -1, 420, 448, 480, 514, + 548, -1, -1, -1, 662, 690, 722, 757, 793, -1, -1, -1, + 908, 936, 968, 1003, 1039, -1, -1, -1, 1154, 1182, 1214, 1248, + 1282, -1, -1, -1, 1396, 1423, 1452, 1482, 1512, -1, -1, -1, + 1616, 1641, 1667, 1693, 1719, -1, -1, -1, 3, 27, 52, 78, + -1, -1, -1, -1, 200, 226, 255, 285, -1, -1, -1, -1, + 421, 449, 481, 515, -1, -1, -1, -1, 663, 691, 723, 758, + -1, -1, -1, -1, 909, 937, 969, 1004, -1, -1, -1, -1, + 1155, 1183, 1215, 1249, -1, -1, -1, -1, 1397, 1424, 1453, 1483, + -1, -1, -1, -1, 1617, 1642, 1668, 1694, -1, -1, -1, -1, + 4, 28, 53, -1, -1, -1, -1, -1, 201, 227, 256, -1, + -1, -1, -1, -1, 422, 450, 482, -1, -1, -1, -1, -1, + 664, 692, 724, -1, -1, -1, -1, -1, 910, 938, 970, -1, + -1, -1, -1, -1, 1156, 1184, 1216, -1, -1, -1, -1, -1, + 1398, 1425, 1454, -1, -1, -1, -1, -1, 1618, 1643, 1669, -1, + -1, -1, -1, -1, 5, 29, -1, -1, -1, -1, -1, -1, + 202, 228, -1, -1, -1, -1, -1, -1, 423, 451, -1, -1, + -1, -1, -1, -1, 665, 693, -1, -1, -1, -1, -1, -1, + 911, 939, -1, -1, -1, -1, -1, -1, 1157, 1185, -1, -1, + -1, -1, -1, -1, 1399, 1426, -1, -1, -1, -1, -1, -1, + 1619, 1644, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, + -1, -1, -1, -1, 203, -1, -1, -1, -1, -1, -1, -1, + 424, -1, -1, -1, -1, -1, -1, -1, 666, -1, -1, -1, + -1, -1, -1, -1, 912, -1, -1, -1, -1, -1, -1, -1, + 1158, -1, -1, -1, -1, -1, -1, -1, 1400, -1, -1, -1, + -1, -1, -1, -1, 1620, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 195, 220, 248, 277, + 306, 335, 364, -1, 416, 443, 474, 507, 540, 573, 605, -1, + 658, 685, 716, 750, 785, 819, 851, -1, 904, 931, 962, 996, + 1031, 1065, 1097, -1, 1150, 1177, 1208, 1241, 1274, 1307, 1339, -1, + 1392, 1418, 1446, 1475, 1504, 1533, 1562, -1, 1612, 1636, 1661, 1686, + 1711, 1736, 1761, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 414, 440, 470, 503, + 536, 569, -1, -1, 656, 682, 712, 746, 781, 815, -1, -1, + 902, 928, 958, 992, 1027, 1061, -1, -1, 1148, 1174, 1204, 1237, + 1270, 1303, -1, -1, 1390, 1415, 1442, 1471, 1500, 1529, -1, -1, + 1610, 1633, 1657, 1682, 1707, 1732, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 653, 678, 707, 741, + 776, -1, -1, -1, 
899, 924, 953, 987, 1022, -1, -1, -1, + 1145, 1170, 1199, 1232, 1265, -1, -1, -1, 1387, 1411, 1437, 1466, + 1495, -1, -1, -1, 1607, 1629, 1652, 1677, 1702, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 897, 922, 951, 984, + -1, -1, -1, -1, 1143, 1168, 1197, 1229, -1, -1, -1, -1, + 1385, 1409, 1435, 1463, -1, -1, -1, -1, 1605, 1627, 1650, 1674, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1141, 1166, 1195, -1, + -1, -1, -1, -1, 1383, 1407, 1433, -1, -1, -1, -1, -1, + 1603, 1625, 1648, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1381, 1405, -1, -1, + -1, -1, -1, -1, 1601, 1623, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1599, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 194, 219, 247, 276, 305, 334, 363, 390, 415, 442, 473, 506, + 539, 572, 604, 632, 657, 684, 715, 749, 784, 818, 850, 878, + 903, 930, 961, 995, 1030, 1064, 1096, 1124, 1149, 1176, 1207, 1240, + 1273, 1306, 1338, 1366, 1391, 1417, 1445, 1474, 1503, 1532, 1561, 1587, + 1611, 1635, 1660, 1685, 1710, 1735, 1760, 1784, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 412, 438, 468, 501, 534, 567, 600, 629, 654, 680, 710, 744, + 779, 813, 846, 875, 900, 926, 956, 990, 1025, 1059, 1092, 1121, + 1146, 1172, 1202, 1235, 1268, 1301, 1334, 1363, 1388, 1413, 1440, 1469, + 1498, 1527, 1557, 1584, 1608, 1631, 1655, 1680, 1705, 1730, 1756, 1781, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 652, 677, 706, 740, 775, 810, 843, 872, 898, 923, 952, 986, + 1021, 1056, 1089, 1118, 1144, 1169, 1198, 1231, 1264, 1298, 1331, 1360, + 1386, 1410, 1436, 1465, 1494, 1524, 1554, 1581, 1606, 1628, 1651, 1676, + 1701, 1727, 1753, 1778, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 896, 921, 950, 983, 1019, 1054, 1087, 1116, 1142, 1167, 1196, 1228, + 1262, 1296, 1329, 1358, 1384, 1408, 1434, 1462, 1492, 1522, 1552, 1579, + 1604, 1626, 1649, 1673, 1699, 1725, 1751, 1776, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1140, 1165, 1194, 1227, 1260, 1294, 1327, 1356, 1382, 1406, 1432, 1461, + 1490, 1520, 1550, 1577, 1602, 1624, 1647, 1672, 1697, 1723, 1749, 1774, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1380, 1404, 1431, 1460, 1489, 1518, 1548, 1575, 1600, 1622, 1646, 1671, + 1696, 1721, 1747, 1772, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1598, 1621, 1645, 1670, 1695, 1720, 1745, 1770, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, 218, 246, 275, 304, 333, 362, 389, + -1, 441, 472, 505, 538, 571, 603, 631, -1, 683, 714, 748, + 783, 817, 849, 877, -1, 929, 960, 994, 1029, 1063, 1095, 1123, + -1, 1175, 1206, 1239, 1272, 1305, 1337, 1365, -1, 1416, 1444, 1473, + 1502, 1531, 1560, 1586, -1, 1634, 1659, 1684, 1709, 1734, 1759, 1783, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 466, 499, 532, 565, 598, 627, + -1, -1, 708, 742, 777, 811, 844, 873, -1, -1, 954, 988, + 1023, 1057, 1090, 1119, -1, -1, 1200, 1233, 1266, 1299, 1332, 1361, + -1, -1, 1438, 1467, 1496, 1525, 1555, 1582, -1, -1, 1653, 1678, + 1703, 1728, 1754, 1779, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 739, 774, 809, 842, 871, + -1, -1, -1, 985, 1020, 1055, 1088, 1117, -1, -1, -1, 1230, + 1263, 1297, 1330, 1359, -1, -1, -1, 1464, 1493, 1523, 1553, 1580, + -1, -1, -1, 1675, 1700, 1726, 1752, 1777, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1018, 1053, 1086, 1115, + -1, -1, -1, -1, 1261, 1295, 1328, 1357, -1, -1, -1, -1, + 1491, 1521, 1551, 1578, -1, -1, -1, -1, 1698, 1724, 1750, 1775, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 1293, 1326, 1355, + -1, -1, -1, -1, -1, 1519, 1549, 1576, -1, -1, -1, -1, + -1, 1722, 1748, 1773, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1547, 1574, + -1, -1, -1, -1, -1, -1, 1746, 1771, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1769, + -1, 23, 48, 74, 100, 126, 152, 177, -1, 222, 251, 281, + 311, 341, 370, 397, -1, 445, 477, 511, 545, 579, 611, 639, + -1, 687, 719, 754, 790, 825, 857, 885, -1, 933, 965, 1000, + 1036, 1071, 1103, 1131, -1, 1179, 1211, 1245, 1279, 1313, 1345, 1373, + -1, 1420, 1449, 1479, 1509, 1539, 1568, 1594, -1, 1638, 1664, 1690, + 1716, 1742, 1767, 1791, -1, -1, 47, 73, 99, 125, 151, 176, + -1, -1, 250, 280, 310, 340, 369, 396, -1, -1, 476, 510, + 544, 578, 610, 638, -1, -1, 718, 753, 789, 824, 856, 884, + -1, -1, 964, 999, 1035, 1070, 1102, 1130, -1, -1, 1210, 1244, + 1278, 1312, 1344, 1372, -1, -1, 1448, 1478, 1508, 1538, 1567, 1593, + -1, -1, 1663, 1689, 1715, 1741, 1766, 1790, -1, -1, -1, 72, + 98, 124, 150, 175, -1, -1, -1, 279, 309, 339, 368, 395, + -1, -1, -1, 509, 543, 577, 609, 637, -1, -1, -1, 752, + 788, 823, 855, 883, -1, -1, -1, 998, 1034, 1069, 1101, 1129, + -1, -1, -1, 1243, 1277, 1311, 1343, 1371, -1, -1, -1, 1477, + 1507, 1537, 1566, 1592, -1, -1, -1, 1688, 1714, 1740, 1765, 1789, + -1, -1, -1, -1, 97, 123, 149, 174, -1, -1, -1, -1, + 308, 338, 367, 394, -1, -1, -1, -1, 542, 576, 608, 636, + -1, -1, -1, -1, 787, 822, 854, 882, -1, -1, -1, -1, + 1033, 1068, 1100, 1128, -1, -1, -1, -1, 1276, 1310, 1342, 1370, + -1, -1, -1, -1, 1506, 1536, 1565, 1591, -1, -1, -1, -1, + 1713, 1739, 1764, 1788, -1, -1, -1, -1, -1, 122, 148, 173, + -1, -1, -1, -1, -1, 337, 366, 393, -1, -1, -1, -1, + -1, 575, 607, 635, -1, -1, -1, -1, -1, 821, 853, 881, + 
-1, -1, -1, -1, -1, 1067, 1099, 1127, -1, -1, -1, -1, + -1, 1309, 1341, 1369, -1, -1, -1, -1, -1, 1535, 1564, 1590, + -1, -1, -1, -1, -1, 1738, 1763, 1787, -1, -1, -1, -1, + -1, -1, 147, 172, -1, -1, -1, -1, -1, -1, 365, 392, + -1, -1, -1, -1, -1, -1, 606, 634, -1, -1, -1, -1, + -1, -1, 852, 880, -1, -1, -1, -1, -1, -1, 1098, 1126, + -1, -1, -1, -1, -1, -1, 1340, 1368, -1, -1, -1, -1, + -1, -1, 1563, 1589, -1, -1, -1, -1, -1, -1, 1762, 1786, + -1, -1, -1, -1, -1, -1, -1, 171, -1, -1, -1, -1, + -1, -1, -1, 391, -1, -1, -1, -1, -1, -1, -1, 633, + -1, -1, -1, -1, -1, -1, -1, 879, -1, -1, -1, -1, + -1, -1, -1, 1125, -1, -1, -1, -1, -1, -1, -1, 1367, + -1, -1, -1, -1, -1, -1, -1, 1588, -1, -1, -1, -1, + -1, -1, -1, 1785, -1, 30, 55, 80, 105, 130, 155, 179, + -1, 229, 258, 287, 316, 345, 373, 399, -1, 452, 484, 517, + 550, 583, 614, 641, -1, 694, 726, 760, 795, 829, 860, 887, + -1, 940, 972, 1006, 1041, 1075, 1106, 1133, -1, 1186, 1218, 1251, + 1284, 1317, 1348, 1375, -1, 1427, 1456, 1485, 1514, 1543, 1571, 1596, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 59, 84, + 109, 134, 158, 181, -1, -1, 262, 291, 320, 349, 376, 401, + -1, -1, 488, 521, 554, 587, 617, 643, -1, -1, 730, 764, + 799, 833, 863, 889, -1, -1, 976, 1010, 1045, 1079, 1109, 1135, + -1, -1, 1222, 1255, 1288, 1321, 1351, 1377, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 89, 114, 139, 162, 184, -1, -1, -1, 296, + 325, 354, 380, 404, -1, -1, -1, 526, 559, 592, 621, 646, + -1, -1, -1, 769, 804, 838, 867, 892, -1, -1, -1, 1015, + 1050, 1084, 1113, 1138, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 117, 141, 164, 186, + -1, -1, -1, -1, 328, 356, 382, 406, -1, -1, -1, -1, + 562, 594, 623, 648, -1, -1, -1, -1, 807, 840, 869, 894, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 143, 166, 188, -1, -1, -1, -1, -1, 358, 384, 408, + -1, -1, -1, -1, -1, 596, 625, 650, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 168, 190, -1, -1, -1, -1, + -1, -1, 386, 410, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 192, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 11, 36, 62, 87, + 112, 137, 161, -1, 208, 235, 265, 294, 323, 352, 379, -1, + 429, 458, 491, 524, 557, 590, 620, -1, 671, 700, 733, 767, + 802, 836, 866, -1, 917, 946, 979, 1013, 1048, 1082, 1112, -1, + 1163, 1192, 1225, 1258, 1291, 1324, 1354, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 9, 33, 58, 83, 108, 133, -1, -1, 206, 232, 261, 290, + 319, 348, -1, -1, 427, 455, 487, 520, 553, 586, -1, -1, + 669, 697, 729, 763, 798, 832, -1, -1, 915, 943, 975, 1009, + 1044, 1078, -1, -1, 1161, 1189, 1221, 1254, 1287, 1320, -1, -1, + 1403, 1430, 1459, 1488, 1517, 1546, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 196, 221, 249, 278, 307, 336, -1, -1, 417, 444, 475, 508, + 541, 574, -1, -1, 659, 686, 717, 751, 786, 820, -1, -1, + 905, 
932, 963, 997, 1032, 1066, -1, -1, 1151, 1178, 1209, 1242, + 1275, 1308, -1, -1, 1393, 1419, 1447, 1476, 1505, 1534, -1, -1, + 1613, 1637, 1662, 1687, 1712, 1737, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 413, 439, 469, 502, 535, 568, 601, -1, 655, 681, 711, 745, + 780, 814, 847, -1, 901, 927, 957, 991, 1026, 1060, 1093, -1, + 1147, 1173, 1203, 1236, 1269, 1302, 1335, -1, 1389, 1414, 1441, 1470, + 1499, 1528, 1558, -1, 1609, 1632, 1656, 1681, 1706, 1731, 1757, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 437, 467, 500, 533, 566, 599, 628, + -1, 679, 709, 743, 778, 812, 845, 874, -1, 925, 955, 989, + 1024, 1058, 1091, 1120, -1, 1171, 1201, 1234, 1267, 1300, 1333, 1362, + -1, 1412, 1439, 1468, 1497, 1526, 1556, 1583, -1, 1630, 1654, 1679, + 1704, 1729, 1755, 1780, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 245, 274, 303, 332, 361, 388, -1, -1, 471, 504, + 537, 570, 602, 630, -1, -1, 713, 747, 782, 816, 848, 876, + -1, -1, 959, 993, 1028, 1062, 1094, 1122, -1, -1, 1205, 1238, + 1271, 1304, 1336, 1364, -1, -1, 1443, 1472, 1501, 1530, 1559, 1585, + -1, -1, 1658, 1683, 1708, 1733, 1758, 1782, -1, -1, 54, 79, + 104, 129, 154, 178, -1, -1, 257, 286, 315, 344, 372, 398, + -1, -1, 483, 516, 549, 582, 613, 640, -1, -1, 725, 759, + 794, 828, 859, 886, -1, -1, 971, 1005, 1040, 1074, 1105, 1132, + -1, -1, 1217, 1250, 1283, 1316, 1347, 1374, -1, -1, 1455, 1484, + 1513, 1542, 1570, 1595, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 34, 60, 85, 110, 135, 159, 182, -1, 233, 263, 292, + 321, 350, 377, 402, -1, 456, 489, 522, 555, 588, 618, 644, + -1, 698, 731, 765, 800, 834, 864, 890, -1, 944, 977, 1011, + 1046, 1080, 1110, 1136, -1, 1190, 1223, 1256, 1289, 1322, 1352, 1378, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 1799, 1808, 1817, 1826, 1835, 1844, 1853, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 1800, 1809, 1818, + 1827, 1836, 1845, 1854, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1798, 1807, 1816, 1825, 1834, 1843, 1852, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 1793, 1802, 1811, 1820, 1829, 1838, 1847, 1856, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1794, 1803, 1812, 1821, + 1830, 1839, 1848, 1857, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1792, 1801, 1810, 1819, 1828, 1837, 1846, 1855, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 1796, 1805, 1814, 1823, 1832, 1841, 1850, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1797, 1806, 1815, 1824, + 1833, 1842, 1851, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 1795, 1804, 1813, 1822, 1831, 1840, 1849, -1, -1, -1, -1, -1, + -1, -1, -1, -1}; } // namespace lczero diff --git a/src/utils/transpose.cc b/src/utils/transpose.cc deleted file mode 100644 index f26ef52c2d..0000000000 --- a/src/utils/transpose.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* - This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors - - Leela Chess is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Leela Chess is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Leela Chess. If not, see . - - Additional permission under GNU GPL version 3 section 7 - - If you modify this Program, or any covered work, by linking or - combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA - Toolkit and the NVIDIA CUDA Deep Neural Network library (or a - modified version of those libraries), containing parts covered by the - terms of the respective license agreement, the licensors of this - Program grant you additional permission to convey the resulting work. -*/ - -#include "utils/transpose.h" -#include - -namespace lczero { -void TransposeTensor(const std::vector& dims, std::vector order, - const std::vector from, float* to) { - if (order.empty()) { - for (size_t i = 0; i < dims.size(); ++i) - order.push_back(dims.size() - i - 1); - } - std::vector cur_idx(dims.size()); - for (size_t _ = 0; _ < from.size(); ++_) { - size_t from_idx = 0; - for (int i : order) { - from_idx *= dims[i]; - from_idx += cur_idx[i]; - } - *to++ = from[from_idx]; - for (int i = static_cast(dims.size()) - 1; i >= 0; --i) { - if (++cur_idx[i] == dims[i]) { - cur_idx[i] = 0; - } else { - break; - } - } - } -} -} // namespace lczero diff --git a/src/utils/transpose.h b/src/utils/transpose.h index 9aea6b85d8..4bb56aca54 100644 --- a/src/utils/transpose.h +++ b/src/utils/transpose.h @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,29 @@ namespace lczero { // from.size() elements. // @dims -- Dimensions of @from tensor. For example, {120, 60, 3, 3} // @order -- New-to-old dimension index mapping. 
For example {3, 2, 0, 1} +template void TransposeTensor(const std::vector& dims, std::vector order, - const std::vector from, float* to); + const std::vector from, T* to) { + if (order.empty()) { + for (size_t i = 0; i < dims.size(); ++i) + order.push_back(dims.size() - i - 1); + } + std::vector cur_idx(dims.size()); + for (size_t _ = 0; _ < from.size(); ++_) { + size_t from_idx = 0; + for (int i : order) { + from_idx *= dims[i]; + from_idx += cur_idx[i]; + } + *to++ = from[from_idx]; + for (int i = static_cast(dims.size()) - 1; i >= 0; --i) { + if (++cur_idx[i] == dims[i]) { + cur_idx[i] = 0; + } else { + break; + } + } + } +} } // namespace lczero From 4899d0c3f0fceaa84e095215f1e58d8ac1566d11 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 23 Mar 2020 11:32:11 +0200 Subject: [PATCH 067/151] fix appveyor error handling (#1136) --- scripts/appveyor_win_build.cmd | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/appveyor_win_build.cmd b/scripts/appveyor_win_build.cmd index 14eae16c07..746f8e097a 100644 --- a/scripts/appveyor_win_build.cmd +++ b/scripts/appveyor_win_build.cmd @@ -2,6 +2,7 @@ SET PGO=false IF %APPVEYOR_REPO_TAG%==true IF %DX%==false SET PGO=true IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +IF ERRORLEVEL 1 EXIT cd build IF %NAME%==cpu-openblas copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll IF %NAME%==cpu-dnnl copy C:\cache\dnnl_win_1.1.1_cpu_vcomp\bin\dnnl.dll From f657e8d6205a48694aa08c7898dde8b6d0e5ab70 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 23 Mar 2020 12:05:31 +0200 Subject: [PATCH 068/151] small fastmath.h cleanup (#1137) --- src/utils/fastmath.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h index 9d1e2cd3fe..ed5535b741 100644 --- a/src/utils/fastmath.h +++ b/src/utils/fastmath.h @@ -27,10 +27,10 @@ #pragma once +#include +#include #include - - namespace lczero { // These stunts are performed by trained professionals, do not try this at home. @@ -58,7 +58,7 @@ inline float FastLog2(const float a) { // modified for better accuracy with 32 bit floating point math. inline float FastPow2(const float a) { if (a < -126) return 0.0; - int32_t exp = floor(a); + int32_t exp = static_cast(floor(a)); float out = a - exp; // Minimize max relative error. out = 1.0f + out * (0.6602339f + 0.33976606f * out); From 0cc9b4bdea2b58d341ff9a0b5b15a1b823e3c395 Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 25 Mar 2020 09:13:31 +1100 Subject: [PATCH 069/151] Add moves left head support to tf backend. (#1144) * Add moves left head support to tf backend. * Formatting. 
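For context, the moves left head follows the same shape as the value head: a 1x1 convolution over the
residual tower output, a fully connected hidden layer with ReLU, and a single-output layer whose result
is clamped to be non-negative (an estimate of how much of the game remains). A minimal sketch of that
final stage is below, assuming the hidden activations and final-layer parameters are already available
as plain vectors; the function name and signature are illustrative only, not the backend's actual
interface.

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch only: scalar moves-left estimate from the head's hidden layer,
// i.e. bias + sum_i(w_i * relu(h_i)), clamped to be non-negative.
// Names and sizes here are assumptions for illustration.
float MovesLeftFromHidden(const std::vector<float>& hidden,   // FC1 ("ip1_mov") output
                          const std::vector<float>& final_w,  // final layer weights
                          float final_b) {                    // final layer bias
  float m = final_b;
  for (std::size_t i = 0; i < hidden.size(); ++i) {
    m += final_w[i] * std::max(0.0f, hidden[i]);
  }
  return std::max(0.0f, m);  // the head outputs a non-negative moves-left estimate
}

This is the same arithmetic the OpenCL patch further down performs on the CPU when filling m_values_
in ComputeBlocking().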
--- src/neural/network_tf_cc.cc | 60 ++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/src/neural/network_tf_cc.cc b/src/neural/network_tf_cc.cc index a63631c8ee..8b2c11e75c 100644 --- a/src/neural/network_tf_cc.cc +++ b/src/neural/network_tf_cc.cc @@ -124,8 +124,9 @@ Output MakeResidualBlock(const Scope& scope, Input input, int channels, } template -std::pair MakeNetwork(const Scope& scope, Input input, - const LegacyWeights& weights, bool wdl) { +std::tuple MakeNetwork(const Scope& scope, Input input, + const LegacyWeights& weights, + bool wdl, bool moves_left) { const int filters = weights.input.weights.size() / kInputPlanes / 9; // Input convolution. @@ -207,7 +208,33 @@ std::pair MakeNetwork(const Scope& scope, Input input, value_head = Tanh(scope, ip_fc); } - return {policy_head, value_head}; + // Moves left head + Output moves_left_head; + if (moves_left) { + const int mlh_channels = weights.moves_left.biases.size(); + auto conv_mov = MakeConvBlock(scope, flow, 1, filters, mlh_channels, + weights.moves_left); + conv_mov = + Reshape(scope, conv_mov, Const(scope, {-1, mlh_channels * 8 * 8})); + + const int mlh_fc1_outputs = weights.ip1_mov_b.size(); + auto ip1_mov_w = + CPU ? MakeConst(scope, {8, 8, mlh_channels, mlh_fc1_outputs}, + weights.ip1_mov_w, {3, 2, 0, 1}) + : MakeConst(scope, {mlh_channels, 8, 8, mlh_fc1_outputs}, + weights.ip1_mov_w, {3, 0, 1, 2}); + ip1_mov_w = Reshape(scope, ip1_mov_w, + Const(scope, {mlh_channels * 8 * 8, mlh_fc1_outputs})); + auto ip1_mov_b = MakeConst(scope, {mlh_fc1_outputs}, weights.ip1_mov_b); + auto mov_flow = + Relu(scope, Add(scope, MatMul(scope, conv_mov, ip1_mov_w), ip1_mov_b)); + auto ip2_mov_w = MakeConst(scope, {mlh_fc1_outputs, 1}, weights.ip2_mov_w); + auto ip2_mov_b = MakeConst(scope, {1}, weights.ip2_mov_b); + auto ip_fc = Add(scope, MatMul(scope, mov_flow, ip2_mov_w), ip2_mov_b); + moves_left_head = Relu(scope, ip_fc); + } + + return {policy_head, value_head, moves_left_head}; } template @@ -229,6 +256,10 @@ class TFNetwork : public Network { bool IsWdl() const { return wdl_; } + bool IsMlh() const { + return capabilities_.moves_left == pblczero::NetworkFormat::MOVES_LEFT_V1; + } + private: tensorflow::Scope scope_; std::unique_ptr session_; @@ -236,6 +267,7 @@ class TFNetwork : public Network { std::unique_ptr input_; std::unique_ptr policy_head_; std::unique_ptr value_head_; + std::unique_ptr moves_left_head_; const NetworkCapabilities capabilities_; const bool wdl_; }; @@ -274,7 +306,13 @@ class TFNetworkComputation : public NetworkComputation { float GetPVal(int sample, int move_id) const override { return output_[1].template matrix()(sample, move_id); } - float GetMVal(int) const override { return 0.0f; } + float GetMVal(int sample) const override { + if (network_->IsMlh()) { + return output_[2].template matrix()(sample, 0); + } else { + return 0.0f; + } + } private: void PrepareInput(); @@ -337,7 +375,7 @@ TFNetwork::TFNetwork(const WeightsFile& file, const OptionsDict& options, bool wdl) : scope_(Scope::NewRootScope()), capabilities_{file.format().network_format().input(), - pblczero::NetworkFormat::MOVES_LEFT_NONE}, + file.format().network_format().moves_left()}, wdl_(wdl) { const LegacyWeights weights(file.weights()); tensorflow::SessionOptions session_options; @@ -355,10 +393,11 @@ TFNetwork::TFNetwork(const WeightsFile& file, const OptionsDict& options, Placeholder::Shape({-1, kInputPlanes, 8, 8})); } - auto output = MakeNetwork(scope_, *input_, weights, wdl); + auto output = 
MakeNetwork(scope_, *input_, weights, wdl, IsMlh()); CHECK(scope_.ok()) << scope_.status().ToString(); - policy_head_ = std::make_unique(output.first); - value_head_ = std::make_unique(output.second); + policy_head_ = std::make_unique(std::get<0>(output)); + value_head_ = std::make_unique(std::get<1>(output)); + moves_left_head_ = std::make_unique(std::get<2>(output)); if (options.Exists("dump-graphdef") || options.Exists("dump-graphdef-txt")) { @@ -386,8 +425,9 @@ TFNetwork::TFNetwork(const WeightsFile& file, const OptionsDict& options, template tensorflow::Status TFNetwork::Compute(tensorflow::Tensor& input, std::vector* outputs) const { - return session_->Run({{*input_, input}}, {*value_head_, *policy_head_}, - outputs); + std::vector fetch_outputs = {*value_head_, *policy_head_}; + if (IsMlh()) fetch_outputs.push_back(*moves_left_head_); + return session_->Run({{*input_, input}}, fetch_outputs, outputs); } template From a371dca9b8c09d63fe1b0a047eb646d7854c6344 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Tue, 24 Mar 2020 16:02:08 -0700 Subject: [PATCH 070/151] Remember if network has format before making mutable. (#1149) --- src/neural/loader.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/neural/loader.cc b/src/neural/loader.cc index ebccfcbf8a..1a77de88ae 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -107,15 +107,15 @@ std::string DecompressGzip(const std::string& filename) { void FixOlderWeightsFile(WeightsFile* file) { using nf = pblczero::NetworkFormat; auto network_format = file->format().network_format().network(); - if (file->format().has_network_format() && - network_format != nf::NETWORK_CLASSICAL && + const auto has_network_format = file->format().has_network_format(); + if (has_network_format && network_format != nf::NETWORK_CLASSICAL && network_format != nf::NETWORK_SE) { // Already in a new format, return unchanged. return; } auto* net = file->mutable_format()->mutable_network_format(); - if (!file->format().has_network_format()) { + if (!has_network_format) { // Older protobufs don't have format definition. 
net->set_input(nf::INPUT_CLASSICAL_112_PLANE); net->set_output(nf::OUTPUT_CLASSICAL); From f757103d91f45247c45d5f76e40806b1467c9143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Wed, 25 Mar 2020 06:25:00 +0200 Subject: [PATCH 071/151] Moves left head for OpenCL backend (#1146) --- src/neural/opencl/OpenCL.h | 19 ++++++++ src/neural/opencl/OpenCLBuffers.cc | 73 +++++++++++++++++++++++------ src/neural/opencl/OpenCLBuffers.h | 7 ++- src/neural/opencl/network_opencl.cc | 60 ++++++++++++++++++++---- 4 files changed, 135 insertions(+), 24 deletions(-) diff --git a/src/neural/opencl/OpenCL.h b/src/neural/opencl/OpenCL.h index 7f5438ead2..d12cdb3c82 100644 --- a/src/neural/opencl/OpenCL.h +++ b/src/neural/opencl/OpenCL.h @@ -68,6 +68,7 @@ class Layer { bool is_policy{false}; bool is_conv_policy{false}; bool is_value{false}; + bool is_moves_left{false}; std::vector weights; }; @@ -187,6 +188,24 @@ class OpenCL_Network { m_layers[layer].ip_out_size = ip_out; } + void push_moves_left(unsigned int channels, unsigned int outputs, + unsigned int ip_in, unsigned int ip_out, + const std::vector& weights, + const std::vector& biases, + const std::vector& fc_w, + const std::vector& fc_b) { + size_t layer = get_layer_count(); + push_weights(layer, weights); + push_weights(layer, biases); + push_weights(layer, fc_w); + push_weights(layer, fc_b); + m_layers[layer].is_moves_left = true; + m_layers[layer].outputs = outputs; + m_layers[layer].channels = channels; + m_layers[layer].ip_in_size = ip_in; + m_layers[layer].ip_out_size = ip_out; + } + size_t get_layer_count() const { return m_layers.size(); } private: diff --git a/src/neural/opencl/OpenCLBuffers.cc b/src/neural/opencl/OpenCLBuffers.cc index 3ff9ef6425..a12f4a4d6a 100644 --- a/src/neural/opencl/OpenCLBuffers.cc +++ b/src/neural/opencl/OpenCLBuffers.cc @@ -46,13 +46,23 @@ OpenCLBuffers::OpenCLBuffers(const OpenCL_Network& opencl_net) constexpr auto width = 8; constexpr auto height = 8; - auto finalSize_pol = layers[layers.size() - 2].ip_out_size * sizeof(net_t); - auto finalSize_val = layers.back().ip_out_size * sizeof(net_t); + m_finalSize_pol = 0; + m_finalSize_val = 0; + m_finalSize_mov = 0; auto max_channels = unsigned{0}; for (const auto& layer : layers) { max_channels = std::max(max_channels, std::max(layer.channels, layer.outputs)); + if (layer.is_policy || layer.is_conv_policy) { + m_finalSize_pol = layer.ip_out_size * sizeof(net_t); + } + if (layer.is_value) { + m_finalSize_val = layer.ip_out_size * sizeof(net_t); + } + if (layer.is_moves_left) { + m_finalSize_mov = layer.ip_out_size * sizeof(net_t); + } } const auto mwg = m_opencl.m_sgemm_tuners.mwg; @@ -86,16 +96,35 @@ OpenCLBuffers::OpenCLBuffers(const OpenCL_Network& opencl_net) try { m_pinnedOutBuffer_pol = cl::Buffer( m_opencl.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, - max_batch_size * finalSize_pol); + max_batch_size * m_finalSize_pol); } catch (const cl::Error& e) { CERR << "Error in m_pinnedOutBuffer_pol: " << e.what() << ": " << e.err() << std::endl; throw; } - m_pinnedOutBuffer_val = - cl::Buffer(m_opencl.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, - max_batch_size * finalSize_val); + try { + m_pinnedOutBuffer_val = cl::Buffer( + m_opencl.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + max_batch_size * m_finalSize_val); + } catch (const cl::Error& e) { + CERR << "Error in m_pinnedOutBuffer_val: " << e.what() << ": " << e.err() + << std::endl; + throw; + } + + if (m_finalSize_mov > 0) { + try { + m_pinnedOutBuffer_mov = 
cl::Buffer( + m_opencl.m_context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + max_batch_size * m_finalSize_mov); + } catch (const cl::Error& e) { + CERR << "Error in m_pinnedOutBuffer_mov: " << e.what() << ": " << e.err() + << std::endl; + throw; + } + } + m_pool_buffer = cl::Buffer(m_opencl.m_context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, alloc_pool_size); @@ -104,12 +133,10 @@ OpenCLBuffers::OpenCLBuffers(const OpenCL_Network& opencl_net) void OpenCLBuffers::forward(const std::vector& input, std::vector& output_pol, std::vector& output_val, + std::vector& output_mov, const int batch_size) { auto& layers = m_opencl_net.m_layers; - auto finalSize_pol = layers[layers.size() - 2].ip_out_size * sizeof(net_t); - auto finalSize_val = layers.back().ip_out_size * sizeof(net_t); - const auto inSize = sizeof(net_t) * input.size(); m_commandqueue.enqueueWriteBuffer(m_inBuffer, CL_FALSE, 0, inSize, input.data()); @@ -241,13 +268,15 @@ void OpenCLBuffers::forward(const std::vector& input, layer.outputs * 8 * 8, layer.ip_in_size, layer.ip_out_size); } else { - assert(layer.is_value || layer.is_policy); + assert(layer.is_value || layer.is_policy || layer.is_moves_left); cl::Buffer out_buffer; if (layer.is_policy) { out_buffer = m_pinnedOutBuffer_pol; - } else { + } else if (layer.is_value) { out_buffer = m_pinnedOutBuffer_val; + } else { + out_buffer = m_pinnedOutBuffer_mov; } auto conv_weights = begin(layer.weights); @@ -265,22 +294,36 @@ void OpenCLBuffers::forward(const std::vector& input, auto pinnedOutBufferHost_pol = m_commandqueue.enqueueMapBuffer( m_pinnedOutBuffer_pol, CL_FALSE, CL_MAP_READ, 0, - batch_size * finalSize_pol); + batch_size * m_finalSize_pol); auto pinnedOutBufferHost_val = m_commandqueue.enqueueMapBuffer( m_pinnedOutBuffer_val, CL_FALSE, CL_MAP_READ, 0, - batch_size * finalSize_val); + batch_size * m_finalSize_val); + void* pinnedOutBufferHost_mov; + if (m_finalSize_mov > 0) { + pinnedOutBufferHost_mov = m_commandqueue.enqueueMapBuffer( + m_pinnedOutBuffer_mov, CL_FALSE, CL_MAP_READ, 0, + batch_size * m_finalSize_mov); + } m_commandqueue.finish(); std::memcpy(output_pol.data(), pinnedOutBufferHost_pol, - batch_size * finalSize_pol); + batch_size * m_finalSize_pol); std::memcpy(output_val.data(), pinnedOutBufferHost_val, - batch_size * finalSize_val); + batch_size * m_finalSize_val); + if (m_finalSize_mov > 0) { + std::memcpy(output_mov.data(), pinnedOutBufferHost_mov, + batch_size * m_finalSize_mov); + } m_commandqueue.enqueueUnmapMemObject(m_pinnedOutBuffer_pol, pinnedOutBufferHost_pol); m_commandqueue.enqueueUnmapMemObject(m_pinnedOutBuffer_val, pinnedOutBufferHost_val); + if (m_finalSize_mov > 0) { + m_commandqueue.enqueueUnmapMemObject(m_pinnedOutBuffer_mov, + pinnedOutBufferHost_mov); + } } void OpenCLBuffers::convolve3(int channels, int outputs, cl::Buffer& bufferIn, diff --git a/src/neural/opencl/OpenCLBuffers.h b/src/neural/opencl/OpenCLBuffers.h index 21f9bd43c9..ba1dc5739c 100644 --- a/src/neural/opencl/OpenCLBuffers.h +++ b/src/neural/opencl/OpenCLBuffers.h @@ -49,7 +49,8 @@ class OpenCLBuffers { OpenCLBuffers(const OpenCL_Network& opencl_net); void forward(const std::vector& input, std::vector& output_pol, - std::vector& output_val, const int batch_size); + std::vector& output_val, std::vector& output_mov, + const int batch_size); private: using weight_slice_t = std::vector::const_iterator; @@ -99,4 +100,8 @@ class OpenCLBuffers { cl::Buffer m_pool_buffer; cl::Buffer m_pinnedOutBuffer_pol; cl::Buffer m_pinnedOutBuffer_val; + cl::Buffer m_pinnedOutBuffer_mov; + 
size_t m_finalSize_pol; + size_t m_finalSize_val; + size_t m_finalSize_mov; }; diff --git a/src/neural/opencl/network_opencl.cc b/src/neural/opencl/network_opencl.cc index 86589d1be2..3c8b9e945e 100644 --- a/src/neural/opencl/network_opencl.cc +++ b/src/neural/opencl/network_opencl.cc @@ -45,24 +45,33 @@ class OpenCLNetwork; struct OpenCLWeights { const std::vector ip2_val_w; const std::vector ip2_val_b; + const std::vector ip2_mov_w; + const std::vector ip2_mov_b; const size_t num_output_policies = 1858; const size_t num_value_channels; + const size_t num_moves_channels; OpenCLWeights(const WeightsFile& file) : ip2_val_w(LayerAdapter(file.weights().ip2_val_w()).as_vector()), ip2_val_b(LayerAdapter(file.weights().ip2_val_b()).as_vector()), - num_value_channels(LayerAdapter(file.weights().ip1_val_b()).size()) {} + ip2_mov_w(LayerAdapter(file.weights().ip2_mov_w()).as_vector()), + ip2_mov_b(LayerAdapter(file.weights().ip2_mov_b()).as_vector()), + num_value_channels(LayerAdapter(file.weights().ip1_val_b()).size()), + num_moves_channels(LayerAdapter(file.weights().ip1_mov_b()).size()) {} }; class OpenCLComputation : public NetworkComputation { public: OpenCLComputation(const OpenCL_Network& opencl_net, - const OpenCLWeights& weights, const bool wdl) + const OpenCLWeights& weights, const bool wdl, + const bool moves_left) : opencl_net_(opencl_net), weights_(weights), policies_(), q_values_(), - wdl_(wdl) { + m_values_(), + wdl_(wdl), + moves_left_(moves_left) { buffers_ = opencl_net.acquire_buffers(); } @@ -82,6 +91,7 @@ class OpenCLComputation : public NetworkComputation { const auto num_output_policies = weights_.num_output_policies; const auto num_value_channels = weights_.num_value_channels; + const auto num_moves_channels = weights_.num_moves_channels; // Typically // input_channels = 112 @@ -90,6 +100,7 @@ class OpenCLComputation : public NetworkComputation { std::vector output_pol(largest_batch_size * num_output_policies); std::vector output_val(largest_batch_size * num_value_channels); + std::vector output_mov(largest_batch_size * num_moves_channels); std::vector input_data(largest_batch_size * kInputPlanes * kSquares); for (size_t i = 0; i < plane_count; i += largest_batch_size) { @@ -98,7 +109,8 @@ class OpenCLComputation : public NetworkComputation { EncodePlanes(planes_[i + j], &input_data[j * kSquares * kInputPlanes]); } - buffers_->forward(input_data, output_pol, output_val, batch_size); + buffers_->forward(input_data, output_pol, output_val, output_mov, + batch_size); for (size_t j = 0; j < batch_size; j++) { std::vector policy(num_output_policies); @@ -135,6 +147,16 @@ class OpenCLComputation : public NetworkComputation { q_values_.emplace_back(std::tanh(winrate)); } + + if (moves_left_) { + auto m = weights_.ip2_mov_b[0]; + auto ptr_weights = weights_.ip2_mov_w.data(); + auto ptr_outputs = &output_mov[j * num_moves_channels]; + for (size_t i = 0; i < num_moves_channels; i++) + m += ptr_weights[i] * std::max(0.0f, ptr_outputs[i]); + + m_values_.emplace_back(std::max(0.0f, m)); + } } } } @@ -162,8 +184,13 @@ class OpenCLComputation : public NetworkComputation { } } - float GetMVal(int /* sample */) const override { - return 0.0f; + float GetMVal(int sample) const override { + if (moves_left_) { + auto d = m_values_[sample]; + return d; + } else { + return 0.0f; + } } // Returns P value @move_id of @sample. 
@@ -185,9 +212,11 @@ class OpenCLComputation : public NetworkComputation { std::vector> policies_; std::vector q_values_; + std::vector m_values_; std::unique_ptr buffers_; bool wdl_; + bool moves_left_; }; void OpenCLComputation::EncodePlanes(const InputPlanes& sample, float* buffer) { @@ -205,7 +234,7 @@ class OpenCLNetwork : public Network { OpenCLNetwork(const WeightsFile& file, const OptionsDict& options) : capabilities_{file.format().network_format().input(), - pblczero::NetworkFormat::MOVES_LEFT_NONE}, + file.format().network_format().moves_left()}, weights_(file), params_(), opencl_(), @@ -222,6 +251,9 @@ class OpenCLNetwork : public Network { wdl_ = file.format().network_format().output() == pblczero::NetworkFormat::OUTPUT_WDL; + moves_left_ = file.format().network_format().moves_left() == + pblczero::NetworkFormat::MOVES_LEFT_V1; + auto max_batch_size_ = static_cast(options.GetOrDefault("batch_size", 16)); if (max_batch_size_ > kHardMaxBatchSize) { @@ -241,9 +273,11 @@ class OpenCLNetwork : public Network { const auto residual_blocks = weights.residual.size(); const auto num_value_input_planes = weights.value.biases.size(); + const auto num_moves_input_planes = weights.moves_left.biases.size(); const auto num_policy_input_planes = weights.policy.biases.size(); const auto num_output_policy = kPolicyOutputs; const auto num_value_channels = weights.ip1_val_b.size(); + const auto num_moves_channels = weights.ip1_mov_b.size(); // Typically // input_channels = 112 @@ -350,11 +384,20 @@ class OpenCLNetwork : public Network { weights.value.biases, weights.ip1_val_w, weights.ip1_val_b); + if (moves_left_) { + opencl_net_.push_moves_left( + channels, num_moves_input_planes, + num_moves_input_planes * width * height, num_moves_channels, + weights.moves_left.weights, weights.moves_left.biases, + weights.ip1_mov_w, weights.ip1_mov_b); + } + opencl_net_.setMaxMatchSize(max_batch_size_); } std::unique_ptr NewComputation() override { - return std::make_unique(opencl_net_, weights_, wdl_); + return std::make_unique(opencl_net_, weights_, wdl_, + moves_left_); } const NetworkCapabilities& GetCapabilities() const override { @@ -372,6 +415,7 @@ class OpenCLNetwork : public Network { OpenCL opencl_; OpenCL_Network opencl_net_; bool wdl_; + bool moves_left_; }; std::unique_ptr MakeOpenCLNetwork(const WeightsFile& weights, From 012269836870a1d1672e82acad01ec1333ddf2b7 Mon Sep 17 00:00:00 2001 From: gsobala Date: Wed, 25 Mar 2020 19:32:41 +0000 Subject: [PATCH 072/151] Update to Raspi installation instructions (#1147) Tested on virgin Buster distro on a Raspi 4, added installation of ninja --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2d95514926..654d883988 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,7 @@ cd .. ``` pip3 install meson +pip3 install ninja ``` 3. Install clang From c6642d56e22be3cb9f18951a321b7582f4a4aade Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Thu, 26 Mar 2020 04:18:20 -0700 Subject: [PATCH 073/151] Revert "Restore --noise treating as --noise-epsilon=0.25." (#926) This reverts commit 1753f7c40332121950f818979d9c71bca34ee230. --- src/mcts/params.cc | 11 +---------- src/mcts/params.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index fccff5eac7..f99d3a0893 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -109,12 +109,6 @@ const OptionId SearchParams::kTemperatureVisitOffsetId{ "Adjusts visits by this value when picking a move with a temperature. 
If a " "negative offset reduces visits for a particular move below zero, that " "move is not picked. If no moves can be picked, no temperature is used."}; -const OptionId SearchParams::kNoiseId{ - "noise", "DirichletNoise", - "Add Dirichlet noise to root node prior probabilities. This allows the " - "engine to discover new ideas during training by exploring moves which are " - "known to be bad. Not normally used during play.", - 'n'}; const OptionId SearchParams::kNoiseEpsilonId{ "noise-epsilon", "DirichletNoiseEpsilon", "Amount of Dirichlet noise to combine with root priors. This allows the " @@ -265,7 +259,6 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kTemperatureWinpctCutoffId, 0.0f, 100.0f) = 100.0f; options->Add(kTemperatureVisitOffsetId, -1000.0f, 1000.0f) = 0.0f; - options->Add(kNoiseId) = false; options->Add(kNoiseEpsilonId, 0.0f, 1.0f) = 0.0f; options->Add(kNoiseAlphaId, 0.0f, 10000000.0f) = 0.3f; options->Add(kVerboseStatsId) = false; @@ -331,9 +324,7 @@ SearchParams::SearchParams(const OptionsDict& options) options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) ? kCpuctFactorAtRootId.GetId() : kCpuctFactorId.GetId())), - kNoiseEpsilon(options.Get(kNoiseId.GetId()) - ? 0.25f - : options.Get(kNoiseEpsilonId.GetId())), + kNoiseEpsilon(options.Get(kNoiseEpsilonId.GetId())), kNoiseAlpha(options.Get(kNoiseAlphaId.GetId())), kFpuAbsolute(options.Get(kFpuStrategyId.GetId()) == "absolute"), diff --git a/src/mcts/params.h b/src/mcts/params.h index 1566bad472..187bd559d8 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -132,7 +132,6 @@ class SearchParams { static const OptionId kTemperatureEndgameId; static const OptionId kTemperatureWinpctCutoffId; static const OptionId kTemperatureVisitOffsetId; - static const OptionId kNoiseId; static const OptionId kNoiseEpsilonId; static const OptionId kNoiseAlphaId; static const OptionId kVerboseStatsId; From b5bedf45d81a8b86f248d0aac80106d92a4113c8 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Thu, 26 Mar 2020 17:27:18 +0200 Subject: [PATCH 074/151] reduce the chance for random circleci breakage (#1154) --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 69f8b8e476..b3aeb45aaa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,7 +34,7 @@ jobs: name: Build GCC command: | cd build-gcc - ninja + ninja -j 4 - run: command: cp build-clang/lc0 /tmp/lc0-clang - run: From 3782285281a74960046e94301900c63206f8adfe Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 28 Mar 2020 07:40:31 +0100 Subject: [PATCH 075/151] Get rid of GetId() (#1159) --- src/benchmark/backendbench.cc | 6 +-- src/benchmark/benchmark.cc | 10 ++-- src/engine.cc | 14 +++--- src/mcts/params.cc | 94 ++++++++++++++++------------------- src/mcts/params.h | 34 +++++-------- src/mcts/stoppers/factory.cc | 30 +++++------ src/neural/factory.cc | 15 +++--- src/selfplay/game.cc | 19 ++++--- src/selfplay/loop.cc | 2 +- src/selfplay/tournament.cc | 74 ++++++++++++++------------- src/utils/configfile.cc | 6 +-- src/utils/optionsdict.h | 64 ++++++++++++++++++++++-- src/utils/optionsparser.cc | 8 +-- src/utils/optionsparser.h | 30 ++--------- 14 files changed, 207 insertions(+), 199 deletions(-) diff --git a/src/benchmark/backendbench.cc b/src/benchmark/backendbench.cc index 6f13a11d46..5fb7bbee94 100644 --- a/src/benchmark/backendbench.cc +++ b/src/benchmark/backendbench.cc @@ -62,10 +62,10 @@ void 
BackendBenchmark::Run() { auto network = NetworkFactory::LoadNetwork(option_dict); NodeTree tree; - tree.ResetToPosition(option_dict.Get(kFenId.GetId()), {}); - const int batches = option_dict.Get(kBatchesId.GetId()); + tree.ResetToPosition(option_dict.Get(kFenId), {}); + const int batches = option_dict.Get(kBatchesId); - for (int i = 1; i <= option_dict.Get(kMaxBatchSizeId.GetId()); i++) { + for (int i = 1; i <= option_dict.Get(kMaxBatchSizeId); i++) { const auto start = std::chrono::steady_clock::now(); // TODO: support threads not equal to 1 to be able to more sensibly test // multiplexing backend. diff --git a/src/benchmark/benchmark.cc b/src/benchmark/benchmark.cc index 53ab526853..eadeab3d0a 100644 --- a/src/benchmark/benchmark.cc +++ b/src/benchmark/benchmark.cc @@ -62,13 +62,13 @@ void Benchmark::Run() { auto network = NetworkFactory::LoadNetwork(option_dict); NodeTree tree; - tree.ResetToPosition(option_dict.Get(kFenId.GetId()), {}); + tree.ResetToPosition(option_dict.Get(kFenId), {}); NNCache cache; - cache.SetCapacity(option_dict.Get(kNNCacheSizeId.GetId())); + cache.SetCapacity(option_dict.Get(kNNCacheSizeId)); - int visits = option_dict.Get(kNodesId.GetId()); - const int movetime = option_dict.Get(kMovetimeId.GetId()); + int visits = option_dict.Get(kNodesId); + const int movetime = option_dict.Get(kMovetimeId); auto stopper = std::make_unique(); if (movetime > -1) { @@ -87,7 +87,7 @@ void Benchmark::Run() { MoveList(), start, std::move(stopper), false, option_dict, &cache, nullptr); - search->StartThreads(option_dict.Get(kThreadsOptionId.GetId())); + search->StartThreads(option_dict.Get(kThreadsOptionId)); search->Wait(); diff --git a/src/engine.cc b/src/engine.cc index 0d2eefccec..05319a6290 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -105,7 +105,7 @@ void EngineController::UpdateFromUciOptions() { SharedLock lock(busy_mutex_); // Syzygy tablebases. - std::string tb_paths = options_.Get(kSyzygyTablebaseId.GetId()); + std::string tb_paths = options_.Get(kSyzygyTablebaseId); if (!tb_paths.empty() && tb_paths != tb_paths_) { syzygy_tb_ = std::make_unique(); CERR << "Loading Syzygy tablebases from " << tb_paths; @@ -126,7 +126,7 @@ void EngineController::UpdateFromUciOptions() { } // Cache size. - cache_.SetCapacity(options_.Get(kNNCacheSizeId.GetId())); + cache_.SetCapacity(options_.Get(kNNCacheSizeId)); } void EngineController::EnsureReady() { @@ -238,13 +238,13 @@ void EngineController::Go(const GoParams& params) { SetupPosition(ChessBoard::kStartposFen, {}); } - if (!options_.Get(kUciChess960.GetId())) { + if (!options_.Get(kUciChess960)) { // Remap FRC castling to legacy castling. responder = std::make_unique( std::move(responder), tree_->HeadPosition().GetBoard()); } - if (!options_.Get(kShowWDL.GetId())) { + if (!options_.Get(kShowWDL)) { // Strip WDL information from the response. 
responder = std::make_unique(std::move(responder)); } @@ -259,7 +259,7 @@ void EngineController::Go(const GoParams& params) { LOGFILE << "Timer started at " << FormatTime(SteadyClockToSystemClock(*move_start_time_)); - search_->StartThreads(options_.Get(kThreadsOptionId.GetId())); + search_->StartThreads(options_.Get(kThreadsOptionId)); } void EngineController::PonderHit() { @@ -285,7 +285,7 @@ EngineLoop::EngineLoop() void EngineLoop::RunLoop() { if (!ConfigFile::Init(&options_) || !options_.ProcessAllFlags()) return; Logging::Get().SetFilename( - options_.GetOptionsDict().Get(kLogFileId.GetId())); + options_.GetOptionsDict().Get(kLogFileId)); UciLoop::RunLoop(); } @@ -307,7 +307,7 @@ void EngineLoop::CmdSetOption(const std::string& name, const std::string& value, options_.SetUciOption(name, value, context); // Set the log filename for the case it was set in UCI option. Logging::Get().SetFilename( - options_.GetOptionsDict().Get(kLogFileId.GetId())); + options_.GetOptionsDict().Get(kLogFileId)); } void EngineLoop::CmdUciNewGame() { engine_.NewGame(); } diff --git a/src/mcts/params.cc b/src/mcts/params.cc index f99d3a0893..7c9283db9e 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -308,62 +308,52 @@ void SearchParams::Populate(OptionsParser* options) { SearchParams::SearchParams(const OptionsDict& options) : options_(options), - kLogitQ(options.Get(kLogitQId.GetId())), - kCpuct(options.Get(kCpuctId.GetId())), - kCpuctAtRoot( - options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) - ? kCpuctAtRootId.GetId() - : kCpuctId.GetId())), - kCpuctBase(options.Get(kCpuctBaseId.GetId())), - kCpuctBaseAtRoot( - options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) - ? kCpuctBaseAtRootId.GetId() - : kCpuctBaseId.GetId())), - kCpuctFactor(options.Get(kCpuctFactorId.GetId())), - kCpuctFactorAtRoot( - options.Get(options.Get(kRootHasOwnCpuctParamsId.GetId()) - ? kCpuctFactorAtRootId.GetId() - : kCpuctFactorId.GetId())), - kNoiseEpsilon(options.Get(kNoiseEpsilonId.GetId())), - kNoiseAlpha(options.Get(kNoiseAlphaId.GetId())), - kFpuAbsolute(options.Get(kFpuStrategyId.GetId()) == - "absolute"), - kFpuValue(options.Get(kFpuValueId.GetId())), + kLogitQ(options.Get(kLogitQId)), + kCpuct(options.Get(kCpuctId)), + kCpuctAtRoot(options.Get( + options.Get(kRootHasOwnCpuctParamsId) ? kCpuctAtRootId + : kCpuctId)), + kCpuctBase(options.Get(kCpuctBaseId)), + kCpuctBaseAtRoot(options.Get( + options.Get(kRootHasOwnCpuctParamsId) ? kCpuctBaseAtRootId + : kCpuctBaseId)), + kCpuctFactor(options.Get(kCpuctFactorId)), + kCpuctFactorAtRoot(options.Get( + options.Get(kRootHasOwnCpuctParamsId) ? kCpuctFactorAtRootId + : kCpuctFactorId)), + kNoiseEpsilon(options.Get(kNoiseEpsilonId)), + kNoiseAlpha(options.Get(kNoiseAlphaId)), + kFpuAbsolute(options.Get(kFpuStrategyId) == "absolute"), + kFpuValue(options.Get(kFpuValueId)), kFpuAbsoluteAtRoot( - (options.Get(kFpuStrategyAtRootId.GetId()) == "same" && + (options.Get(kFpuStrategyAtRootId) == "same" && kFpuAbsolute) || - options.Get(kFpuStrategyAtRootId.GetId()) == "absolute"), - kFpuValueAtRoot(options.Get(kFpuStrategyAtRootId.GetId()) == - "same" + options.Get(kFpuStrategyAtRootId) == "absolute"), + kFpuValueAtRoot(options.Get(kFpuStrategyAtRootId) == "same" ? 
kFpuValue - : options.Get(kFpuValueAtRootId.GetId())), - kCacheHistoryLength(options.Get(kCacheHistoryLengthId.GetId())), - kPolicySoftmaxTemp(options.Get(kPolicySoftmaxTempId.GetId())), - kMaxCollisionEvents(options.Get(kMaxCollisionEventsId.GetId())), - kMaxCollisionVisits(options.Get(kMaxCollisionVisitsId.GetId())), - kOutOfOrderEval(options.Get(kOutOfOrderEvalId.GetId())), - kStickyEndgames(options.Get(kStickyEndgamesId.GetId())), - kSyzygyFastPlay(options.Get(kSyzygyFastPlayId.GetId())), - kHistoryFill( - EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), - kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())), - kMovesLeftMaxEffect(options.Get(kMovesLeftMaxEffectId.GetId())), - kMovesLeftThreshold(options.Get(kMovesLeftThresholdId.GetId())), - kMovesLeftSlope(options.Get(kMovesLeftSlopeId.GetId())), - kShortSightedness(options.Get(kShortSightednessId.GetId())), - kDisplayCacheUsage(options.Get(kDisplayCacheUsageId.GetId())), - kMaxConcurrentSearchers( - options.Get(kMaxConcurrentSearchersId.GetId())), - kDrawScoreSidetomove{options.Get(kDrawScoreSidetomoveId.GetId()) / - 100.0f}, - kDrawScoreOpponent{options.Get(kDrawScoreOpponentId.GetId()) / - 100.0f}, - kDrawScoreWhite{options.Get(kDrawScoreWhiteId.GetId()) / 100.0f}, - kDrawScoreBlack{options.Get(kDrawScoreBlackId.GetId()) / 100.0f}, + : options.Get(kFpuValueAtRootId)), + kCacheHistoryLength(options.Get(kCacheHistoryLengthId)), + kPolicySoftmaxTemp(options.Get(kPolicySoftmaxTempId)), + kMaxCollisionEvents(options.Get(kMaxCollisionEventsId)), + kMaxCollisionVisits(options.Get(kMaxCollisionVisitsId)), + kOutOfOrderEval(options.Get(kOutOfOrderEvalId)), + kStickyEndgames(options.Get(kStickyEndgamesId)), + kSyzygyFastPlay(options.Get(kSyzygyFastPlayId)), + kHistoryFill(EncodeHistoryFill(options.Get(kHistoryFillId))), + kMiniBatchSize(options.Get(kMiniBatchSizeId)), + kMovesLeftMaxEffect(options.Get(kMovesLeftMaxEffectId)), + kMovesLeftThreshold(options.Get(kMovesLeftThresholdId)), + kMovesLeftSlope(options.Get(kMovesLeftSlopeId)), + kShortSightedness(options.Get(kShortSightednessId)), + kDisplayCacheUsage(options.Get(kDisplayCacheUsageId)), + kMaxConcurrentSearchers(options.Get(kMaxConcurrentSearchersId)), + kDrawScoreSidetomove{options.Get(kDrawScoreSidetomoveId) / 100.0f}, + kDrawScoreOpponent{options.Get(kDrawScoreOpponentId) / 100.0f}, + kDrawScoreWhite{options.Get(kDrawScoreWhiteId) / 100.0f}, + kDrawScoreBlack{options.Get(kDrawScoreBlackId) / 100.0f}, kMaxOutOfOrderEvals(std::max( - 1, - static_cast(options.Get(kMaxOutOfOrderEvalsId.GetId()) * - options.Get(kMiniBatchSizeId.GetId())))) { + 1, static_cast(options.Get(kMaxOutOfOrderEvalsId) * + options.Get(kMiniBatchSizeId)))) { if (std::max(std::abs(kDrawScoreSidetomove), std::abs(kDrawScoreOpponent)) + std::max(std::abs(kDrawScoreWhite), std::abs(kDrawScoreBlack)) > 1.0f) { diff --git a/src/mcts/params.h b/src/mcts/params.h index 187bd559d8..f547e316b6 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -44,7 +44,7 @@ class SearchParams { // Parameter getters. int GetMiniBatchSize() const { return kMiniBatchSize; } int GetMaxPrefetchBatch() const { - return options_.Get(kMaxPrefetchBatchId.GetId()); + return options_.Get(kMaxPrefetchBatchId); } bool GetLogitQ() const { return kLogitQ; } float GetCpuct(bool at_root) const { return at_root ? kCpuctAtRoot : kCpuct; } @@ -54,33 +54,25 @@ class SearchParams { float GetCpuctFactor(bool at_root) const { return at_root ? 
kCpuctFactorAtRoot : kCpuctFactor; } - float GetTemperature() const { - return options_.Get(kTemperatureId.GetId()); - } + float GetTemperature() const { return options_.Get(kTemperatureId); } float GetTemperatureVisitOffset() const { - return options_.Get(kTemperatureVisitOffsetId.GetId()); - } - int GetTempDecayMoves() const { - return options_.Get(kTempDecayMovesId.GetId()); + return options_.Get(kTemperatureVisitOffsetId); } + int GetTempDecayMoves() const { return options_.Get(kTempDecayMovesId); } int GetTemperatureCutoffMove() const { - return options_.Get(kTemperatureCutoffMoveId.GetId()); + return options_.Get(kTemperatureCutoffMoveId); } float GetTemperatureEndgame() const { - return options_.Get(kTemperatureEndgameId.GetId()); + return options_.Get(kTemperatureEndgameId); } float GetTemperatureWinpctCutoff() const { - return options_.Get(kTemperatureWinpctCutoffId.GetId()); + return options_.Get(kTemperatureWinpctCutoffId); } float GetNoiseEpsilon() const { return kNoiseEpsilon; } float GetNoiseAlpha() const { return kNoiseAlpha; } - bool GetVerboseStats() const { - return options_.Get(kVerboseStatsId.GetId()); - } - bool GetLogLiveStats() const { - return options_.Get(kLogLiveStatsId.GetId()); - } + bool GetVerboseStats() const { return options_.Get(kVerboseStatsId); } + bool GetLogLiveStats() const { return options_.Get(kLogLiveStatsId); } bool GetFpuAbsolute(bool at_root) const { return at_root ? kFpuAbsoluteAtRoot : kFpuAbsolute; } @@ -95,12 +87,10 @@ class SearchParams { bool GetOutOfOrderEval() const { return kOutOfOrderEval; } bool GetStickyEndgames() const { return kStickyEndgames; } bool GetSyzygyFastPlay() const { return kSyzygyFastPlay; } - int GetMultiPv() const { return options_.Get(kMultiPvId.GetId()); } - bool GetPerPvCounters() const { - return options_.Get(kPerPvCountersId.GetId()); - } + int GetMultiPv() const { return options_.Get(kMultiPvId); } + bool GetPerPvCounters() const { return options_.Get(kPerPvCountersId); } std::string GetScoreType() const { - return options_.Get(kScoreTypeId.GetId()); + return options_.Get(kScoreTypeId); } FillEmptyHistory GetHistoryFill() const { return kHistoryFill; } float GetMovesLeftMaxEffect() const { return kMovesLeftMaxEffect; } diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index 57ab702f5f..994183c118 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -125,20 +125,17 @@ void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options) { void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, const OptionsDict& options) { // KLD gain. - const auto min_kld_gain = - options.Get(kMinimumKLDGainPerNodeId.GetId()); + const auto min_kld_gain = options.Get(kMinimumKLDGainPerNodeId); if (min_kld_gain > 0.0f) { stopper->AddStopper(std::make_unique( - min_kld_gain, options.Get(kKLDGainAverageIntervalId.GetId()))); + min_kld_gain, options.Get(kKLDGainAverageIntervalId))); } // Should be last in the chain. 
- const auto smart_pruning_factor = - options.Get(kSmartPruningFactorId.GetId()); + const auto smart_pruning_factor = options.Get(kSmartPruningFactorId); if (smart_pruning_factor > 0.0f) { stopper->AddStopper(std::make_unique( - smart_pruning_factor, - options.Get(kMinimumSmartPruningBatchesId.GetId()))); + smart_pruning_factor, options.Get(kMinimumSmartPruningBatchesId))); } } @@ -147,11 +144,11 @@ namespace { void PopulateStoppers(ChainedSearchStopper* stopper, const OptionsDict& options, const GoParams& params) { const bool infinite = params.infinite || params.ponder; - const int64_t move_overhead = options.Get(kMoveOverheadId.GetId()); + const int64_t move_overhead = options.Get(kMoveOverheadId); // RAM limit watching stopper. - const auto cache_size_mb = options.Get(kNNCacheSizeId.GetId()); - const int ram_limit = options.Get(kRamLimitMbId.GetId()); + const auto cache_size_mb = options.Get(kNNCacheSizeId); + const int ram_limit = options.Get(kRamLimitMbId); if (ram_limit) { stopper->AddStopper( std::make_unique(cache_size_mb, ram_limit)); @@ -237,16 +234,14 @@ std::unique_ptr LegacyTimeManager::CreateTimeManagementStopper( // If no time limit is given, don't stop on this condition. if (params.infinite || params.ponder || !time) return nullptr; - const int64_t move_overhead = options.Get(kMoveOverheadId.GetId()); + const int64_t move_overhead = options.Get(kMoveOverheadId); const std::optional& inc = is_black ? params.binc : params.winc; const int increment = inc ? std::max(int64_t(0), *inc) : 0; // How to scale moves time. - const float slowmover = options.Get(kSlowMoverId.GetId()); - const float time_curve_midpoint = - options.Get(kTimeMidpointMoveId.GetId()); - const float time_curve_steepness = - options.Get(kTimeSteepnessId.GetId()); + const float slowmover = options.Get(kSlowMoverId); + const float time_curve_midpoint = options.Get(kTimeMidpointMoveId); + const float time_curve_steepness = options.Get(kTimeSteepnessId); float movestogo = ComputeEstimatedMovesToGo( position.GetGamePly(), time_curve_midpoint, time_curve_steepness); @@ -268,8 +263,7 @@ std::unique_ptr LegacyTimeManager::CreateTimeManagementStopper( // of it will be used immediately, remove that from planning. 
int time_to_squander = 0; if (time_spared_ms_ > 0) { - time_to_squander = - time_spared_ms_ * options.Get(kSpendSavedTimeId.GetId()); + time_to_squander = time_spared_ms_ * options.Get(kSpendSavedTimeId); time_spared_ms_ -= time_to_squander; total_moves_time -= time_to_squander; } diff --git a/src/neural/factory.cc b/src/neural/factory.cc index f8ff3e6f79..b288845aaa 100644 --- a/src/neural/factory.cc +++ b/src/neural/factory.cc @@ -26,9 +26,10 @@ */ #include "neural/factory.h" -#include "neural/loader.h" #include + +#include "neural/loader.h" #include "utils/commandline.h" #include "utils/logging.h" @@ -98,9 +99,9 @@ std::unique_ptr NetworkFactory::Create(const std::string& network, NetworkFactory::BackendConfiguration::BackendConfiguration( const OptionsDict& options) - : weights_path(options.Get(kWeightsId.GetId())), - backend(options.Get(kBackendId.GetId())), - backend_options(options.Get(kBackendOptionsId.GetId())) {} + : weights_path(options.Get(kWeightsId)), + backend(options.Get(kBackendId)), + backend_options(options.Get(kBackendOptionsId)) {} bool NetworkFactory::BackendConfiguration::operator==( const BackendConfiguration& other) const { @@ -110,10 +111,10 @@ bool NetworkFactory::BackendConfiguration::operator==( std::unique_ptr NetworkFactory::LoadNetwork( const OptionsDict& options) { - std::string net_path = options.Get(kWeightsId.GetId()); - const std::string backend = options.Get(kBackendId.GetId()); + std::string net_path = options.Get(kWeightsId); + const std::string backend = options.Get(kBackendId); const std::string backend_options = - options.Get(kBackendOptionsId.GetId()); + options.Get(kBackendOptionsId); if (net_path == kAutoDiscover) { net_path = DiscoverWeightsFile(); diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index 641bfcd689..cb8ff88ffa 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -71,8 +71,8 @@ void SelfPlayGame::PopulateUciParams(OptionsParser* options) { SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2, bool shared_tree, const MoveList& opening) : options_{player1, player2}, - chess960_{player1.uci_options->Get(kUciChess960.GetId()) || - player2.uci_options->Get(kUciChess960.GetId())} { + chess960_{player1.uci_options->Get(kUciChess960) || + player2.uci_options->Get(kUciChess960)} { tree_[0] = std::make_shared(); tree_[0]->ResetToPosition(ChessBoard::kStartposFen, {}); @@ -101,7 +101,7 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, // Initialize search. const int idx = blacks_move ? 1 : 0; - if (!options_[idx].uci_options->Get(kReuseTreeId.GetId())) { + if (!options_[idx].uci_options->Get(kReuseTreeId)) { tree_[idx]->TrimTreeAtHead(); } @@ -161,11 +161,10 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, max_eval_[1] = std::max(max_eval_[1], best_d); max_eval_[2] = std::max(max_eval_[2], blacks_move ? 
best_w : best_l); if (enable_resign && move_number >= options_[idx].uci_options->Get( - kResignEarliestMoveId.GetId())) { + kResignEarliestMoveId)) { const float resignpct = - options_[idx].uci_options->Get(kResignPercentageId.GetId()) / - 100; - if (options_[idx].uci_options->Get(kResignWDLStyleId.GetId())) { + options_[idx].uci_options->Get(kResignPercentageId) / 100; + if (options_[idx].uci_options->Get(kResignWDLStyleId)) { auto threshold = 1.0f - resignpct; if (best_w > threshold) { game_result_ = @@ -206,8 +205,8 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, // If 'best move' is less than allowed visits and not max visits, // discard it and try again. if (cur_n == max_n || - static_cast(cur_n) >= options_[idx].uci_options->Get( - kMinimumAllowedVistsId.GetId())) { + static_cast(cur_n) >= + options_[idx].uci_options->Get(kMinimumAllowedVistsId)) { break; } PositionHistory history_copy = tree_[idx]->GetPositionHistory(); @@ -254,7 +253,7 @@ std::vector SelfPlayGame::GetMoves() const { float SelfPlayGame::GetWorstEvalForWinnerOrDraw() const { // TODO: This assumes both players have the same resign style. // Supporting otherwise involves mixing the meaning of worst. - if (options_[0].uci_options->Get(kResignWDLStyleId.GetId())) { + if (options_[0].uci_options->Get(kResignWDLStyleId)) { if (game_result_ == GameResult::WHITE_WON) { return std::max(max_eval_[1], max_eval_[2]); } else if (game_result_ == GameResult::BLACK_WON) { diff --git a/src/selfplay/loop.cc b/src/selfplay/loop.cc index 98f28b7fe8..86587fa683 100644 --- a/src/selfplay/loop.cc +++ b/src/selfplay/loop.cc @@ -52,7 +52,7 @@ void SelfPlayLoop::RunLoop() { options_.Add(kInteractiveId) = false; if (!options_.ProcessAllFlags()) return; - if (options_.GetOptionsDict().Get(kInteractiveId.GetId())) { + if (options_.GetOptionsDict().Get(kInteractiveId)) { UciLoop::RunLoop(); } else { // Send id before starting tournament to allow wrapping client to know diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 2c603800f2..23ae3fad14 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -111,23 +111,22 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { SelfPlayGame::PopulateUciParams(options); auto defaults = options->GetMutableDefaultsOptions(); - defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 32); - defaults->Set(SearchParams::kCpuctId.GetId(), 1.2f); - defaults->Set(SearchParams::kCpuctFactorId.GetId(), 0.0f); - defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 1.0f); - defaults->Set(SearchParams::kMaxCollisionVisitsId.GetId(), 1); - defaults->Set(SearchParams::kMaxCollisionEventsId.GetId(), 1); - defaults->Set(SearchParams::kCacheHistoryLengthId.GetId(), 7); - defaults->Set(SearchParams::kOutOfOrderEvalId.GetId(), false); - defaults->Set(SearchParams::kTemperatureId.GetId(), 1.0f); - defaults->Set(SearchParams::kNoiseEpsilonId.GetId(), 0.25f); - defaults->Set(SearchParams::kFpuValueId.GetId(), 0.0f); - defaults->Set(SearchParams::kHistoryFillId.GetId(), "no"); - defaults->Set(NetworkFactory::kBackendId.GetId(), - "multiplexing"); - defaults->Set(SearchParams::kStickyEndgamesId.GetId(), false); - defaults->Set(SearchParams::kLogitQId.GetId(), false); - defaults->Set(SearchParams::kRootHasOwnCpuctParamsId.GetId(), false); + defaults->Set(SearchParams::kMiniBatchSizeId, 32); + defaults->Set(SearchParams::kCpuctId, 1.2f); + defaults->Set(SearchParams::kCpuctFactorId, 0.0f); + defaults->Set(SearchParams::kPolicySoftmaxTempId, 
1.0f); + defaults->Set(SearchParams::kMaxCollisionVisitsId, 1); + defaults->Set(SearchParams::kMaxCollisionEventsId, 1); + defaults->Set(SearchParams::kCacheHistoryLengthId, 7); + defaults->Set(SearchParams::kOutOfOrderEvalId, false); + defaults->Set(SearchParams::kTemperatureId, 1.0f); + defaults->Set(SearchParams::kNoiseEpsilonId, 0.25f); + defaults->Set(SearchParams::kFpuValueId, 0.0f); + defaults->Set(SearchParams::kHistoryFillId, "no"); + defaults->Set(NetworkFactory::kBackendId, "multiplexing"); + defaults->Set(SearchParams::kStickyEndgamesId, false); + defaults->Set(SearchParams::kLogitQId, false); + defaults->Set(SearchParams::kRootHasOwnCpuctParamsId, false); } SelfPlayTournament::SelfPlayTournament( @@ -142,22 +141,21 @@ SelfPlayTournament::SelfPlayTournament( game_callback_(game_info), tournament_callback_(tournament_info), kThreads{ - options.GetSubdict("player1").Get(kThreadsId.GetId()), - options.GetSubdict("player2").Get(kThreadsId.GetId()), + options.GetSubdict("player1").Get(kThreadsId), + options.GetSubdict("player2").Get(kThreadsId), }, - kTotalGames(options.Get(kTotalGamesId.GetId())), - kShareTree(options.Get(kShareTreesId.GetId())), - kParallelism(options.Get(kParallelGamesId.GetId())), - kTraining(options.Get(kTrainingId.GetId())), - kResignPlaythrough(options.Get(kResignPlaythroughId.GetId())), - kDiscardedStartChance( - options.Get(kDiscardedStartChanceId.GetId())) { - std::string book = options.Get(kOpeningsFileId.GetId()); + kTotalGames(options.Get(kTotalGamesId)), + kShareTree(options.Get(kShareTreesId)), + kParallelism(options.Get(kParallelGamesId)), + kTraining(options.Get(kTrainingId)), + kResignPlaythrough(options.Get(kResignPlaythroughId)), + kDiscardedStartChance(options.Get(kDiscardedStartChanceId)) { + std::string book = options.Get(kOpeningsFileId); if (!book.empty()) { PgnReader book_reader; book_reader.AddPgnFile(book); openings_ = book_reader.ReleaseGames(); - if (options.Get(kOpeningsModeId.GetId()) == "shuffled") { + if (options.Get(kOpeningsModeId) == "shuffled") { Random::Get().Shuffle(openings_.begin(), openings_.end()); } } @@ -178,22 +176,22 @@ SelfPlayTournament::SelfPlayTournament( // Initializing cache. cache_[0] = std::make_shared( - options.GetSubdict("player1").Get(kNNCacheSizeId.GetId())); + options.GetSubdict("player1").Get(kNNCacheSizeId)); if (kShareTree) { cache_[1] = cache_[0]; } else { cache_[1] = std::make_shared( - options.GetSubdict("player2").Get(kNNCacheSizeId.GetId())); + options.GetSubdict("player2").Get(kNNCacheSizeId)); } // SearchLimits. 
for (int idx : {0, 1}) { search_limits_[idx].playouts = - options.GetSubdict(kPlayerNames[idx]).Get(kPlayoutsId.GetId()); + options.GetSubdict(kPlayerNames[idx]).Get(kPlayoutsId); search_limits_[idx].visits = - options.GetSubdict(kPlayerNames[idx]).Get(kVisitsId.GetId()); + options.GetSubdict(kPlayerNames[idx]).Get(kVisitsId); search_limits_[idx].movetime = - options.GetSubdict(kPlayerNames[idx]).Get(kTimeMsId.GetId()); + options.GetSubdict(kPlayerNames[idx]).Get(kTimeMsId); if (search_limits_[idx].playouts == -1 && search_limits_[idx].visits == -1 && @@ -212,9 +210,9 @@ void SelfPlayTournament::PlayOneGame(int game_number) { Mutex::Lock lock(mutex_); player1_black = ((game_number % 2) == 1) != first_game_black_; if (!openings_.empty()) { - if (player_options_[0].Get(kOpeningsMirroredId.GetId())) { + if (player_options_[0].Get(kOpeningsMirroredId)) { opening = openings_[(game_number / 2) % openings_.size()]; - } else if (player_options_[0].Get(kOpeningsModeId.GetId()) == + } else if (player_options_[0].Get(kOpeningsModeId) == "random") { opening = openings_[Random::Get().GetInt(0, openings_.size() - 1)]; } else { @@ -238,9 +236,9 @@ void SelfPlayTournament::PlayOneGame(int game_number) { std::vector last_thinking_info; for (int pl_idx : {0, 1}) { const bool verbose_thinking = - player_options_[pl_idx].Get(kVerboseThinkingId.GetId()); + player_options_[pl_idx].Get(kVerboseThinkingId); const bool move_thinking = - player_options_[pl_idx].Get(kMoveThinkingId.GetId()); + player_options_[pl_idx].Get(kMoveThinkingId); // Populate per-player options. PlayerOptions& opt = options[color_idx[pl_idx]]; opt.network = networks_[pl_idx].get(); @@ -370,7 +368,7 @@ void SelfPlayTournament::Worker() { { Mutex::Lock lock(mutex_); if (abort_) break; - bool mirrored = player_options_[0].Get(kOpeningsMirroredId.GetId()); + bool mirrored = player_options_[0].Get(kOpeningsMirroredId); if ((kTotalGames >= 0 && games_count_ >= kTotalGames) || (kTotalGames == -2 && !openings_.empty() && games_count_ >= diff --git a/src/utils/configfile.cc b/src/utils/configfile.cc index e973914df6..4ab0df3af5 100644 --- a/src/utils/configfile.cc +++ b/src/utils/configfile.cc @@ -25,11 +25,12 @@ Program grant you additional permission to convey the resulting work. */ +#include "utils/configfile.h" + #include #include #include "utils/commandline.h" -#include "utils/configfile.h" #include "utils/logging.h" #include "utils/optionsparser.h" #include "utils/string.h" @@ -101,8 +102,7 @@ bool ConfigFile::ParseFile(const std::string& filename, // Check to see if we are using the default config file or not. OptionsDict dict = options->GetOptionsDict(); - const bool using_default_config = - dict.IsDefault(kConfigFileId.GetId()); + const bool using_default_config = dict.IsDefault(kConfigFileId); if (!input.is_open()) { // It is okay if we cannot open the default file since it is normal diff --git a/src/utils/optionsdict.h b/src/utils/optionsdict.h index 0d2c763ae2..d409d6ff28 100644 --- a/src/utils/optionsdict.h +++ b/src/utils/optionsdict.h @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,6 +31,7 @@ #include #include #include + #include "utils/exception.h" namespace lczero { @@ -69,39 +70,67 @@ class TypeDict { } }; +struct OptionId { + OptionId(const char* long_flag, const char* uci_option, const char* help_text, + const char short_flag = '\0') + : long_flag(long_flag), + uci_option(uci_option), + help_text(help_text), + short_flag(short_flag) {} + + OptionId(const OptionId& other) = delete; + bool operator==(const OptionId& other) const { return this == &other; } + + const char* const long_flag; + const char* const uci_option; + const char* const help_text; + const char short_flag; +}; + class OptionsDict : TypeDict, TypeDict, TypeDict, TypeDict { public: - OptionsDict(const OptionsDict* parent = nullptr) : parent_(parent) {} // e.g. dict.Get("threads") // Returns value of given type. Throws exception if not found. template T Get(const std::string& key) const; + template + T Get(const OptionId& option_id) const; // Checks whether the given key exists for given type. template bool Exists(const std::string& key) const; + template + bool Exists(const OptionId& option_id) const; // Returns value of given type. Returns default if not found. template T GetOrDefault(const std::string& key, const T& default_val) const; + template + T GetOrDefault(const OptionId& option_id, const T& default_val) const; // Sets value for a given type. template void Set(const std::string& key, const T& value); + template + void Set(const OptionId& option_id, const T& value); // Get reference to assign value to. template T& GetRef(const std::string& key); + template + T& GetRef(const OptionId& option_id); // Returns true when the value is not set anywhere maybe except the root // dictionary; template bool IsDefault(const std::string& key) const; + template + bool IsDefault(const OptionId& option_id) const; // Returns subdictionary. Throws exception if doesn't exist. const OptionsDict& GetSubdict(const std::string& name) const; @@ -118,7 +147,7 @@ class OptionsDict : TypeDict, // Creates options dict from string. Example of a string: // option1=1, option_two = "string val", subdict(option3=3.14) // - // the sub dictionary is containing a parent pointer refering + // the sub dictionary is containing a parent pointer refering // back to this object. 
You need to ensure, that this object // is still in scope, when the parent pointer is used void AddSubdictFromString(const std::string& str); @@ -130,6 +159,10 @@ class OptionsDict : TypeDict, bool HasSubdict(const std::string& name) const; private: + static std::string GetOptionId(const OptionId& option_id) { + return std::to_string(reinterpret_cast(&option_id)); + } + const OptionsDict* parent_ = nullptr; std::map subdicts_; }; @@ -144,6 +177,10 @@ T OptionsDict::Get(const std::string& key) const { if (parent_) return parent_->Get(key); throw Exception("Key [" + key + "] was not set in options."); } +template +T OptionsDict::Get(const OptionId& option_id) const { + return Get(GetOptionId(option_id)); +} template bool OptionsDict::Exists(const std::string& key) const { @@ -153,6 +190,10 @@ bool OptionsDict::Exists(const std::string& key) const { if (!parent_) return false; return parent_->Exists(key); } +template +bool OptionsDict::Exists(const OptionId& option_id) const { + return Exists(GetOptionId(option_id)); +} template T OptionsDict::GetOrDefault(const std::string& key, @@ -165,16 +206,29 @@ T OptionsDict::GetOrDefault(const std::string& key, if (parent_) return parent_->GetOrDefault(key, default_val); return default_val; } +template +T OptionsDict::GetOrDefault(const OptionId& option_id, + const T& default_val) const { + return GetOrDefault(GetOptionId(option_id), default_val); +} template void OptionsDict::Set(const std::string& key, const T& value) { TypeDict::dict_[key].Set(value); } +template +void OptionsDict::Set(const OptionId& option_id, const T& value) { + Set(GetOptionId(option_id), value); +} template T& OptionsDict::GetRef(const std::string& key) { return TypeDict::dict_[key].Get(); } +template +T& OptionsDict::GetRef(const OptionId& option_id) { + return GetRef(GetOptionId(option_id)); +} template bool OptionsDict::IsDefault(const std::string& key) const { @@ -183,5 +237,9 @@ bool OptionsDict::IsDefault(const std::string& key) const { if (dict.find(key) != dict.end()) return false; return parent_->IsDefault(key); } +template +bool OptionsDict::IsDefault(const OptionId& option_id) const { + return IsDefault(GetOptionId(option_id)); +} } // namespace lczero diff --git a/src/utils/optionsparser.cc b/src/utils/optionsparser.cc index 9ed7a8c148..c40bfa9e73 100644 --- a/src/utils/optionsparser.cc +++ b/src/utils/optionsparser.cc @@ -30,6 +30,7 @@ #include #include #include + #include "utils/commandline.h" #include "utils/configfile.h" #include "utils/logging.h" @@ -69,7 +70,7 @@ void OptionsParser::SetUciOption(const std::string& name, } void OptionsParser::HideOption(const OptionId& id) { - const auto option = FindOptionById(id.GetId()); + const auto option = FindOptionById(id); if (option) option->hidden_ = true; } @@ -90,10 +91,9 @@ OptionsParser::Option* OptionsParser::FindOptionByUciName( return nullptr; } -OptionsParser::Option* OptionsParser::FindOptionById( - const std::string& name) const { +OptionsParser::Option* OptionsParser::FindOptionById(const OptionId& id) const { for (const auto& val : options_) { - if (name == val->GetId()) return val.get(); + if (id == val->GetId()) return val.get(); } return nullptr; } diff --git a/src/utils/optionsparser.h b/src/utils/optionsparser.h index 3671bec626..0c718245ae 100644 --- a/src/utils/optionsparser.h +++ b/src/utils/optionsparser.h @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,34 +31,12 @@ #include #include #include + #include "utils/exception.h" #include "utils/optionsdict.h" namespace lczero { -struct OptionId { - OptionId(const char* long_flag = "", const char* uci_option = "", - const char* help_text = "", const char short_flag = '\0') - : long_flag(long_flag), - uci_option(uci_option), - help_text(help_text), - short_flag(short_flag) {} - - OptionId(const OptionId& other) = delete; - - const char* const long_flag; - const char* const uci_option; - const char* const help_text; - const char short_flag; - - // Returns Option's own address as string. - // TODO(crem) Generalize OptionsDict to have a version which has OptionId* - // as keys instead of std::string. - std::string GetId() const { - return std::to_string(reinterpret_cast(this)); - } -}; - class OptionsParser { public: OptionsParser(); @@ -71,7 +49,7 @@ class OptionsParser { virtual void SetValue(const std::string& value, OptionsDict* dict) = 0; protected: - std::string GetId() const { return id_.GetId(); } + const OptionId& GetId() const { return id_; } std::string GetUciOption() const { return id_.uci_option; } std::string GetHelpText() const { return id_.help_text; } std::string GetLongFlag() const { return id_.long_flag; } @@ -146,7 +124,7 @@ class OptionsParser { // Returns an option based by its uci name. Option* FindOptionByUciName(const std::string& name) const; // Returns an option based by its id. - Option* FindOptionById(const std::string& name) const; + Option* FindOptionById(const OptionId& id) const; std::vector> options_; OptionsDict defaults_; From d80da0f884ade8cd7006368978003cd4901803f5 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 28 Mar 2020 19:41:08 +0100 Subject: [PATCH 076/151] Fixes #1106 (#1164) --- src/mcts/node.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 08755aec98..8f86405d2a 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -317,6 +317,7 @@ void Node::ReleaseChildrenExceptOne(Node* node_to_save) { // Make saved node the only child. (kills previous siblings). gNodeGc.AddToGcQueue(std::move(child_)); child_ = std::move(saved_node); + edges_ = EdgeList(); // Clear edges list. } namespace { From eaed30fb8b8523bc9cfa771700bd81567b692b23 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 28 Mar 2020 19:41:31 +0100 Subject: [PATCH 077/151] Detect C++17 and show error if compiler doesn't support it. (#1165) --- meson.build | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/meson.build b/meson.build index c51bc88c74..31b55c11a1 100644 --- a/meson.build +++ b/meson.build @@ -19,6 +19,13 @@ project('lc0', 'cpp', meson_version: '>=0.52') cc = meson.get_compiler('cpp') + +if not cc.has_header('optional') or not cc.has_header('string_view') + error('Lc0 requires a compiler supporting C++17, for example g++ v7.0, ' + + 'clang v4.0 or later (with C++17 stdlib) and Visual Studio 2017 or ' + + 'later.') +endif + if cc.get_id() == 'clang' # Thread safety annotation add_project_arguments('-Wthread-safety', language : 'cpp') From 51fadc34ac6d97fa28a52e822077da22c2d30935 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 29 Mar 2020 15:50:55 +1100 Subject: [PATCH 078/151] Add extra params for passing defaults for edge functions that really should consider it. 
(#1151) * Add extra params for passing defaults for edge functions that really should consider it. * Fix additional issues. * Compile fix. * Review feedback. --- src/mcts/node.cc | 5 +++-- src/mcts/node.h | 8 +++++--- src/mcts/search.cc | 41 +++++++++++++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 8f86405d2a..ee7ee9cfb9 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -245,8 +245,9 @@ void Node::MakeNotTerminal() { if (n > 0) { n_ += n; // Flip Q for opponent. - wl_ += -child.GetWL() * n; - d_ += child.GetD() * n; + // Default values don't matter as n is > 0. + wl_ += -child.GetWL(0.0f) * n; + d_ += child.GetD(0.0f) * n; } } diff --git a/src/mcts/node.h b/src/mcts/node.h index d01fe072b8..99f9b3fccb 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -355,9 +355,11 @@ class EdgeAndNode { : node_->GetQ(draw_score)) : default_q; } - float GetWL() const { return node_ ? node_->GetWL() : 0.0f; } - float GetD() const { - return (node_ && node_->GetN() > 0) ? node_->GetD() : 0.0f; + float GetWL(float default_wl) const { + return node_ ? node_->GetWL() : default_wl; + } + float GetD(float default_d) const { + return (node_ && node_->GetN() > 0) ? node_->GetD() : default_d; } float GetM(float default_m) const { return (node_ && node_->GetN() > 0) ? node_->GetM() : default_m; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index cc2c79d165..d9319c4677 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -129,13 +129,15 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { common_info.tb_hits = tb_hits_.load(std::memory_order_acquire); int multipv = 0; - const auto default_q = -root_node_->GetWL(); + const auto default_q = -root_node_->GetQ(-draw_score); + const auto default_wl = -root_node_->GetWL(); + const auto default_d = root_node_->GetD(); for (const auto& edge : edges) { ++multipv; uci_infos.emplace_back(common_info); auto& uci_info = uci_infos.back(); - const auto wl = edge.GetWL(); - const auto d = edge.GetD(); + const auto wl = edge.GetWL(default_wl); + const auto d = edge.GetD(default_d); const int w = static_cast(std::round(500.0 * (1.0 + wl - d))); const auto q = edge.GetQ(default_q, draw_score, /* logit_q= */ false); if (edge.IsTerminal() && wl != 0.0f) { @@ -279,12 +281,15 @@ std::vector Search::GetVerboseStats(Node* node) const { oss << "(P: " << std::setw(5) << std::setprecision(2) << edge.GetP() * 100 << "%) "; - oss << "(WL: " << std::setw(8) << std::setprecision(5) << edge.GetWL() + // Default value here assumes user knows to ignore this field when N is 0. + oss << "(WL: " << std::setw(8) << std::setprecision(5) << edge.GetWL(0.0f) << ") "; - oss << "(D: " << std::setw(6) << std::setprecision(3) << edge.GetD() + // Default value here assumes user knows to ignore this field when N is 0. + oss << "(D: " << std::setw(6) << std::setprecision(3) << edge.GetD(0.0f) << ") "; + // Default value here assumes user knows to ignore this field when N is 0. 
oss << "(M: " << std::setw(4) << std::setprecision(1) << edge.GetM(0.0f) << ") "; @@ -406,7 +411,7 @@ Search::BestEval Search::GetBestEval() const { float parent_m = root_node_->GetM(); if (!root_node_->HasChildren()) return {parent_wl, parent_d, parent_m}; EdgeAndNode best_edge = GetBestChildNoTemperature(root_node_); - return {best_edge.GetWL(), best_edge.GetD(), + return {best_edge.GetWL(parent_wl), best_edge.GetD(parent_d), best_edge.GetM(parent_m - 1) + 1}; } @@ -486,6 +491,11 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, if (parent == root_node_) { PopulateRootMoveLimit(&root_limit); } + // Assume this function is only ever called with root or immediate child of + // root to avoid traversing to get depth. + assert(parent == root_node_ || parent->GetParent() == root_node_); + const bool is_odd_depth = parent != root_node_; + const float draw_score = GetDrawScore(is_odd_depth); // Best child is selected using the following criteria: // * Prefer shorter terminal wins / avoid shorter terminal losses. // * Largest number of playouts. @@ -505,7 +515,8 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, ? edges.begin() + count : edges.end(); std::partial_sort( - edges.begin(), middle, edges.end(), [](const auto& a, const auto& b) { + edges.begin(), middle, edges.end(), + [draw_score](const auto& a, const auto& b) { // The function returns "true" when a is preferred to b. // Lists edge types from less desirable to more desirable. @@ -518,7 +529,9 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, }; auto GetEdgeRank = [](const EdgeAndNode& edge) { - const auto wl = edge.GetWL(); + // This default isn't used as wl only checked for case edge is + // terminal. + const auto wl = edge.GetWL(0.0f); if (!edge.IsTerminal() || !wl) return kNonTerminal; if (edge.IsTbTerminal()) { return wl < 0.0 ? kTablebaseLoss : kTablebaseWin; @@ -545,7 +558,14 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, if (a_rank == kNonTerminal) { // Prefer largest playouts then eval then prior. if (a.GetN() != b.GetN()) return a.GetN() > b.GetN(); - if (a.GetWL() != b.GetWL()) return a.GetWL() > b.GetWL(); + // Default doesn't matter here so long as they are the same as either + // both are N==0 (thus we're comparing equal defaults) or N!=0 and + // default isn't used. + if (a.GetQ(0.0f, draw_score, false) != + b.GetQ(0.0f, draw_score, false)) { + return a.GetQ(0.0f, draw_score, false) > + b.GetQ(0.0f, draw_score, false); + } return a.GetP() > b.GetP(); } @@ -1404,7 +1424,8 @@ void SearchWorker::DoBackupUpdateSingleNode( float losing_m = 0.0f; if (can_convert && v <= 0.0f) { for (const auto& edge : p->Edges()) { - const auto WL = edge.GetWL(); + // Default_wl doesn't matter as WL is only used if IsTerminal is true. + const auto WL = edge.GetWL(0.0f); can_convert = can_convert && edge.IsTerminal() && WL <= 0.0f; if (!can_convert) break; all_losing = all_losing && WL < 0.0f; From 2544956580c7087f4c6fdb871c3bafa399c30c57 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 29 Mar 2020 17:03:41 +1100 Subject: [PATCH 079/151] Remove selfplay override for root has own cpuct params (#1155) * Remove selfplay override for root has own cpuct params no longer needed as its always sent by training server. * Review feedback. 
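An editorial aside on PATCH 078 above before its follow-up patches (not patch content). The new `default_*` parameters give the EdgeAndNode accessors a caller-chosen fallback: an edge whose child has no visits yet reports whatever value the caller passes in (typically the parent's value, sign-flipped to the child's perspective) instead of a hard-coded zero. A minimal standalone sketch of that fallback pattern; `ToyEdge`, `parent_wl` and the literal values are assumptions for illustration only:

```
// Toy sketch of the "parent value as fallback" idea from PATCH 078.
// Everything here is illustrative; the real accessors live on EdgeAndNode.
#include <iostream>
#include <optional>

struct ToyEdge {
  std::optional<float> wl;  // Empty while the child node has no visits.
  float GetWL(float default_wl) const { return wl.value_or(default_wl); }
};

int main() {
  const float parent_wl = 0.12f;  // Parent's value, from its own perspective.
  const ToyEdge visited{0.30f};   // Child already evaluated.
  const ToyEdge unvisited{};      // No child node yet.
  std::cout << visited.GetWL(-parent_wl) << "\n";    // 0.3: uses own value.
  std::cout << unvisited.GetWL(-parent_wl) << "\n";  // -0.12: falls back.
}
```

The same idea is why `MakeNotTerminal` above can pass dummy defaults: it only reads the child's values when `n > 0`, so the fallback is never used there.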
--- src/mcts/params.cc | 2 ++ src/selfplay/tournament.cc | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 7c9283db9e..e2cc22cd4c 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -77,6 +77,8 @@ const OptionId SearchParams::kCpuctFactorId{ const OptionId SearchParams::kCpuctFactorAtRootId{ "cpuct-factor-at-root", "CPuctFactorAtRoot", "Multiplier for the cpuct growth formula at root."}; +// Remove this option after 0.25 has been made mandatory in training and the +// training server stops sending it. const OptionId SearchParams::kRootHasOwnCpuctParamsId{ "root-has-own-cpuct-params", "RootHasOwnCpuctParams", "If enabled, cpuct parameters for root node are taken from *AtRoot " diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 23ae3fad14..d54988061e 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -126,7 +126,6 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { defaults->Set(NetworkFactory::kBackendId, "multiplexing"); defaults->Set(SearchParams::kStickyEndgamesId, false); defaults->Set(SearchParams::kLogitQId, false); - defaults->Set(SearchParams::kRootHasOwnCpuctParamsId, false); } SelfPlayTournament::SelfPlayTournament( From 6194bdd705b1f61a6d38cad4347772db44a6ad69 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 29 Mar 2020 22:46:19 +1100 Subject: [PATCH 080/151] Fix and simplify GetVisitsToReachU. (#1167) * Fix and simplify GetVisitsToReachU. Was wrong in the case that M is non-zero for best. * Minor tweak for consistency. --- src/mcts/node.h | 17 +++++++++-------- src/mcts/search.cc | 6 ++++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/mcts/node.h b/src/mcts/node.h index 99f9b3fccb..54650e4f1c 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -385,15 +385,16 @@ class EdgeAndNode { return numerator * GetP() / (1 + GetNStarted()); } - int GetVisitsToReachU(float target_score, float numerator, float default_q, - float draw_score, bool logit_q) const { - const auto q = GetQ(default_q, draw_score, logit_q); - if (q >= target_score) return std::numeric_limits::max(); + int GetVisitsToReachU(float target_score, float numerator, + float score_without_u) const { + if (score_without_u >= target_score) return std::numeric_limits::max(); const auto n1 = GetNStarted() + 1; - return std::max( - 1.0f, - std::min(std::floor(GetP() * numerator / (target_score - q) - n1) + 1, - 1e9f)); + return std::max(1.0f, + std::min(std::floor(GetP() * numerator / + (target_score - score_without_u) - + n1) + + 1, + 1e9f)); } std::string DebugString() const; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index d9319c4677..4b00da8192 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -996,6 +996,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( const float puct_mult = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); float best = std::numeric_limits::lowest(); + float best_without_u = std::numeric_limits::lowest(); float second_best = std::numeric_limits::lowest(); // Root depth is 1 here, while for GetDrawScore() it's 0-based, that's why // the weirdness. 
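Before the diff, a reader-facing sketch of the quantity being estimated (not part of the patch). With the exploration term `numerator * GetP() / (1 + GetNStarted())` visible at the top of the node.h hunk below, the number of additional visit starts after which the best edge's score can drop to `target_score` comes from solving `score_without_u + numerator * P / (n_started + 1 + n) = target_score` for `n`. The fix below passes the best edge's already-computed score without U instead of re-deriving Q inside the function, which is where the old version went wrong whenever the M term contributed to `best`. A standalone version of the resulting estimate; the free function name `VisitsToReachTarget` and its parameter list are assumptions (in the patch this is a method on EdgeAndNode, which supplies P and the started-visit count itself):

```
// Standalone sketch of the simplified estimate, for illustration only.
#include <algorithm>
#include <cmath>
#include <limits>

int VisitsToReachTarget(float target_score, float numerator, float p,
                        int n_started, float score_without_u) {
  // U only shrinks as visits accumulate, so the score decays towards
  // score_without_u; if that floor is already at or above the target,
  // no number of visits brings the score down to it.
  if (score_without_u >= target_score) return std::numeric_limits<int>::max();
  const float n1 = static_cast<float>(n_started) + 1.0f;
  return static_cast<int>(std::max(
      1.0f,
      std::min(
          std::floor(p * numerator / (target_score - score_without_u) - n1) +
              1.0f,
          1e9f)));
}
```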
@@ -1044,6 +1045,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( second_best = best; second_best_edge = best_edge; best = score; + best_without_u = Q + M; best_edge = child; } else if (score > second_best) { second_best = score; @@ -1052,8 +1054,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( } if (second_best_edge) { - int estimated_visits_to_change_best = best_edge.GetVisitsToReachU( - second_best, puct_mult, fpu, draw_score, params_.GetLogitQ()); + int estimated_visits_to_change_best = + best_edge.GetVisitsToReachU(second_best, puct_mult, best_without_u); // Only cache for n-2 steps as the estimate created by GetVisitsToReachU // has potential rounding errors and some conservative logic that can push // it up to 2 away from the real value. From 4d77eb01e131b8bcc61973f5c5f34555a1668eb9 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 29 Mar 2020 23:49:50 +1100 Subject: [PATCH 081/151] Fix assert in GetBestChildrenNoTemperature (#1169) * Fix GetBestChildNoTemperature Turns out I missed that it was called with arbitrary depth nodes. * Small adjustment. --- src/mcts/search.cc | 28 ++++++++++++++-------------- src/mcts/search.h | 6 +++--- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 4b00da8192..2394478bc4 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -97,7 +97,7 @@ void ApplyDirichletNoise(Node* node, float eps, double alpha) { void Search::SendUciInfo() REQUIRES(nodes_mutex_) { const auto max_pv = params_.GetMultiPv(); - const auto edges = GetBestChildrenNoTemperature(root_node_, max_pv); + const auto edges = GetBestChildrenNoTemperature(root_node_, max_pv, 0); const auto score_type = params_.GetScoreType(); const auto per_pv_counters = params_.GetPerPvCounters(); const auto display_cache_usage = params_.GetDisplayCacheUsage(); @@ -164,10 +164,12 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { if (max_pv > 1) uci_info.multipv = multipv; if (per_pv_counters) uci_info.nodes = edge.GetN(); bool flip = played_history_.IsBlackToMove(); + int depth = 0; for (auto iter = edge; iter; - iter = GetBestChildNoTemperature(iter.node()), flip = !flip) { + iter = GetBestChildNoTemperature(iter.node(), depth), flip = !flip) { uci_info.pv.push_back(iter.GetMove(flip)); if (!iter.node()) break; // Last edge was dangling, cannot continue. + depth += 1; } } @@ -410,7 +412,7 @@ Search::BestEval Search::GetBestEval() const { float parent_d = root_node_->GetD(); float parent_m = root_node_->GetM(); if (!root_node_->HasChildren()) return {parent_wl, parent_d, parent_m}; - EdgeAndNode best_edge = GetBestChildNoTemperature(root_node_); + EdgeAndNode best_edge = GetBestChildNoTemperature(root_node_, 0); return {best_edge.GetWL(parent_wl), best_edge.GetD(parent_d), best_edge.GetM(parent_m - 1) + 1}; } @@ -477,24 +479,22 @@ void Search::EnsureBestMoveKnown() REQUIRES(nodes_mutex_) } final_bestmove_ = temperature ? GetBestRootChildWithTemperature(temperature) - : GetBestChildNoTemperature(root_node_); + : GetBestChildNoTemperature(root_node_, 0); if (final_bestmove_.HasNode() && final_bestmove_.node()->HasChildren()) { - final_pondermove_ = GetBestChildNoTemperature(final_bestmove_.node()); + final_pondermove_ = GetBestChildNoTemperature(final_bestmove_.node(), 1); } } // Returns @count children with most visits. 
std::vector Search::GetBestChildrenNoTemperature(Node* parent, - int count) const { + int count, + int depth) const { MoveList root_limit; if (parent == root_node_) { PopulateRootMoveLimit(&root_limit); } - // Assume this function is only ever called with root or immediate child of - // root to avoid traversing to get depth. - assert(parent == root_node_ || parent->GetParent() == root_node_); - const bool is_odd_depth = parent != root_node_; + const bool is_odd_depth = (depth % 2) == 1; const float draw_score = GetDrawScore(is_odd_depth); // Best child is selected using the following criteria: // * Prefer shorter terminal wins / avoid shorter terminal losses. @@ -585,8 +585,8 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, } // Returns a child with most visits. -EdgeAndNode Search::GetBestChildNoTemperature(Node* parent) const { - auto res = GetBestChildrenNoTemperature(parent, 1); +EdgeAndNode Search::GetBestChildNoTemperature(Node* parent, int depth) const { + auto res = GetBestChildrenNoTemperature(parent, 1, depth); return res.empty() ? EdgeAndNode() : res.front(); } @@ -618,7 +618,7 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { } // No move had enough visits for temperature, so use default child criteria - if (max_n <= 0.0f) return GetBestChildNoTemperature(root_node_); + if (max_n <= 0.0f) return GetBestChildNoTemperature(root_node_, 0); // TODO(crem) Simplify this code when samplers.h is merged. const float min_eval = @@ -1462,7 +1462,7 @@ void SearchWorker::DoBackupUpdateSingleNode( if (p == search_->root_node_ && search_->current_best_edge_.GetN() <= n->GetN()) { search_->current_best_edge_ = - search_->GetBestChildNoTemperature(search_->root_node_); + search_->GetBestChildNoTemperature(search_->root_node_, 0); } } search_->total_playouts_ += node_to_process.multivisit; diff --git a/src/mcts/search.h b/src/mcts/search.h index 714ee52f33..d3810622f3 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -103,9 +103,9 @@ class Search { // Returns a child with most visits, with or without temperature. // NoTemperature is safe to use on non-extended nodes, while WithTemperature // accepts only nodes with at least 1 visited child. - EdgeAndNode GetBestChildNoTemperature(Node* parent) const; - std::vector GetBestChildrenNoTemperature(Node* parent, - int count) const; + EdgeAndNode GetBestChildNoTemperature(Node* parent, int depth) const; + std::vector GetBestChildrenNoTemperature(Node* parent, int count, + int depth) const; EdgeAndNode GetBestRootChildWithTemperature(float temperature) const; int64_t GetTimeSinceStart() const; From c5d4a32eb07ad381eecf7320162f3523365c1073 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 30 Mar 2020 03:17:45 +1100 Subject: [PATCH 082/151] Only clear edge list if all children removed. (#1170) --- src/mcts/node.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index ee7ee9cfb9..db43d43085 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -318,7 +318,7 @@ void Node::ReleaseChildrenExceptOne(Node* node_to_save) { // Make saved node the only child. (kills previous siblings). gNodeGc.AddToGcQueue(std::move(child_)); child_ = std::move(saved_node); - edges_ = EdgeList(); // Clear edges list. + if (!child_) edges_ = EdgeList(); // Clear edges list. 
} namespace { From 5e25ee51d17fce5677a59ec364f598e893d9fcba Mon Sep 17 00:00:00 2001 From: Anson Hu <40702929+farmersrice@users.noreply.github.com> Date: Sun, 5 Apr 2020 12:21:27 -0700 Subject: [PATCH 083/151] Add a flag to allow `go nodes` to be treated as playouts in UCI (#1130) * Flag to allow "go nodes" to be treated as playouts in UCI * Hide playouts flag * Remove GetId() from playouts flag --- src/mcts/stoppers/factory.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index 994183c118..215093853f 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -97,6 +97,9 @@ const OptionId kMinimumSmartPruningBatchesId{ "Only allow smart pruning to stop search after at least this many batches " "have been evaluated. It may be useful to have this value greater than the " "number of search threads in use."}; +const OptionId kNodesAsPlayoutsId{ + "nodes-as-playouts", "NodesAsPlayouts", + "Treat UCI `go nodes` command as referring to playouts instead of visits."}; } // namespace @@ -114,10 +117,12 @@ void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options) { options->Add(kTimeMidpointMoveId, 1.0f, 100.0f) = 51.5f; options->Add(kTimeSteepnessId, 1.0f, 100.0f) = 7.0f; options->Add(kSpendSavedTimeId, 0.0f, 1.0f) = 1.0f; + options->Add(kNodesAsPlayoutsId) = false; // Hide time curve options. options->HideOption(kTimeMidpointMoveId); options->HideOption(kTimeSteepnessId); + options->HideOption(kNodesAsPlayoutsId); } } @@ -156,7 +161,11 @@ void PopulateStoppers(ChainedSearchStopper* stopper, const OptionsDict& options, // "go nodes" stopper. if (params.nodes) { - stopper->AddStopper(std::make_unique(*params.nodes)); + if (options.Get(kNodesAsPlayoutsId)) { + stopper->AddStopper(std::make_unique(*params.nodes)); + } else { + stopper->AddStopper(std::make_unique(*params.nodes)); + } } // "go movetime" stopper. From a0886618a6086973bc5cf2968c6e39f3ccc2b47f Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 6 Apr 2020 22:45:57 +1000 Subject: [PATCH 084/151] Shared collisions. (#1172) * Shared collisions. * Ensure no shared collisions are left unresolved at search exit. * Oops. * Oops 2. * Minor cleanup. --- src/mcts/search.cc | 42 +++++++++++++++++++++++++++++++++++++----- src/mcts/search.h | 9 +++++++++ 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 2394478bc4..bb4c3c65a3 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -764,9 +764,24 @@ void Search::Wait() { } } +void Search::CancelSharedCollisions() REQUIRES(nodes_mutex_) { + for (auto& entry : shared_collisions_) { + Node* node = entry.first; + for (node = node->GetParent(); node != root_node_->GetParent(); + node = node->GetParent()) { + node->CancelScoreUpdate(entry.second); + } + } + shared_collisions_.clear(); +} + Search::~Search() { Abort(); Wait(); + { + SharedMutex::Lock lock(nodes_mutex_); + CancelSharedCollisions(); + } LOGFILE << "Search destroyed."; } @@ -803,6 +818,9 @@ void SearchWorker::ExecuteOneIteration() { // 2. Gather minibatch. GatherMinibatch(); + // 2b. Collect collisions. + CollectCollisions(); + // 3. Prefetch into cache. MaybePrefetchIntoCache(); @@ -1196,6 +1214,18 @@ bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) { return false; } +// 2b. Copy collisions into shared collisions. 
+void SearchWorker::CollectCollisions() { + SharedMutex::Lock lock(search_->nodes_mutex_); + + for (const NodeToProcess& node_to_process : minibatch_) { + if (node_to_process.IsCollision()) { + search_->shared_collisions_.emplace_back(node_to_process.node, + node_to_process.multivisit); + } + } +} + // 3. Prefetch into cache. // ~~~~~~~~~~~~~~~~~~~~~~~ void SearchWorker::MaybePrefetchIntoCache() { @@ -1374,9 +1404,15 @@ void SearchWorker::DoBackupUpdate() { // Nodes mutex for doing node updates. SharedMutex::Lock lock(search_->nodes_mutex_); + bool work_done = number_out_of_order_ > 0; for (const NodeToProcess& node_to_process : minibatch_) { DoBackupUpdateSingleNode(node_to_process); + if (!node_to_process.IsCollision()) { + work_done = true; + } } + if (!work_done) return; + search_->CancelSharedCollisions(); search_->total_batches_ += 1; } @@ -1384,11 +1420,7 @@ void SearchWorker::DoBackupUpdateSingleNode( const NodeToProcess& node_to_process) REQUIRES(search_->nodes_mutex_) { Node* node = node_to_process.node; if (node_to_process.IsCollision()) { - // If it was a collision, just undo counters. - for (node = node->GetParent(); node != search_->root_node_->GetParent(); - node = node->GetParent()) { - node->CancelScoreUpdate(node_to_process.multivisit); - } + // Collisions are handled via shared_collisions instead. return; } diff --git a/src/mcts/search.h b/src/mcts/search.h index d3810622f3..86bbed8633 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -141,6 +141,9 @@ class Search { // Depth of a root node is 0 (even number). float GetDrawScore(bool is_odd_depth) const; + // Ensure that all shared collisions are cancelled and clear them out. + void CancelSharedCollisions(); + mutable Mutex counters_mutex_ ACQUIRED_AFTER(nodes_mutex_); // Tells all threads to stop. std::atomic stop_{false}; @@ -188,6 +191,9 @@ class Search { std::atomic pending_searchers_{0}; + std::vector> shared_collisions_ + GUARDED_BY(nodes_mutex_); + std::unique_ptr uci_responder_; const SearchParams params_; @@ -240,6 +246,9 @@ class SearchWorker { // 2. Gather minibatch. void GatherMinibatch(); + // 2b. Copy collisions into shared_collisions_. + void CollectCollisions(); + // 3. Prefetch into cache. 
void MaybePrefetchIntoCache(); From b39fd2bfd4366aeebeee5e41da4bbf3a8db2586b Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Tue, 7 Apr 2020 12:40:26 +0300 Subject: [PATCH 085/151] make a separate eigen backend (#1027) --- appveyor.yml | 2 +- meson.build | 25 +--- meson_options.txt | 5 - scripts/check_dx.bat | 2 +- scripts/check_opencl.bat | 2 +- src/neural/blas/convolution1.cc | 30 +++-- src/neural/blas/convolution1.h | 1 + src/neural/blas/fully_connected_layer.cc | 103 ++++++++------- src/neural/blas/fully_connected_layer.h | 1 + src/neural/blas/network_blas.cc | 161 ++++++++++++----------- src/neural/blas/se_unit.cc | 32 +++-- src/neural/blas/se_unit.h | 1 + src/neural/blas/winograd_convolution3.cc | 68 ++++++---- src/neural/blas/winograd_convolution3.h | 1 + src/neural/network_check.cc | 2 +- 15 files changed, 246 insertions(+), 190 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index cb586545eb..4412cb7058 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -75,7 +75,7 @@ before_build: - cmd: IF %DX%==true SET BUILD_BLAS=true - cmd: SET EMBED=false - cmd: IF %APPVEYOR_REPO_TAG%==true IF %ANDROID%==true SET EMBED=true -- cmd: IF %ANDROID%==false meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Deigen=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static +- cmd: IF %ANDROID%==false meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BUILD_BLAS% -Ddnnl=true -Ddx=%DX% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Ddnnl_dir="%PKG_FOLDER%\dnnl_win_1.1.1_cpu_vcomp" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.77\build\native\lib\x64" -Ddefault_library=static - cmd: IF %ANDROID%==true meson arm64-v8a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-aarch64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-aarch64\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-aarch64 - cmd: IF %ANDROID%==true meson armeabi-v7a --buildtype release -Dgtest=false -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\android-armv7a\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\android-armv7a\lib" -Dembed=%EMBED% -Ddefault_library=static --cross-file crossfile-armv7a build_script: diff --git a/meson.build b/meson.build index 31b55c11a1..8a77f7ce1a 100644 --- a/meson.build +++ b/meson.build @@ -187,8 +187,6 @@ if get_option('build_backends') shared_files = [] - has_blas = false - accelerate_lib = dependency('Accelerate', required: false) mkl_libdirs = get_option('mkl_libdirs') @@ -208,23 +206,21 @@ if get_option('build_backends') if get_option('blas') if get_option('mkl') and mkl_lib.found() - 
add_project_arguments('-DUSE_MKL', language : 'cpp') + add_project_arguments(['-DUSE_MKL', '-DUSE_BLAS'], language : 'cpp') includes += include_directories(get_option('mkl_include')) deps += [ mkl_lib ] - has_blas = true elif get_option('dnnl') and dnnl_lib.found() - add_project_arguments('-DUSE_DNNL', language : 'cpp') + add_project_arguments(['-DUSE_DNNL', '-DUSE_BLAS'], language : 'cpp') includes += include_directories(get_option('dnnl_dir') + '/include') deps += [ dnnl_lib, dependency('openmp', required:true) ] - has_blas = true elif get_option('accelerate') and accelerate_lib.found() deps += [ accelerate_lib ] - has_blas = true + add_project_arguments('-DUSE_BLAS', language : 'cpp') elif get_option('openblas') and openblas_lib.found() - add_project_arguments('-DUSE_OPENBLAS', language : 'cpp') + add_project_arguments(['-DUSE_OPENBLAS', '-DUSE_BLAS'], language : 'cpp') required_openblas_header = 'openblas_config.h' if not cc.has_header(required_openblas_header) @@ -244,17 +240,14 @@ if get_option('build_backends') endif deps += [ openblas_lib ] - has_blas = true - elif get_option('eigen') - add_project_arguments('-DUSE_EIGEN', language : 'cpp') - has_blas = true - deps += subproject('eigen').get_variable('eigen_dep') endif + deps += subproject('eigen').get_variable('eigen_dep') + ispc = find_program('ispc', required: false) ispc_extra_args = [] - if has_blas and get_option('ispc') and ispc.found() + if get_option('ispc') and ispc.found() ispc_native_only = get_option('ispc_native_only') if host_machine.system() == 'windows' outputnames = [ '@BASENAME@.obj'] @@ -286,10 +279,6 @@ if get_option('build_backends') ) endif - endif - - if get_option('blas') and has_blas - blas_files = [ 'src/neural/blas/convolution1.cc', 'src/neural/blas/fully_connected_layer.cc', diff --git a/meson_options.txt b/meson_options.txt index f17b838089..1a2f93f81f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -98,11 +98,6 @@ option('dnnl', value: false, description: 'Enable DNNL BLAS support') -option('eigen', - type: 'boolean', - value: false, - description: 'Use EIGEN as a BLAS alternative') - option('accelerate', type: 'boolean', value: true, diff --git a/scripts/check_dx.bat b/scripts/check_dx.bat index fee708729a..dc173f3c0c 100644 --- a/scripts/check_dx.bat +++ b/scripts/check_dx.bat @@ -1,5 +1,5 @@ @ECHO OFF ECHO Sanity checking the dx12 driver. -lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx12,blas %* +lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx12 %* PAUSE diff --git a/scripts/check_opencl.bat b/scripts/check_opencl.bat index 4ae972d5e4..ed89a60433 100755 --- a/scripts/check_opencl.bat +++ b/scripts/check_opencl.bat @@ -1,5 +1,5 @@ @ECHO OFF ECHO Sanity checking the opencl driver. 
-lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,opencl,blas %* +lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,opencl %* PAUSE diff --git a/src/neural/blas/convolution1.cc b/src/neural/blas/convolution1.cc index 39e840296c..c78b3976fb 100644 --- a/src/neural/blas/convolution1.cc +++ b/src/neural/blas/convolution1.cc @@ -19,23 +19,23 @@ #include "neural/blas/convolution1.h" #include "neural/blas/blas.h" -#ifdef USE_EIGEN #include -#endif namespace lczero { -#ifdef USE_EIGEN template using EigenMatrixMap = Eigen::Map>; template using ConstEigenMatrixMap = Eigen::Map>; -#endif -void Convolution1::Forward(const size_t batch_size, const size_t input_channels, - const size_t output_channels, const float* input, - const float* weights, float* output) { +#ifdef USE_BLAS +template <> +void Convolution1::Forward(const size_t batch_size, + const size_t input_channels, + const size_t output_channels, + const float* input, const float* weights, + float* output) { for (size_t i = 0; i < batch_size; i++) { // C←αAB + βC // M Number of rows in matrices A and C. @@ -56,7 +56,6 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels, const float* batch_input = input + i * kSquares * input_channels; float* batch_output = output + i * kSquares * output_channels; -#ifndef USE_EIGEN cblas_sgemm(CblasRowMajor, // Row major formar CblasNoTrans, // A not transposed CblasNoTrans, // B not transposed @@ -71,12 +70,23 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels, 0.0f, // beta batch_output, // C kSquares); // ldc, leading rank of B -#else + } +} +#endif + +template <> +void Convolution1::Forward(const size_t batch_size, + const size_t input_channels, + const size_t output_channels, + const float* input, const float* weights, + float* output) { + for (size_t i = 0; i < batch_size; i++) { + const float* batch_input = input + i * kSquares * input_channels; + float* batch_output = output + i * kSquares * output_channels; auto C_mat = EigenMatrixMap(batch_output, kSquares, output_channels); C_mat.noalias() = ConstEigenMatrixMap(batch_input, kSquares, input_channels) * ConstEigenMatrixMap(weights, input_channels, output_channels); -#endif } } diff --git a/src/neural/blas/convolution1.h b/src/neural/blas/convolution1.h index 8f160acfad..48aad402b4 100644 --- a/src/neural/blas/convolution1.h +++ b/src/neural/blas/convolution1.h @@ -24,6 +24,7 @@ namespace lczero { // Convolution 1x1 +template class Convolution1 { public: Convolution1() = delete; diff --git a/src/neural/blas/fully_connected_layer.cc b/src/neural/blas/fully_connected_layer.cc index da81f07a1d..e7959169f1 100644 --- a/src/neural/blas/fully_connected_layer.cc +++ b/src/neural/blas/fully_connected_layer.cc @@ -23,12 +23,31 @@ #include #include -#ifdef USE_EIGEN #include -#endif namespace lczero { -#ifdef USE_EIGEN +namespace { +void ApplyBias(size_t batch_size, const size_t output_size, const float* biases, + bool apply_relu, float* outputs) { + if (apply_relu) { + for (size_t i = 0; i < batch_size; i++) { + float* batch_outputs = outputs + i * output_size; + for (size_t o = 0; o < output_size; o++) { + float val = biases[o] + batch_outputs[o]; + batch_outputs[o] = val >= 0 ? 
val : 0; + } + } + } else { + for (size_t i = 0; i < batch_size; i++) { + float* batch_outputs = outputs + i * output_size; + for (size_t o = 0; o < output_size; o++) { + batch_outputs[o] += biases[o]; + } + } + } +} +} // namespace + template using EigenVectorMap = Eigen::Map>; template @@ -40,13 +59,13 @@ using EigenMatrixMap = template using ConstEigenMatrixMap = Eigen::Map>; -#endif -void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, - const size_t output_size, - const float* inputs, const float* weights, - const float* biases, bool apply_relu, - float* outputs) { +#ifdef USE_BLAS +template <> +void FullyConnectedLayer::Forward1D( + size_t batch_size, const size_t input_size, const size_t output_size, + const float* inputs, const float* weights, const float* biases, + bool apply_relu, float* outputs) { if (batch_size == 1) { // Just a matrix-vector multiplication // @@ -58,17 +77,10 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, // // rows output_size output_size input_size // -#ifndef USE_EIGEN cblas_sgemv(CblasRowMajor, CblasNoTrans, // M K (int)output_size, (int)input_size, 1.0f, weights, (int)input_size, inputs, 1, 0.0f, outputs, 1); -#else - EigenVectorMap y(outputs, output_size); - y.noalias() = ConstEigenMatrixMap(weights, input_size, output_size) - .transpose() * - ConstEigenVectorMap(inputs, input_size); -#endif } else { // more columns, matrix-matrix multiplication // @@ -89,7 +101,6 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, // passing a matrix A[m][n], the value should be m. // cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, // ldb, beta, C, N); -#ifndef USE_EIGEN cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, (int)output_size, // M (int)batch_size, // N @@ -102,44 +113,46 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size, 0.0f, // beta outputs, // C (int)output_size); // ldc, leading rank of C -#else - auto C_mat = EigenMatrixMap(outputs, output_size, batch_size); - C_mat.noalias() = - ConstEigenMatrixMap(weights, input_size, output_size) - .transpose() * - ConstEigenMatrixMap(inputs, input_size, batch_size); -#endif - } - if (apply_relu) { - for (size_t i = 0; i < batch_size; i++) { - float* batch_outputs = outputs + i * output_size; - for (size_t o = 0; o < output_size; o++) { - float val = biases[o] + batch_outputs[o]; - batch_outputs[o] = val >= 0 ? val : 0; - } - } - } else { - for (size_t i = 0; i < batch_size; i++) { - float* batch_outputs = outputs + i * output_size; - for (size_t o = 0; o < output_size; o++) { - batch_outputs[o] += biases[o]; - } - } } + ApplyBias(batch_size, output_size, biases, apply_relu, outputs); } -float FullyConnectedLayer::Forward0D(const size_t size, const float* x, - const float* y) { +template <> +float FullyConnectedLayer::Forward0D(const size_t size, const float* x, + const float* y) { // A scalar product, also known as a dot-product. 
// float cblas_sdot(const int N, const float *X, const int incX, const float // *Y, // const int incY); -#ifndef USE_EIGEN return cblas_sdot((int)size, x, 1, y, 1); -#else +} +#endif + +template <> +void FullyConnectedLayer::Forward1D( + size_t batch_size, const size_t input_size, const size_t output_size, + const float* inputs, const float* weights, const float* biases, + bool apply_relu, float* outputs) { + if (batch_size == 1) { + EigenVectorMap y(outputs, output_size); + y.noalias() = ConstEigenMatrixMap(weights, input_size, output_size) + .transpose() * + ConstEigenVectorMap(inputs, input_size); + } else { + auto C_mat = EigenMatrixMap(outputs, output_size, batch_size); + C_mat.noalias() = + ConstEigenMatrixMap(weights, input_size, output_size) + .transpose() * + ConstEigenMatrixMap(inputs, input_size, batch_size); + } + ApplyBias(batch_size, output_size, biases, apply_relu, outputs); +} + +template <> +float FullyConnectedLayer::Forward0D(const size_t size, const float* x, + const float* y) { return ConstEigenVectorMap(x, size) .dot(ConstEigenVectorMap(y, size)); -#endif } } // namespace lczero diff --git a/src/neural/blas/fully_connected_layer.h b/src/neural/blas/fully_connected_layer.h index 65f7367449..30e61f613c 100644 --- a/src/neural/blas/fully_connected_layer.h +++ b/src/neural/blas/fully_connected_layer.h @@ -23,6 +23,7 @@ namespace lczero { +template class FullyConnectedLayer { public: FullyConnectedLayer() = delete; diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc index 626d136fb9..71721f4bc8 100644 --- a/src/neural/blas/network_blas.cc +++ b/src/neural/blas/network_blas.cc @@ -33,9 +33,7 @@ #include "neural/shared/policy_map.h" #include "neural/shared/winograd_filter.h" -#ifdef USE_EIGEN #include -#endif #ifdef USE_DNNL #include @@ -44,6 +42,7 @@ namespace lczero { namespace { +template class BlasComputation : public NetworkComputation { public: BlasComputation(const LegacyWeights& weights, const size_t max_batch_size, @@ -116,15 +115,16 @@ class BlasComputation : public NetworkComputation { bool conv_policy_; }; +template class BlasNetwork : public Network { public: BlasNetwork(const WeightsFile& weights, const OptionsDict& options); virtual ~BlasNetwork(){}; std::unique_ptr NewComputation() override { - return std::make_unique(weights_, max_batch_size_, wdl_, - moves_left_, conv_policy_, - blas_cores_); + return std::make_unique>( + weights_, max_batch_size_, wdl_, moves_left_, conv_policy_, + blas_cores_); } const NetworkCapabilities& GetCapabilities() const override { @@ -144,10 +144,10 @@ class BlasNetwork : public Network { int blas_cores_; }; -BlasComputation::BlasComputation(const LegacyWeights& weights, - const size_t max_batch_size, const bool wdl, - const bool moves_left, const bool conv_policy, - const int blas_cores) +template +BlasComputation::BlasComputation( + const LegacyWeights& weights, const size_t max_batch_size, const bool wdl, + const bool moves_left, const bool conv_policy, const int blas_cores) : weights_(weights), max_batch_size_(max_batch_size), policies_(0), @@ -163,7 +163,8 @@ BlasComputation::BlasComputation(const LegacyWeights& weights, #endif } -void BlasComputation::ComputeBlocking() { +template +void BlasComputation::ComputeBlocking() { // Retrieve network key dimensions from the weights structure. 
const auto num_value_channels = weights_.ip1_val_b.size(); const auto num_moves_channels = weights_.ip1_mov_b.size(); @@ -211,8 +212,8 @@ void BlasComputation::ComputeBlocking() { std::vector res_buffer3(largest_batch_size * output_channels * kSquares); - WinogradConvolution3 convolve3(largest_batch_size, max_channels, - max_output_channels); + WinogradConvolution3 convolve3(largest_batch_size, max_channels, + max_output_channels); size_t max_head_planes = std::max(num_policy_input_planes, @@ -268,9 +269,9 @@ void BlasComputation::ComputeBlocking() { std::swap(conv_out, conv_in); auto se_fc_outputs = se.b1.size(); - ApplySEUnit(batch_size, output_channels, se_fc_outputs, conv_in, res, - se.w1.data(), se.b1.data(), se.w2.data(), se.b2.data(), - conv_out); + ApplySEUnit(batch_size, output_channels, se_fc_outputs, + conv_in, res, se.w1.data(), se.b1.data(), + se.w2.data(), se.b2.data(), conv_out); } else { BiasResidualRelu(batch_size, output_channels, &conv_out[0], conv2.biases.data(), res); @@ -305,14 +306,14 @@ void BlasComputation::ComputeBlocking() { } } else { - Convolution1::Forward(batch_size, output_channels, - num_policy_input_planes, conv_out, - weights_.policy.weights.data(), head_buffer.data()); + Convolution1::Forward( + batch_size, output_channels, num_policy_input_planes, conv_out, + weights_.policy.weights.data(), head_buffer.data()); BiasResidualRelu(batch_size, num_policy_input_planes, &head_buffer[0], weights_.policy.biases.data()); - FullyConnectedLayer::Forward1D( + FullyConnectedLayer::Forward1D( batch_size, num_policy_input_planes * kSquares, num_output_policy, head_buffer.data(), weights_.ip_pol_w.data(), weights_.ip_pol_b.data(), @@ -330,14 +331,14 @@ void BlasComputation::ComputeBlocking() { } // Value head - Convolution1::Forward(batch_size, output_channels, num_value_input_planes, - conv_out, weights_.value.weights.data(), - head_buffer.data()); + Convolution1::Forward( + batch_size, output_channels, num_value_input_planes, conv_out, + weights_.value.weights.data(), head_buffer.data()); BiasResidualRelu(batch_size, num_value_input_planes, &head_buffer[0], weights_.value.biases.data()); - FullyConnectedLayer::Forward1D( + FullyConnectedLayer::Forward1D( batch_size, num_value_input_planes * kSquares, num_value_channels, head_buffer.data(), weights_.ip1_val_w.data(), weights_.ip1_val_b.data(), @@ -347,7 +348,7 @@ void BlasComputation::ComputeBlocking() { // Now get the score if (wdl_) { std::vector wdl(3 * batch_size); - FullyConnectedLayer::Forward1D( + FullyConnectedLayer::Forward1D( batch_size, num_value_channels, 3, output_fc.data(), weights_.ip2_val_w.data(), weights_.ip2_val_b.data(), false, // Relu Off @@ -363,7 +364,7 @@ void BlasComputation::ComputeBlocking() { } } else { for (size_t j = 0; j < batch_size; j++) { - double winrate = FullyConnectedLayer::Forward0D( + double winrate = FullyConnectedLayer::Forward0D( num_value_channels, weights_.ip2_val_w.data(), &output_fc[j * num_value_channels]) + weights_.ip2_val_b[0]; @@ -372,14 +373,14 @@ void BlasComputation::ComputeBlocking() { } } if (moves_left_) { - Convolution1::Forward(batch_size, output_channels, num_moves_input_planes, - conv_out, weights_.moves_left.weights.data(), - head_buffer.data()); + Convolution1::Forward( + batch_size, output_channels, num_moves_input_planes, conv_out, + weights_.moves_left.weights.data(), head_buffer.data()); BiasResidualRelu(batch_size, num_moves_input_planes, &head_buffer[0], weights_.moves_left.biases.data()); - FullyConnectedLayer::Forward1D( + 
FullyConnectedLayer::Forward1D( batch_size, num_moves_input_planes * kSquares, num_moves_channels, head_buffer.data(), weights_.ip1_mov_w.data(), weights_.ip1_mov_b.data(), @@ -387,7 +388,7 @@ void BlasComputation::ComputeBlocking() { output_fc.data()); std::vector output_moves_left(batch_size); - FullyConnectedLayer::Forward1D( + FullyConnectedLayer::Forward1D( batch_size, num_moves_channels, 1, output_fc.data(), weights_.ip2_mov_w.data(), weights_.ip2_mov_b.data(), true, // Relu On @@ -400,7 +401,9 @@ void BlasComputation::ComputeBlocking() { } } -void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) { +template +void BlasComputation::EncodePlanes(const InputPlanes& sample, + float* buffer) { for (const InputPlane& plane : sample) { const float value = plane.value; for (auto i = 0; i < kSquares; i++) @@ -408,13 +411,16 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) { } } -BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options) +template +BlasNetwork::BlasNetwork(const WeightsFile& file, + const OptionsDict& options) : capabilities_{file.format().network_format().input(), file.format().network_format().moves_left()}, weights_(file.weights()) { -#ifndef USE_EIGEN - blas_cores_ = options.GetOrDefault("blas_cores", 1); -#endif + if (!use_eigen) { + blas_cores_ = options.GetOrDefault("blas_cores", 1); + } + max_batch_size_ = static_cast(options.GetOrDefault("batch_size", 256)); @@ -456,57 +462,59 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options) pol_channels, channels); } -#ifdef USE_EIGEN - CERR << "Using Eigen version " << EIGEN_WORLD_VERSION << "." - << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION; -#endif - + if (use_eigen) { + CERR << "Using Eigen version " << EIGEN_WORLD_VERSION << "." + << EIGEN_MAJOR_VERSION << "." 
<< EIGEN_MINOR_VERSION; + CERR << "Eigen max batch size is " << max_batch_size_ << "."; + } else { #ifdef USE_OPENBLAS - int num_procs = openblas_get_num_procs(); - blas_cores_ = std::min(num_procs, blas_cores_); - openblas_set_num_threads(blas_cores_); - const char* core_name = openblas_get_corename(); - const char* config = openblas_get_config(); - CERR << "BLAS vendor: OpenBLAS."; - CERR << "OpenBLAS [" << config << "]."; - CERR << "OpenBLAS found " << num_procs << " " << core_name << " core(s)."; - CERR << "OpenBLAS using " << blas_cores_ << " core(s) for this backend."; + int num_procs = openblas_get_num_procs(); + blas_cores_ = std::min(num_procs, blas_cores_); + openblas_set_num_threads(blas_cores_); + const char* core_name = openblas_get_corename(); + const char* config = openblas_get_config(); + CERR << "BLAS vendor: OpenBLAS."; + CERR << "OpenBLAS [" << config << "]."; + CERR << "OpenBLAS found " << num_procs << " " << core_name << " core(s)."; + CERR << "OpenBLAS using " << blas_cores_ << " core(s) for this backend."; #endif #ifdef USE_MKL - int max_procs = mkl_get_max_threads(); - blas_cores_ = std::min(max_procs, blas_cores_); - mkl_set_num_threads(blas_cores_); - CERR << "BLAS vendor: MKL."; - constexpr int len = 256; - char versionbuf[len]; - mkl_get_version_string(versionbuf, len); - CERR << "MKL " << versionbuf << "."; - MKLVersion version; - mkl_get_version(&version); - CERR << "MKL platform: " << version.Platform - << ", processor: " << version.Processor << "."; - CERR << "MKL can use up to " << max_procs << " thread(s)."; - CERR << "MKL using " << blas_cores_ << " thread(s) for this backend."; + int max_procs = mkl_get_max_threads(); + blas_cores_ = std::min(max_procs, blas_cores_); + mkl_set_num_threads(blas_cores_); + CERR << "BLAS vendor: MKL."; + constexpr int len = 256; + char versionbuf[len]; + mkl_get_version_string(versionbuf, len); + CERR << "MKL " << versionbuf << "."; + MKLVersion version; + mkl_get_version(&version); + CERR << "MKL platform: " << version.Platform + << ", processor: " << version.Processor << "."; + CERR << "MKL can use up to " << max_procs << " thread(s)."; + CERR << "MKL using " << blas_cores_ << " thread(s) for this backend."; #endif #ifdef USE_DNNL - int max_procs = omp_get_max_threads(); - blas_cores_ = std::min(max_procs, blas_cores_); - const dnnl_version_t* ver = dnnl_version(); - CERR << "BLAS functions from DNNL version " << ver->major << "." << ver->minor - << "." << ver->patch; - CERR << "DNNL using up to " << blas_cores_ << " core(s) per search thread"; + int max_procs = omp_get_max_threads(); + blas_cores_ = std::min(max_procs, blas_cores_); + const dnnl_version_t* ver = dnnl_version(); + CERR << "BLAS functions from DNNL version " << ver->major << "." + << ver->minor << "." 
<< ver->patch; + CERR << "DNNL using up to " << blas_cores_ << " core(s) per search thread"; #endif #ifdef USE_ACCELERATE - CERR << "BLAS vendor: Apple vecLib."; - CERR << "Apple vecLib ignores blas_cores (" << blas_cores_ << ") parameter."; + CERR << "BLAS vendor: Apple vecLib."; + CERR << "Apple vecLib ignores blas_cores (" << blas_cores_ + << ") parameter."; #endif - - CERR << "BLAS max batch size is " << max_batch_size_ << "."; + CERR << "BLAS max batch size is " << max_batch_size_ << "."; + } } +template std::unique_ptr MakeBlasNetwork(const WeightsFile& weights, const OptionsDict& options) { if (weights.format().network_format().network() != @@ -534,10 +542,13 @@ std::unique_ptr MakeBlasNetwork(const WeightsFile& weights, std::to_string(weights.format().network_format().value()) + " is not supported by BLAS backend."); } - return std::make_unique(weights, options); + return std::make_unique>(weights, options); } -REGISTER_NETWORK("blas", MakeBlasNetwork, 50) +#ifdef USE_BLAS +REGISTER_NETWORK("blas", MakeBlasNetwork, 50) +#endif +REGISTER_NETWORK("eigen", MakeBlasNetwork, 49) } // namespace } // namespace lczero diff --git a/src/neural/blas/se_unit.cc b/src/neural/blas/se_unit.cc index f051d04e6b..41cac205fa 100644 --- a/src/neural/blas/se_unit.cc +++ b/src/neural/blas/se_unit.cc @@ -61,6 +61,7 @@ static void apply_se(const size_t channels, const size_t batch_size, } } +template void ApplySEUnit(const size_t batch_size, const size_t channels, const size_t se_fc_outputs, const float* input, const float* residual, const float* weights_w1, @@ -71,18 +72,33 @@ void ApplySEUnit(const size_t batch_size, const size_t channels, global_avg_pooling(channels * batch_size, input, pool.data()); - FullyConnectedLayer::Forward1D(batch_size, channels, se_fc_outputs, - pool.data(), weights_w1, weights_b1, - true, // Relu On - fc_out1.data()); + FullyConnectedLayer::Forward1D(batch_size, channels, se_fc_outputs, + pool.data(), weights_w1, weights_b1, + true, // Relu On + fc_out1.data()); - FullyConnectedLayer::Forward1D(batch_size, se_fc_outputs, 2 * channels, - fc_out1.data(), weights_w2, weights_b2, - false, // Relu Off - pool.data()); + FullyConnectedLayer::Forward1D(batch_size, se_fc_outputs, + 2 * channels, fc_out1.data(), + weights_w2, weights_b2, + false, // Relu Off + pool.data()); // Sigmoid, scale and add residual apply_se(channels, batch_size, input, residual, pool.data(), output); } +template void ApplySEUnit(const size_t batch_size, const size_t channels, + const size_t se_fc_outputs, const float* input, + const float* residual, const float* weights_w1, + const float* weights_b1, + const float* weights_w2, + const float* weights_b2, float* output); +#ifdef USE_BLAS +template void ApplySEUnit(const size_t batch_size, const size_t channels, + const size_t se_fc_outputs, const float* input, + const float* residual, const float* weights_w1, + const float* weights_b1, + const float* weights_w2, + const float* weights_b2, float* output); +#endif } // namespace lczero diff --git a/src/neural/blas/se_unit.h b/src/neural/blas/se_unit.h index 3881f99611..e5f1fb88ee 100644 --- a/src/neural/blas/se_unit.h +++ b/src/neural/blas/se_unit.h @@ -22,6 +22,7 @@ namespace lczero { +template void ApplySEUnit(const size_t batch_size, const size_t channels, const size_t se_fc_outputs, const float* input, const float* residual, const float* weights_w1, diff --git a/src/neural/blas/winograd_convolution3.cc b/src/neural/blas/winograd_convolution3.cc index 72128a32f4..cd77eb1a21 100644 --- 
a/src/neural/blas/winograd_convolution3.cc +++ b/src/neural/blas/winograd_convolution3.cc @@ -29,39 +29,39 @@ #include "winograd_transform_ispc.h" #endif -#ifdef USE_EIGEN #include -#endif namespace lczero { -#ifdef USE_EIGEN template using EigenMatrixMap = Eigen::Map>; template using ConstEigenMatrixMap = Eigen::Map>; -#endif -WinogradConvolution3::WinogradConvolution3(const size_t max_batch_size, - const size_t max_input_layers, - const size_t max_output_layers) +template +WinogradConvolution3::WinogradConvolution3( + const size_t max_batch_size, const size_t max_input_layers, + const size_t max_output_layers) : V_(max_batch_size * kWinogradTile * max_input_layers * kTiles), M_(max_batch_size * kWinogradTile * max_output_layers * kTiles) {} -void WinogradConvolution3::Forward(const size_t batch_size, - const size_t input_channels, - const size_t output_channels, - const float* input, const float* weights, - float* output) { +template +void WinogradConvolution3::Forward(const size_t batch_size, + const size_t input_channels, + const size_t output_channels, + const float* input, + const float* weights, + float* output) { TransformIn(batch_size, input, input_channels); Sgemm(batch_size, weights, input_channels, output_channels); TransformOut(batch_size, output, output_channels); } -void WinogradConvolution3::TransformIn(const size_t batch_size, - const float* input, - const size_t channels) { +template +void WinogradConvolution3::TransformIn(const size_t batch_size, + const float* input, + const size_t channels) { #ifndef USE_ISPC static const size_t kCacheSize = 128; @@ -160,9 +160,12 @@ void WinogradConvolution3::TransformIn(const size_t batch_size, #endif // USE_ISPC } -void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights, - const size_t input_channels, - const size_t output_channels) { +#ifdef USE_BLAS +template <> +void WinogradConvolution3::Sgemm(const size_t batch_size, + const float* weights, + const size_t input_channels, + const size_t output_channels) { #ifdef USE_MKL /* @@ -218,7 +221,6 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights, auto offset_v = b * batch_size * input_channels * kTiles; auto offset_m = b * batch_size * output_channels * kTiles; -#ifndef USE_EIGEN cblas_sgemm(CblasColMajor, // Row major format CblasNoTrans, // A no trans CblasNoTrans, // B no trans @@ -232,21 +234,33 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights, (int)input_channels, 0.0f, // ldV &M_[offset_m], // M (int)output_channels); // ldM -#else + } +#endif +} +#endif + +template <> +void WinogradConvolution3::Sgemm(const size_t batch_size, + const float* weights, + const size_t input_channels, + const size_t output_channels) { + for (size_t b = 0; b < kWinogradTile; b++) { + auto offset_u = b * output_channels * input_channels; + auto offset_v = b * batch_size * input_channels * kTiles; + auto offset_m = b * batch_size * output_channels * kTiles; auto C_mat = EigenMatrixMap(&M_[offset_m], output_channels, batch_size * kTiles); C_mat.noalias() = ConstEigenMatrixMap( &weights[offset_u], output_channels, input_channels) * ConstEigenMatrixMap(&V_[offset_v], input_channels, batch_size * kTiles); -#endif } - -#endif } -void WinogradConvolution3::TransformOut(const size_t batch_size, float* output, - const size_t channels) { +template +void WinogradConvolution3::TransformOut(const size_t batch_size, + float* output, + const size_t channels) { #ifndef USE_ISPC float m[kWinogradTile]; @@ -311,4 +325,8 @@ void 
WinogradConvolution3::TransformOut(const size_t batch_size, float* output, #endif // USE_ISPC } +template class WinogradConvolution3; +#ifdef USE_BLAS +template class WinogradConvolution3; +#endif } // namespace lczero diff --git a/src/neural/blas/winograd_convolution3.h b/src/neural/blas/winograd_convolution3.h index e86bcddd98..91c1b95ae8 100644 --- a/src/neural/blas/winograd_convolution3.h +++ b/src/neural/blas/winograd_convolution3.h @@ -34,6 +34,7 @@ namespace lczero { // https://ai.intel.com/winograd-2/ // Convolution 3x3 using the Winograd algorithm +template class WinogradConvolution3 { public: // The instance will allocate memory resources for the diff --git a/src/neural/network_check.cc b/src/neural/network_check.cc index fa77a0706a..975569023b 100644 --- a/src/neural/network_check.cc +++ b/src/neural/network_check.cc @@ -250,7 +250,7 @@ class CheckNetwork : public Network { OptionsDict& backend1_dict = dict1; OptionsDict dict2; - std::string backendName2 = "blas"; + std::string backendName2 = "eigen"; OptionsDict& backend2_dict = dict2; const std::string mode = options.GetOrDefault("mode", "check"); From 0cd52f721ac01e6a7dee9a3486aabe03f7d7acd1 Mon Sep 17 00:00:00 2001 From: cn4750 Date: Tue, 7 Apr 2020 08:01:15 -0400 Subject: [PATCH 086/151] Make benchmark more like Stockfish bench (#1069) * Make benchmark more like Stockfish bench Add some of the SF bench positions, iterate over them while collecting nodes and times, report time and node count summary at the end. * Fix whitespace Spaces instead of tabs. * Properly handle nodes per second reporting Handle division by zero and proper rounding. * Fix C4596 error Fix illegal qualified name in member declaration error by removing redundant Benchmark::. * Add support for old bench of only starting position Use the --starting-position flag to change the position set to only be the starting position so that scripts that use benchmark can run as they always have. * Update the check scripts to use only the starting position in bench Bench only the starting position to save time. * Fix assignment Allow assignment for starting position. * Fix variable names Fix the variables names to conform to the contributor style. * Remove positions which inflate nps Remove TB endgame positions which inflate nps greatly. * Update the dx check script to use only the starting position in bench Bench only the starting position to save time. * Change how positions are chosen Allow for choosing more than just the starting position. Allow choice of the first number of positions in the list of positions to allow for faster benching. * Update the check scripts to use the new benchmark option Use --num-positions=1 instead of --starting-position. * Update our PGO AppVeyor build to use the new benchmark For now only bench the starting position. * Re-add support for benchmarking a FEN string If provided a FEN string, only test that one position irrespective of --num-positions which is forced to one. * Revert "Update our PGO AppVeyor build to use the new benchmark" This reverts commit dd48294deef7e5e06a7683fb60258c6484aa9fea. We no longer do PGO builds and no longer do benching here. * Update our PGO AppVeyor build to use the new benchmark For now only bench the starting position. * Adjust header ordering to conform to style Conform to the style of other code. 
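For illustration only: the summary reporting described in the bullets above amounts to accumulating per-position node counts and times and printing a rounded nodes-per-second figure. A minimal standalone sketch of that calculation (not the patch itself, which follows below; the helper name is an assumption) could look like:

#include <cmath>
#include <cstdint>
#include <numeric>
#include <vector>

// Hedged sketch: rounded nodes/second over all benchmark positions.
// The +1 ms guards against division by zero when the total time is zero.
inline long NodesPerSecond(const std::vector<std::int64_t>& playouts,
                           const std::vector<std::int64_t>& times_ms) {
  const auto total_playouts =
      std::accumulate(playouts.begin(), playouts.end(), std::int64_t{0});
  const auto total_time =
      std::accumulate(times_ms.begin(), times_ms.end(), std::int64_t{0});
  return std::lround(1000.0 * total_playouts / (total_time + 1));
}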
--- scripts/appveyor_win_build.cmd | 2 +- scripts/check_dx.bat | 2 +- scripts/check_opencl.bat | 2 +- src/benchmark/benchmark.cc | 95 ++++++++++++++++++++++------------ src/benchmark/benchmark.h | 40 ++++++++++++++ 5 files changed, 106 insertions(+), 35 deletions(-) mode change 100755 => 100644 scripts/check_opencl.bat diff --git a/scripts/appveyor_win_build.cmd b/scripts/appveyor_win_build.cmd index 746f8e097a..aa75c0c762 100644 --- a/scripts/appveyor_win_build.cmd +++ b/scripts/appveyor_win_build.cmd @@ -10,7 +10,7 @@ IF %PGO%==true ( IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.77\build\native\bin\OpenCL.dll IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll - lc0 benchmark --weights=c:\cache\591226.pb.gz --backend=random --movetime=10000 + lc0 benchmark --num-positions=1 --weights=c:\cache\591226.pb.gz --backend=random --movetime=10000 ) cd .. IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGOptimize /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" diff --git a/scripts/check_dx.bat b/scripts/check_dx.bat index dc173f3c0c..18d1c79562 100644 --- a/scripts/check_dx.bat +++ b/scripts/check_dx.bat @@ -1,5 +1,5 @@ @ECHO OFF ECHO Sanity checking the dx12 driver. -lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx12 %* +lc0 benchmark --num-positions=1 --backend=check --backend-opts=mode=check,freq=1.0,atol=5e-1,dx12 %* PAUSE diff --git a/scripts/check_opencl.bat b/scripts/check_opencl.bat old mode 100755 new mode 100644 index ed89a60433..ded12defa7 --- a/scripts/check_opencl.bat +++ b/scripts/check_opencl.bat @@ -1,5 +1,5 @@ @ECHO OFF ECHO Sanity checking the opencl driver. 
-lc0 benchmark --backend=check --backend-opts=mode=check,freq=1.0,opencl %* +lc0 benchmark --num-positions=1 --backend=check --backend-opts=mode=check,freq=1.0,opencl %* PAUSE diff --git a/src/benchmark/benchmark.cc b/src/benchmark/benchmark.cc index eadeab3d0a..131b923b3f 100644 --- a/src/benchmark/benchmark.cc +++ b/src/benchmark/benchmark.cc @@ -27,6 +27,8 @@ #include "benchmark/benchmark.h" +#include + #include "mcts/search.h" #include "mcts/stoppers/factory.h" #include "mcts/stoppers/stoppers.h" @@ -40,7 +42,9 @@ const OptionId kThreadsOptionId{"threads", "Threads", const OptionId kNodesId{"nodes", "", "Number of nodes to run as a benchmark."}; const OptionId kMovetimeId{"movetime", "", "Benchmark time allocation, in milliseconds."}; -const OptionId kFenId{"fen", "", "Benchmark initial position FEN."}; +const OptionId kFenId{"fen", "", "Benchmark position FEN."}; +const OptionId kNumPositionsId{"num-positions", "", + "The number of benchmark positions to test."}; } // namespace void Benchmark::Run() { @@ -52,7 +56,8 @@ void Benchmark::Run() { options.Add(kNodesId, -1, 999999999) = -1; options.Add(kMovetimeId, -1, 999999999) = 10000; - options.Add(kFenId) = ChessBoard::kStartposFen; + options.Add(kFenId) = ""; + options.Add(kNumPositionsId, 1, 34) = 34; if (!options.ProcessAllFlags()) return; @@ -61,41 +66,67 @@ void Benchmark::Run() { auto network = NetworkFactory::LoadNetwork(option_dict); - NodeTree tree; - tree.ResetToPosition(option_dict.Get(kFenId), {}); - - NNCache cache; - cache.SetCapacity(option_dict.Get(kNNCacheSizeId)); - - int visits = option_dict.Get(kNodesId); + const int visits = option_dict.Get(kNodesId); const int movetime = option_dict.Get(kMovetimeId); + const std::string fen = option_dict.Get(kFenId); + int num_positions = option_dict.Get(kNumPositionsId); + + std::vector times; + std::vector playouts; + std::uint64_t cnt = 1; - auto stopper = std::make_unique(); - if (movetime > -1) { - stopper->AddStopper(std::make_unique(movetime)); + if (fen.length() > 0) { + positions = {fen}; + num_positions = 1; } - if (visits > -1) { - stopper->AddStopper(std::make_unique(visits)); + std::vector testing_positions( + positions.cbegin(), positions.cbegin() + num_positions); + + for (std::string position : testing_positions) { + std::cout << "\nPosition: " << cnt++ << "/" << testing_positions.size() + << " " << position << std::endl; + + auto stopper = std::make_unique(); + if (movetime > -1) { + stopper->AddStopper(std::make_unique(movetime)); + } + if (visits > -1) { + stopper->AddStopper(std::make_unique(visits)); + } + + NNCache cache; + cache.SetCapacity(option_dict.Get(kNNCacheSizeId)); + + NodeTree tree; + tree.ResetToPosition(position, {}); + + const auto start = std::chrono::steady_clock::now(); + auto search = std::make_unique( + tree, network.get(), + std::make_unique( + std::bind(&Benchmark::OnBestMove, this, std::placeholders::_1), + std::bind(&Benchmark::OnInfo, this, std::placeholders::_1)), + MoveList(), start, std::move(stopper), false, option_dict, &cache, + nullptr); + search->StartThreads(option_dict.Get(kThreadsOptionId)); + search->Wait(); + const auto end = std::chrono::steady_clock::now(); + + const auto time = + std::chrono::duration_cast(end - start); + times.push_back(time.count()); + playouts.push_back(search->GetTotalPlayouts()); } - const auto start = std::chrono::steady_clock::now(); - auto search = std::make_unique( - tree, network.get(), - std::make_unique( - std::bind(&Benchmark::OnBestMove, this, std::placeholders::_1), - 
std::bind(&Benchmark::OnInfo, this, std::placeholders::_1)), - MoveList(), start, std::move(stopper), false, option_dict, &cache, - nullptr); - - search->StartThreads(option_dict.Get(kThreadsOptionId)); - - search->Wait(); - - const auto end = std::chrono::steady_clock::now(); - std::chrono::duration time = end - start; - std::cout << "Benchmark final time " << time.count() << "s calculating " - << search->GetTotalPlayouts() / time.count() - << " nodes per second." << std::endl; + const auto total_playouts = + std::accumulate(playouts.begin(), playouts.end(), 0); + const auto total_time = std::accumulate(times.begin(), times.end(), 0); + std::cout << "\n===========================" + << "\nTotal time (ms) : " << total_time + << "\nNodes searched : " << total_playouts + << "\nNodes/second : " + << std::lround(1000.0 * total_playouts / (total_time + 1)) + << std::endl; } catch (Exception& ex) { std::cerr << ex.what() << std::endl; } diff --git a/src/benchmark/benchmark.h b/src/benchmark/benchmark.h index 025d1847ab..a081a76883 100644 --- a/src/benchmark/benchmark.h +++ b/src/benchmark/benchmark.h @@ -38,6 +38,46 @@ class Benchmark{ public: Benchmark() = default; + // Same positions as Stockfish uses. + std::vector positions = { + "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1", + "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10", + "8/2p5/3p4/KP5r/1R3p1k/8/4P1P1/8 w - - 0 11", + "4rrk1/pp1n3p/3q2pQ/2p1pb2/2PP4/2P3N1/P2B2PP/4RRK1 b - - 7 19", + "rq3rk1/ppp2ppp/1bnpb3/3N2B1/3NP3/7P/PPPQ1PP1/2KR3R w - - 7 14 moves " + "d4e6", + "r1bq1r1k/1pp1n1pp/1p1p4/4p2Q/4Pp2/1BNP4/PPP2PPP/3R1RK1 w - - 2 14 moves " + "g2g4", + "r3r1k1/2p2ppp/p1p1bn2/8/1q2P3/2NPQN2/PPP3PP/R4RK1 b - - 2 15", + "r1bbk1nr/pp3p1p/2n5/1N4p1/2Np1B2/8/PPP2PPP/2KR1B1R w kq - 0 13", + "r1bq1rk1/ppp1nppp/4n3/3p3Q/3P4/1BP1B3/PP1N2PP/R4RK1 w - - 1 16", + "4r1k1/r1q2ppp/ppp2n2/4P3/5Rb1/1N1BQ3/PPP3PP/R5K1 w - - 1 17", + "2rqkb1r/ppp2p2/2npb1p1/1N1Nn2p/2P1PP2/8/PP2B1PP/R1BQK2R b KQ - 0 11", + "r1bq1r1k/b1p1npp1/p2p3p/1p6/3PP3/1B2NN2/PP3PPP/R2Q1RK1 w - - 1 16", + "3r1rk1/p5pp/bpp1pp2/8/q1PP1P2/b3P3/P2NQRPP/1R2B1K1 b - - 6 22", + "r1q2rk1/2p1bppp/2Pp4/p6b/Q1PNp3/4B3/PP1R1PPP/2K4R w - - 2 18", + "4k2r/1pb2ppp/1p2p3/1R1p4/3P4/2r1PN2/P4PPP/1R4K1 b - - 3 22", + "3q2k1/pb3p1p/4pbp1/2r5/PpN2N2/1P2P2P/5PP1/Q2R2K1 b - - 4 26", + "6k1/6p1/6Pp/ppp5/3pn2P/1P3K2/1PP2P2/3N4 b - - 0 1", + "3b4/5kp1/1p1p1p1p/pP1PpP1P/P1P1P3/3KN3/8/8 w - - 0 1", + "2K5/p7/7P/5pR1/8/5k2/r7/8 w - - 0 1 moves g5g6 f3e3 g6g5 e3f3", + "8/6pk/1p6/8/PP3p1p/5P2/4KP1q/3Q4 w - - 0 1", + "7k/3p2pp/4q3/8/4Q3/5Kp1/P6b/8 w - - 0 1", + "8/2p5/8/2kPKp1p/2p4P/2P5/3P4/8 w - - 0 1", + "8/1p3pp1/7p/5P1P/2k3P1/8/2K2P2/8 w - - 0 1", + "8/pp2r1k1/2p1p3/3pP2p/1P1P1P1P/P5KR/8/8 w - - 0 1", + "8/3p4/p1bk3p/Pp6/1Kp1PpPp/2P2P1P/2P5/5B2 b - - 0 1", + "5k2/7R/4P2p/5K2/p1r2P1p/8/8/8 b - - 0 1", + "6k1/6p1/P6p/r1N5/5p2/7P/1b3PP1/4R1K1 w - - 0 1", + "1r3k2/4q3/2Pp3b/3Bp3/2Q2p2/1p1P2P1/1P2KP2/3N4 w - - 0 1", + "6k1/4pp1p/3p2p1/P1pPb3/R7/1r2P1PP/3B1P2/6K1 w - - 0 1", + "8/3p3B/5p2/5P2/p7/PP5b/k7/6K1 w - - 0 1", + "5rk1/q6p/2p3bR/1pPp1rP1/1P1Pp3/P3B1Q1/1K3P2/R7 w - - 93 90", + "4rrk1/1p1nq3/p7/2p1P1pp/3P2bp/3Q1Bn1/PPPB4/1K2R1NR w - - 40 21", + "r3k2r/3nnpbp/q2pp1p1/p7/Pp1PPPP1/4BNN1/1P5P/R2Q1RK1 w kq - 0 16", + "3Qb1k1/1r2ppb1/pN1n2q1/Pp1Pp1Pr/4P2p/4BP2/4B1R1/1R5K b - - 11 40" + }; + void Run(); void OnBestMove(const BestMoveInfo& move); void OnInfo(const std::vector& infos); From 69c12a148a26651888128128e8635b8579001734 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Tue, 7 Apr 2020 10:56:04 -0400 Subject: [PATCH 087/151] Newer Android NDK for better C++17 support (#1166) --- appveyor.yml | 8 ++++---- cross-files/aarch64-linux-android | 12 ++++-------- cross-files/armv7a-linux-android | 17 +++++++---------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4412cb7058..31867a70fc 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -44,13 +44,12 @@ install: - cmd: set PATH=C:\Python36;C:\Python36\scripts;%PATH% - cmd: pip3 install --upgrade meson - cmd: call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 -- cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir \android-standalone-64 -- cmd: IF %NAME%==android set PATH=C:\android-standalone-64\bin;%PATH% +- cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 appveyor DownloadFile https://dl.google.com/android/repository/android-ndk-r19c-windows-x86_64.zip +- cmd: IF %NAME%==android IF NOT EXIST C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 7z x android-ndk-r19c-windows-x86_64.zip -oC:\ndk +- cmd: IF %NAME%==android set PATH=C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64\bin;%PATH% - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/aarch64-linux-android >crossfile-aarch64 - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-aarch64.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-aarch64 7z x openblas-android-aarch64.zip -oC:\cache\OpenBLAS -- cmd: IF %NAME%==android C:\ProgramData\Microsoft\AndroidNDK64\android-ndk-r17\build\tools\make_standalone_toolchain.py --arch arm --api 21 --stl libc++ --install-dir \android-standalone-32 -- cmd: IF %NAME%==android set PATH=C:\android-standalone-32\bin;%PATH% - cmd: IF %NAME%==android sed "s/clang+*/&.cmd/" cross-files/armv7a-linux-android >crossfile-armv7a - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a appveyor DownloadFile https://github.com/borg323/OpenBLAS/releases/download/android-0.3.8-2/openblas-android-armv7a.zip - cmd: IF %NAME%==android IF NOT EXIST C:\cache\OpenBLAS\android-armv7a 7z x openblas-android-armv7a.zip -oC:\cache\OpenBLAS @@ -68,6 +67,7 @@ cache: - C:\cache -> appveyor.yml - 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0' - C:\projects\lc0\subprojects\packagecache + - C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 before_build: - cmd: git submodule update --init --recursive - cmd: SET BUILD_BLAS=%BLAS% diff --git a/cross-files/aarch64-linux-android b/cross-files/aarch64-linux-android index afb3596b7d..40e1faca3a 100644 --- a/cross-files/aarch64-linux-android +++ b/cross-files/aarch64-linux-android @@ -1,13 +1,9 @@ -# Tested with Android NDK r18, default toolchain +# Tested with Android NDK r19c, default toolchain # Targeting API level 21 -# Make the standalone toolchain -# cd android-ndk-r18b/build/tools/ -# ./make_standalone_toolchain.py --arch arm64 --api 21 --stl libc++ --install-dir android-standalone-64 - # Set the toolchain path on your environment -# export PATH="$HOME/.local/share/android-sdk/android-toolchains/android-standalone-64/bin:$PATH" +# export 
PATH="$HOME/.local/share/android-sdk/ndk-bundle/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH" [host_machine] system = 'android' @@ -19,8 +15,8 @@ endian = 'little' cpp_link_args = ['-llog', '-static-libstdc++'] [binaries] -c = 'aarch64-linux-android-clang' -cpp = 'aarch64-linux-android-clang++' +c = 'aarch64-linux-android21-clang' +cpp = 'aarch64-linux-android21-clang++' ar = 'aarch64-linux-android-ar' strip = 'aarch64-linux-android-strip' ld = 'aarch64-linux-android-ld' diff --git a/cross-files/armv7a-linux-android b/cross-files/armv7a-linux-android index 131c840036..16b3e93f90 100644 --- a/cross-files/armv7a-linux-android +++ b/cross-files/armv7a-linux-android @@ -1,18 +1,15 @@ -# Tested with Android NDK r18, standalone toolchain +# Tested with Android NDK r19c, default toolchain # Targeting API level 21 -# + # When targeting API levels < 24 the build fails unless _FILE_OFFSET_BITS is unset. # Meson passes _FILE_OFFSET_BITS=64 but recent NDK toolchains have issues building # for 32-bit ABIs when such macro it set. Relevant links: # https://android.googlesource.com/platform/bionic/+/master/docs/32-bit-abi.md # https://github.com/mesonbuild/meson/pull/2996#issuecomment-384045808 -# -# First create the standalone toolchain: -# ./make_standalone_toolchain.py --arch arm --api 21 --stl libc++ --install-dir android-standalone-32 -# -# Then set the toolchain path on your environment: -# export PATH="$HOME/.local/share/android-sdk/android-toolchains/android-standalone-32/bin:$PATH" + +# Set the toolchain path on your environment +# export PATH="$HOME/.local/share/android-sdk/ndk-bundle/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH" [host_machine] system = 'android' @@ -25,8 +22,8 @@ cpp_args = ['-U_FILE_OFFSET_BITS'] cpp_link_args = ['-llog', '-static-libstdc++'] [binaries] -c = 'arm-linux-androideabi-clang' -cpp = 'arm-linux-androideabi-clang++' +c = 'armv7a-linux-androideabi21-clang' +cpp = 'armv7a-linux-androideabi21-clang++' ar = 'arm-linux-androideabi-ar' strip = 'arm-linux-androideabi-strip' ld = 'arm-linux-androideabi-ld' From b73176d441a61532c1901d9c3b810ee4404be899 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 8 Apr 2020 00:49:45 +0300 Subject: [PATCH 088/151] New windows build script and instructions update (#1153) * New windows build script and instructions update * opencl dependency gets bad path on windows - make it macos only * udpade instructions Co-authored-by: borg323 --- README.md | 19 +++++++------- build-cl.cmd | 31 ---------------------- build-cuda-ninja.cmd | 14 ---------- build-cuda.cmd | 22 ---------------- build.cmd | 62 ++++++++++++++++++++++++++++++++++++++++++++ meson.build | 2 +- windows_build.md | 57 ++++++++++++++++++++++------------------ 7 files changed, 104 insertions(+), 103 deletions(-) delete mode 100644 build-cl.cmd delete mode 100644 build-cuda-ninja.cmd delete mode 100644 build-cuda.cmd create mode 100644 build.cmd diff --git a/README.md b/README.md index 654d883988..d1e9a5c7c1 100644 --- a/README.md +++ b/README.md @@ -113,24 +113,23 @@ to run latest releases of lc0 and the client inside a Docker container. ### Windows -0. Install Microsoft Visual Studio -1. Install [CUDA](https://developer.nvidia.com/cuda-zone) (v9.2 is fine) +Here are the brief instructions for CUDA/CuDNN, for details and other options see `windows-build.md`. + +0. Install Microsoft Visual Studio (2017 or later) +1. Install [CUDA](https://developer.nvidia.com/cuda-zone) 2. Install [cuDNN](https://developer.nvidia.com/cudnn). 3. 
Install Python3 4. Install Meson: `pip3 install --upgrade meson` -5. Edit `build-cuda.cmd`: +5. Edit `build.cmd`: -* If you use MSVS other than 2015 (or if it's installed into non-standard location): - * `C:\Program Files (x86)\Microsoft Visual Studio 14.0\` replace 14.0 with your version - * `--backend 2015` replace 2015 with your version -* `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\lib\x64` replace with your CUDA path -* `C:\dev\cuDNN\` replace with your cuDNN directory +* Set `CUDA_PATH` with your CUDA directory +* Set `CUDNN_PATH` with your cuDNN directory (may be the same with CUDA_PATH) -6. Run `build-cuda.cmd`. It will generate MSVS project and pause. +6. Run `build.cmd`. It will ask permission to delete the build directory, then generate MSVS project and pause. Then either: -7. Hit to build it. +7. Hit `Enter` to build it. 8. Resulting binary will be `build/lc0.exe` Or. diff --git a/build-cl.cmd b/build-cl.cmd deleted file mode 100644 index 785a439629..0000000000 --- a/build-cl.cmd +++ /dev/null @@ -1,31 +0,0 @@ -rd /s build - -rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" -set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" - -rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - -rem change to '-Dblas=true' to also build the blas backend with mkl -meson build --backend vs2017 --buildtype release -Dblas=false ^ --Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^ --Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^ --Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^ --Dopencl_include="C:\Program Files (x86)\AMD APP SDK\3.0\include" ^ --Ddefault_library=static - -pause - -cd build - -%MSBuild% /p:Configuration=Release /p:Platform=x64 ^ -/p:PreferredToolArchitecture=x64 "subprojects\zlib-1.2.11\Windows resource for file 'win32_zlib1.rc'@cus.vcxproj" ^ -/filelogger - -%MSBuild% /p:Configuration=Release /p:Platform=x64 ^ -/p:PreferredToolArchitecture=x64 subprojects\zlib-1.2.11\subprojects@zlib-1.2.11@@z@sta.vcxproj ^ -/filelogger - -%MSBuild% /p:Configuration=Release /p:Platform=x64 ^ -/p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^ -/filelogger diff --git a/build-cuda-ninja.cmd b/build-cuda-ninja.cmd deleted file mode 100644 index e326c9b3be..0000000000 --- a/build-cuda-ninja.cmd +++ /dev/null @@ -1,14 +0,0 @@ -rd /s build - -call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 -meson.py build --buildtype release ^ --Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^ --Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^ --Ddefault_library=static - -pause - - -cd build - -ninja \ No newline at end of file diff --git a/build-cuda.cmd b/build-cuda.cmd deleted file mode 100644 index d46ec9e141..0000000000 --- a/build-cuda.cmd +++ /dev/null @@ -1,22 +0,0 @@ -rd /s build - -rem set MSBuild="C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" -set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" -rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 -call "C:\Program Files 
(x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 -meson.py build --backend vs2017 --buildtype release ^ --Dcudnn_libdirs="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\lib\x64","C:\dev\cuDNN\cuda\lib\x64" ^ --Dcudnn_include="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\include","C:\dev\cuDNN\cuda\include" ^ --Ddefault_library=static - -pause - - -cd build - -%MSBuild% ^ -/p:Configuration=Release ^ -/p:Platform=x64 ^ -/p:PreferredToolArchitecture=x64 lc0.sln ^ -/filelogger - diff --git a/build.cmd b/build.cmd new file mode 100644 index 0000000000..50d4f308d0 --- /dev/null +++ b/build.cmd @@ -0,0 +1,62 @@ +@echo off +setlocal + +rem 1. Set the following for the options you want to build. +set CUDNN=true +set DX12=false +set OPENCL=false +set MKL=false +set DNNL=false +set OPENBLAS=false +set EIGEN=false +set TEST=false + +rem 2. Edit the paths for the build dependencies. +set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0 +set CUDNN_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0 +set OPENBLAS_PATH=C:\OpenBLAS +set MKL_PATH=C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl +set DNNL_PATH=C:\dnnl_win_1.1.1_cpu_vcomp +set OPENCL_LIB_PATH=%CUDA_PATH%\lib\x64 +set OPENCL_INCLUDE_PATH=%CUDA_PATH%\include + +rem 3. In most cases you won't need to change anything further down. +echo Deleting build directory: +rd /s build + +if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019" ( + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 + set MSBuild=msbuild + set backend=vs2019 +) else ( + call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 + set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" + set backend=vs2017 +) + +if "%CUDA_PATH%"=="%CUDNN_PATH%" ( + set CUDNN_LIB_PATH=%CUDNN_PATH%\lib\x64 + set CUDNN_INCLUDE_PATH=%CUDNN_PATH%\include +) else ( + set CUDNN_LIB_PATH=%CUDA_PATH%\lib\x64,%CUDNN_PATH%\lib\x64 + set CUDNN_INCLUDE_PATH=%CUDA_PATH%\include,%CUDNN_PATH%\include +) + +if %CUDNN%==true set PATH=%CUDA_PATH%\bin;%PATH% + +meson build --backend %backend% --buildtype release -Ddx=%DX12% -Dcudnn=%CUDNN% -Dopencl=%OPENCL% ^ +-Dblas=true -Dmkl=%MKL% -Dopenblas=%OPENBLAS% -Deigen=%EIGEN% -Ddnnl=%DNNL% -Dgtest=%TEST% ^ +-Dcudnn_include="%CUDNN_INCLUDE_PATH%" -Dcudnn_libdirs="%CUDNN_LIB_PATH%" ^ +-Dmkl_include="%MKL_PATH%\include" -Dmkl_libdirs="%MKL_PATH%\lib\intel64" -Ddnnl_dir="%DNNL_PATH%" ^ +-Dopencl_libdirs="%OPENCL_LIB_PATH%" -Dopencl_include="%OPENCL_INCLUDE_PATH%" ^ +-Dopenblas_include="%OPENBLAS_PATH%\include" -Dopenblas_libdirs="%OPENBLAS_PATH%\lib" ^ +-Ddefault_library=static + +if errorlevel 1 exit /b + +pause + +cd build + +%MSBuild% /m /p:Configuration=Release /p:Platform=x64 /p:WholeProgramOptimization=true ^ +/p:PreferredToolArchitecture=x64 lc0.sln /filelogger \ No newline at end of file diff --git a/meson.build b/meson.build index 8a77f7ce1a..92068149a9 100644 --- a/meson.build +++ b/meson.build @@ -312,7 +312,7 @@ if get_option('build_backends') opencl_libdirs = get_option('opencl_libdirs') opencl_lib=cc.find_library('OpenCL', dirs: opencl_libdirs, required: false) - opencl_framework=dependency('OpenCL', required: false) + opencl_framework=dependency('OpenCL', method: 'extraframework', required: false) if opencl_framework.found() opencl_dep = [ opencl_framework ] has_opencl = true diff --git 
a/windows_build.md b/windows_build.md index a74b6dc159..3b0efea31a 100644 --- a/windows_build.md +++ b/windows_build.md @@ -1,49 +1,56 @@ -## Windows BLAS/OpenCL +## Windows build -0. [Install Microsoft Visual Studio](https://visualstudio.microsoft.com/). For VS2017 make sure the - option "Desktop development with C++" is installed (you can add it later if not). +0. [Install Microsoft Visual Studio](https://visualstudio.microsoft.com/) (2017 or later). Make sure + the option "Desktop development with C++" is selected (you can add it later if not). 1. [Install git for windows](https://git-scm.com/download/win) - this can be used to get lc0 but is also - needed for meson. + needed for meson. If you haven't downloaded lc0, you can do it now following the instructions in + the `README`(https://github.com/LeelaChessZero/lc0/blob/master/README.md). -2. Install a BLAS library. This can be either OpenBLAS or Intel MKL. +2. GPU users with nVIDIA cards (and "compute capability" 3.0 or higher) can build with CUDA/CuDNN. +* Install [CUDA](https://developer.nvidia.com/cuda-zone) (v10.0 is fine for Visual Studio 2017, newer is + needed for Visual Studio 2019) and then +* install the appropriate [cuDNN](https://developer.nvidia.com/cudnn). + +3. GPU users with recent Windows 10 installations can build with DirectX 12, this only requires updated + SDK headers (that may already be available in Visual Studio). + +4. CPU users may want to install a BLAS library. This can be either OpenBLAS, Intel MKL or Intel DNNL. + This is optional since the Eigen library can be used without installing anything, but probably with + worse performance. * For [OpenBLAS go here](http://www.openblas.net/), you need a binary package with a filename of the form `OpenBLAS-version-Win64-int32.zip`, they are not available for all versions, which you just unpack at a location of your choise (but not inside the lc0 directory). * For [Intel MKL go here](https://software.intel.com/en-us/mkl), where you need to register. After installation don't forget to run `mklvars.bat intel64` to set up the paths to the dlls. +* For [Intel DNNL go here](https://github.com/intel/mkl-dnn/releases). Note that not all releases have + binaries available, you want `dnnl_win_*_cpu_vcomp.zip`. -3. For OpenCL you also need to install OpenCL developer libraries. +5. For OpenCL you also need to install OpenCL developer libraries. * For AMD cards the AMD APP SDK 3.0 seems to be the appropriate one, to be installed after the card drivers. This is not currently available on the AMD website, but links to a signed installer are available in the [AMD community forum](https://community.amd.com/thread/222855). -* For nVIDIA cards you probably need the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). +* For nVIDIA cards it is included in the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads). -4. [Install Python3](https://www.python.org/) - be sure to check the box to add python to the path. +6. [Install Python3](https://www.python.org/) - be sure to check the box to add python to the path. -5. Install Meson: `pip3 install --upgrade meson` +7. Install Meson: `pip3 install --upgrade meson` -6. Edit `build-cl.cmd`: -* If you use MSVS other than 2017 community edition (or if it's installed into non-standard location) - replace the path to vcvarsall.bat and MSBuild.exe. If you can't find vcvarsall.bat on VS2017, you - need to install option "Desktop development with C++". Some example paths are in comments. 
-* In `--backend 2017` replace 2017 with the correct MSVS version. -* Set the BLAS (and optionally OpenCL) library `include` and `lib` directories in the appropriate - variables. - - For OpenBLAS, they are `openblas_include` and `openblas_libdirs`. - - For Intel MKL, they are `mkl_include` and `mkl_libdirs`. The `lib` directory typically ends in - `\lib\intel64`. - - For OpenCL, they are `opencl_libdirs` and `opencl_include`. The include directory is the one with - the `CL` directory containing `opencl.h`, not directly the one containing `opencl.h`. +8. Edit `build.cmd`: +* At the top, set to `true` and `false` the variables for the backends you want to build. +* Then set the paths for the build dependencies. + - Note: for `OPENCL_INCLUDE_PATH` you don't want the directory containing `opencl.h`, but one level higher + (the one containing `CL`). -7. Run `build-cl.cmd`. It will generate MSVS project and pause. +9. Run `build.cmd`. It will ask permission to delete the build directory, then generate MSVS project and + pause. -8. Hit `Enter` to build it. +10. Hit `Enter` to build it. -9. Resulting binary will be `build/lc0.exe` +11. Resulting binary will be `build/lc0.exe` Alternatively you can -8. open generated solution `build/lc0.sln` in Visual Studio and build yourself. +10. open generated solution `build/lc0.sln` in Visual Studio and build yourself. From 4f03b42100dbfd3ed96c3e171395dd6a5ad8a1aa Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 8 Apr 2020 07:50:16 +1000 Subject: [PATCH 089/151] Allow user override of input format to random backend. (#1177) * Allow user override of input format to random backend. * Review feedback. --- src/neural/network_random.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/neural/network_random.cc b/src/neural/network_random.cc index d353f07df2..639e6bcfe1 100644 --- a/src/neural/network_random.cc +++ b/src/neural/network_random.cc @@ -78,9 +78,7 @@ class RandomNetworkComputation : public NetworkComputation { return d; } - float GetMVal(int /* sample */) const override { - return 0.0f; - } + float GetMVal(int /* sample */) const override { return 0.0f; } float GetPVal(int sample, int move_id) const override { if (uniform_mode_) return 1.0f; @@ -108,7 +106,13 @@ class RandomNetwork : public Network { RandomNetwork(const OptionsDict& options) : delay_ms_(options.GetOrDefault("delay", 0)), seed_(options.GetOrDefault("seed", 0)), - uniform_mode_(options.GetOrDefault("uniform", false)) {} + uniform_mode_(options.GetOrDefault("uniform", false)), + capabilities_{ + static_cast( + options.GetOrDefault( + "input_mode", + pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE)), + pblczero::NetworkFormat::MOVES_LEFT_NONE} {} std::unique_ptr NewComputation() override { return std::make_unique(delay_ms_, seed_, uniform_mode_); @@ -123,8 +127,7 @@ class RandomNetwork : public Network { bool uniform_mode_ = false; NetworkCapabilities capabilities_{ pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, - pblczero::NetworkFormat::MOVES_LEFT_NONE - }; + pblczero::NetworkFormat::MOVES_LEFT_NONE}; }; } // namespace From 01c7761d61372ef0e8b432e625faf0a588fc635b Mon Sep 17 00:00:00 2001 From: Tilps Date: Wed, 8 Apr 2020 07:50:36 +1000 Subject: [PATCH 090/151] Support for selfplay from books with fen tag. (#1178) * Add support for pgn reading with fen tag. Selfplay games correspondingly can be played from non-default startpos and the gameready output will record non-standard fen. 
Also fixed a bug where it wasn't possible to have pgn with startpos as book entry. * Fix bug loading books with fen + moves. * Handle a buggy opening book 2moves_v1 doesn't have valid fens. * Fix bug with first game book missing start fen if book has no fen. * Add header to make circleci happy. * Fix for non-upper case FEN tags. * Formatting. * Add header trying to make build happy. --- src/chess/callbacks.h | 2 ++ src/chess/pgn.h | 48 ++++++++++++++++++++++++++++++++------ src/selfplay/game.cc | 11 +++++---- src/selfplay/game.h | 8 ++++--- src/selfplay/loop.cc | 4 ++++ src/selfplay/tournament.cc | 7 +++--- src/selfplay/tournament.h | 5 ++-- 7 files changed, 65 insertions(+), 20 deletions(-) diff --git a/src/chess/callbacks.h b/src/chess/callbacks.h index d7e62d1cd0..560e347552 100644 --- a/src/chess/callbacks.h +++ b/src/chess/callbacks.h @@ -102,6 +102,8 @@ struct GameInfo { GameResult game_result = GameResult::UNDECIDED; // Name of the file with training data. std::string training_filename; + // Initial fen of the game. + std::string initial_fen; // Game moves. std::vector moves; // Ply within moves that the game actually started. diff --git a/src/chess/pgn.h b/src/chess/pgn.h index 94bce12146..e1629d3083 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -27,26 +27,57 @@ #pragma once +#include +#include #include #include "chess/bitboard.h" #include "chess/board.h" +#include "utils/exception.h" #include "utils/logging.h" namespace lczero { +struct Opening { + std::string start_fen = ChessBoard::kStartposFen; + MoveList moves; +}; + class PgnReader { public: void AddPgnFile(const std::string& filepath) { std::ifstream file(filepath); std::string line; bool in_comment = false; + bool started = false; while (std::getline(file, line)) { // TODO: support line breaks in tags to ensure they are properly ignored. if (line.empty() || line[0] == '[') { - Flush(); + if (started) { + Flush(); + started = false; + } + auto uc_line = line; + std::transform( + uc_line.begin(), uc_line.end(), uc_line.begin(), + [](unsigned char c) { return std::toupper(c); } // correct + ); + if (uc_line.find("[FEN \"", 0) == 0) { + auto start_trimmed = line.substr(6); + cur_startpos_ = start_trimmed.substr(0, start_trimmed.find('"')); + // Some 'opening books' omit the last 2 fields, so there is only 3 + // space delimiters. + if (std::count(cur_startpos_.begin(), cur_startpos_.end(), ' ') == + 3) { + cur_startpos_ += " 0 1"; + } + cur_board_.SetFromFen(cur_startpos_); + } continue; } + // Must have at least one non-tag non-empty line in order to be considered + // a game. + started = true; // Handle braced comments. 
int cur_offset = 0; while ((in_comment && line.find('}', cur_offset) != std::string::npos) || @@ -104,17 +135,19 @@ class PgnReader { cur_board_.Mirror(); } } - Flush(); + if (started) { + Flush(); + } } - std::vector GetGames() const { return games_; } - std::vector&& ReleaseGames() { return std::move(games_); } + std::vector GetGames() const { return games_; } + std::vector&& ReleaseGames() { return std::move(games_); } private: void Flush() { - if (cur_game_.empty()) return; - games_.push_back(cur_game_); + games_.push_back({cur_startpos_, cur_game_}); cur_game_.clear(); cur_board_.SetFromFen(ChessBoard::kStartposFen); + cur_startpos_ = ChessBoard::kStartposFen; } Move::Promotion PieceToPromotion(int p) { @@ -257,7 +290,8 @@ class PgnReader { ChessBoard cur_board_{ChessBoard::kStartposFen}; MoveList cur_game_; - std::vector games_; + std::string cur_startpos_ = ChessBoard::kStartposFen; + std::vector games_; }; } // namespace lczero diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index cb8ff88ffa..84b7f025e3 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -69,20 +69,21 @@ void SelfPlayGame::PopulateUciParams(OptionsParser* options) { } SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2, - bool shared_tree, const MoveList& opening) + bool shared_tree, const Opening& opening) : options_{player1, player2}, chess960_{player1.uci_options->Get(kUciChess960) || player2.uci_options->Get(kUciChess960)} { + orig_fen_ = opening.start_fen; tree_[0] = std::make_shared(); - tree_[0]->ResetToPosition(ChessBoard::kStartposFen, {}); + tree_[0]->ResetToPosition(orig_fen_, {}); if (shared_tree) { tree_[1] = tree_[0]; } else { tree_[1] = std::make_shared(); - tree_[1]->ResetToPosition(ChessBoard::kStartposFen, {}); + tree_[1]->ResetToPosition(orig_fen_, {}); } - for (Move m : opening) { + for (Move m : opening.moves) { tree_[0]->MakeMove(m); if (tree_[0] != tree_[1]) tree_[1]->MakeMove(m); } @@ -219,7 +220,7 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, if (history_copy.ComputeGameResult() == GameResult::UNDECIDED) { auto move_list_to_discard = GetMoves(); move_list_to_discard.push_back(move); - options_[idx].discarded_callback(move_list_to_discard); + options_[idx].discarded_callback({orig_fen_, move_list_to_discard}); } search_->ResetBestMove(); } diff --git a/src/selfplay/game.h b/src/selfplay/game.h index 3d3a0c35a5..4647f70453 100644 --- a/src/selfplay/game.h +++ b/src/selfplay/game.h @@ -27,6 +27,7 @@ #pragma once +#include "chess/pgn.h" #include "chess/position.h" #include "chess/uciloop.h" #include "mcts/search.h" @@ -46,7 +47,7 @@ struct SelfPlayLimits { }; struct PlayerOptions { - using MoveListCallback = std::function; + using OpeningCallback = std::function; // Network to use by the player. Network* network; // Callback when player moves. @@ -54,7 +55,7 @@ struct PlayerOptions { // Callback when player outputs info. CallbackUciResponder::ThinkingCallback info_callback; // Callback when player discards a selected move due to low visits. - MoveListCallback discarded_callback; + OpeningCallback discarded_callback; // NNcache to use. NNCache* cache; // User options dictionary. @@ -71,7 +72,7 @@ class SelfPlayGame { // (useful for training games). Otherwise the tree is separate for black // and white (useful i.e. when they use different networks). 
SelfPlayGame(PlayerOptions player1, PlayerOptions player2, bool shared_tree, - const MoveList& opening); + const Opening& opening); // Populate command line options that it uses. static void PopulateUciParams(OptionsParser* options); @@ -100,6 +101,7 @@ class SelfPlayGame { // Node tree for player1 and player2. If the tree is shared between players, // tree_[0] == tree_[1]. std::shared_ptr tree_[2]; + std::string orig_fen_; // Search that is currently in progress. Stored in members so that Abort() // can stop it. diff --git a/src/selfplay/loop.cc b/src/selfplay/loop.cc index 86587fa683..d17a0ef949 100644 --- a/src/selfplay/loop.cc +++ b/src/selfplay/loop.cc @@ -122,6 +122,10 @@ void SelfPlayLoop::SendGameInfo(const GameInfo& info) { res += " moves"; for (const auto& move : info.moves) res += " " + move.as_string(); } + if (!info.initial_fen.empty() && + info.initial_fen != ChessBoard::kStartposFen) { + res += " from_fen " + info.initial_fen; + } responses.push_back(res); SendResponses(responses); } diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index d54988061e..400b406fdb 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -204,7 +204,7 @@ SelfPlayTournament::SelfPlayTournament( void SelfPlayTournament::PlayOneGame(int game_number) { bool player1_black; // Whether player1 will player as black in this game. - MoveList opening; + Opening opening; { Mutex::Lock lock(mutex_); player1_black = ((game_number % 2) == 1) != first_game_black_; @@ -281,7 +281,7 @@ void SelfPlayTournament::PlayOneGame(int game_number) { last_thinking_info = std::move(rich_info); } }; - opt.discarded_callback = [this](const MoveList& moves) { + opt.discarded_callback = [this](const Opening& moves) { // Only track discards if discard start chance is non-zero. if (kDiscardedStartChance == 0.0f) return; Mutex::Lock lock(mutex_); @@ -326,8 +326,9 @@ void SelfPlayTournament::PlayOneGame(int game_number) { game_info.game_result = game.GetGameResult(); game_info.is_black = player1_black; game_info.game_id = game_number; + game_info.initial_fen = opening.start_fen; game_info.moves = game.GetMoves(); - game_info.play_start_ply = opening.size(); + game_info.play_start_ply = opening.moves.size(); if (!enable_resign) { game_info.min_false_positive_threshold = game.GetWorstEvalForWinnerOrDraw(); diff --git a/src/selfplay/tournament.h b/src/selfplay/tournament.h index 48201d1833..df9fab5d1f 100644 --- a/src/selfplay/tournament.h +++ b/src/selfplay/tournament.h @@ -29,6 +29,7 @@ #include +#include "chess/pgn.h" #include "selfplay/game.h" #include "utils/mutex.h" #include "utils/optionsdict.h" @@ -73,11 +74,11 @@ class SelfPlayTournament { Mutex mutex_; // Whether first game will be black for player1. bool first_game_black_ GUARDED_BY(mutex_) = false; - std::vector discard_pile_ GUARDED_BY(mutex_); + std::vector discard_pile_ GUARDED_BY(mutex_); // Number of games which already started. int games_count_ GUARDED_BY(mutex_) = 0; bool abort_ GUARDED_BY(mutex_) = false; - std::vector openings_ GUARDED_BY(mutex_); + std::vector openings_ GUARDED_BY(mutex_); // Games in progress. Exposed here to be able to abort them in case if // Abort(). Stored as list and not vector so that threads can keep iterators // to them and not worry that it becomes invalid. 
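The new Opening struct above replaces plain move lists throughout the selfplay code, so every book entry now carries its own start FEN next to its moves. A minimal usage sketch of the reworked PgnReader, based only on the interfaces visible in this patch (the standalone main() and the "book.pgn" file name are illustrative assumptions, not part of lc0):

    // Sketch: load an opening book whose entries may carry [FEN "..."] tags.
    #include <iostream>
    #include <vector>

    #include "chess/pgn.h"

    int main() {
      lczero::PgnReader reader;
      reader.AddPgnFile("book.pgn");  // Hypothetical book file.
      // Entries without a FEN tag default to ChessBoard::kStartposFen.
      std::vector<lczero::Opening> openings = reader.ReleaseGames();
      for (const lczero::Opening& opening : openings) {
        std::cout << opening.start_fen << " (" << opening.moves.size()
                  << " book moves)\n";
      }
    }

Selfplay then threads the same struct through SelfPlayGame and the discarded_callback, which is why the gameready message can now report a from_fen field for non-standard start positions.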
From a89a03e7d371c9a6f363d1e33d4a3e319aa78ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Tue, 7 Apr 2020 17:52:22 -0400 Subject: [PATCH 091/151] Validate string values for integer flags (#1156) * First try. * Using from_chars * Demote member function * Back to member * Spacing --- src/utils/optionsparser.cc | 23 +++++++++++++++++++---- src/utils/optionsparser.h | 3 ++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/utils/optionsparser.cc b/src/utils/optionsparser.cc index c40bfa9e73..ab6fa9fb35 100644 --- a/src/utils/optionsparser.cc +++ b/src/utils/optionsparser.cc @@ -30,7 +30,7 @@ #include #include #include - +#include #include "utils/commandline.h" #include "utils/configfile.h" #include "utils/logging.h" @@ -409,13 +409,13 @@ IntOption::IntOption(const OptionId& id, int min, int max) : Option(id), min_(min), max_(max) {} void IntOption::SetValue(const std::string& value, OptionsDict* dict) { - SetVal(dict, std::stoi(value)); + SetVal(dict, ValidateIntString(value)); } bool IntOption::ProcessLongFlag(const std::string& flag, const std::string& value, OptionsDict* dict) { if (flag == GetLongFlag()) { - SetVal(dict, std::stoi(value)); + SetVal(dict, ValidateIntString(value)); return true; } return false; @@ -424,7 +424,7 @@ bool IntOption::ProcessLongFlag(const std::string& flag, bool IntOption::ProcessShortFlagWithValue(char flag, const std::string& value, OptionsDict* dict) { if (flag == GetShortFlag()) { - SetVal(dict, std::stoi(value)); + SetVal(dict, ValidateIntString(value)); return true; } return false; @@ -460,6 +460,21 @@ void IntOption::SetVal(OptionsDict* dict, const ValueType& val) const { dict->Set(GetId(), val); } +int IntOption::ValidateIntString(const std::string& val) const { + int result; + const auto end = val.data() + val.size(); + auto [ptr, err] = std::from_chars(val.data(), end, result); + if (err == std::errc::invalid_argument) { + throw Exception("Flag '--" + GetLongFlag() + "' has an invalid format."); + } else if (err == std::errc::result_out_of_range) { + throw Exception("Flag '--" + GetLongFlag() + "' is out of range."); + } else if (ptr != end) { + throw Exception("Flag '--" + GetLongFlag() + "' has trailing characters."); + } else { + return result; + } +} + ///////////////////////////////////////////////////////////////// // FloatOption ///////////////////////////////////////////////////////////////// diff --git a/src/utils/optionsparser.h b/src/utils/optionsparser.h index 0c718245ae..fe15a9f204 100644 --- a/src/utils/optionsparser.h +++ b/src/utils/optionsparser.h @@ -167,7 +167,8 @@ class IntOption : public OptionsParser::Option { ValueType GetVal(const OptionsDict&) const; void SetVal(OptionsDict* dict, const ValueType& val) const; - + int ValidateIntString(const std::string& val) const; + int min_; int max_; }; From cfaddfb4fb222fe969a4c6ace9a7c5a7ed416ac8 Mon Sep 17 00:00:00 2001 From: Gian-Carlo Pascutto Date: Wed, 8 Apr 2020 22:08:46 +0200 Subject: [PATCH 092/151] Check for presence of charconv headers. (#1183) Also update the instructions to indicate Ubuntu 18.04 needs GCC 8 or later regardless of the clang in use, because clang falls back to the GCC libstdc++ headers. Fixes issue #1182. --- README.md | 4 ++-- meson.build | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d1e9a5c7c1..28bffb62af 100644 --- a/README.md +++ b/README.md @@ -80,9 +80,9 @@ in ("Deep Learning"). 
#### Ubuntu 18.04 -For Ubuntu 18.04 you need the latest version of meson and clang-6.0 before performing the steps above: +For Ubuntu 18.04 you need the latest version of meson, g++-8 and clang-6.0 before performing the steps above: - sudo apt-get install clang-6.0 ninja-build pkg-config protobuf-compiler libprotobuf-dev meson + sudo apt-get install gcc-8 g++-8 clang-6.0 ninja-build pkg-config protobuf-compiler libprotobuf-dev meson CC=clang-6.0 CXX=clang++-6.0 INSTALL_PREFIX=~/.local ./build.sh Make sure that `~/.local/bin` is in your `PATH` environment variable. You can now type `lc0 --help` and start. diff --git a/meson.build b/meson.build index 92068149a9..32e8df1c7d 100644 --- a/meson.build +++ b/meson.build @@ -20,8 +20,8 @@ project('lc0', 'cpp', cc = meson.get_compiler('cpp') -if not cc.has_header('optional') or not cc.has_header('string_view') - error('Lc0 requires a compiler supporting C++17, for example g++ v7.0, ' + +if not cc.has_header('optional') or not cc.has_header('string_view') or not cc.has_header('charconv') + error('Lc0 requires a compiler supporting C++17, for example g++ v8.0, ' + 'clang v4.0 or later (with C++17 stdlib) and Visual Studio 2017 or ' + 'later.') endif From 9fc391fcf20da4eb3ab7263dacf2746cbc4f73fa Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 8 Apr 2020 23:09:34 +0300 Subject: [PATCH 093/151] update c++17 feature check and readme (#1181) --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 28bffb62af..48f0b78766 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,11 @@ Having successfully acquired Lc0 via either of these methods, proceed to the bui Building should be easier now than it was in the past. Please report any problems you have. -Aside from the git submodule, lc0 requires the Meson build system and at least one backend library for evaluating the neural network, as well as the required libraries `protobuf` and `zlib`. (`gtest` is optionally used for the test suite.) If your system already has those two libraries installed, they will be used; otherwise Meson will generate its own copy of the two (a "subproject"), which in turn requires that git is installed (yes, separately from cloning the actual lc0 repository). Meson also requires python and Ninja. +Aside from the git submodule, lc0 requires the Meson build system and at least one backend library for evaluating the neural network, as well as the required `zlib`. (`gtest` is optionally used for the test suite.) If your system already has this library installed, they will be used; otherwise Meson will generate its own copy of the two (a "subproject"), which in turn requires that git is installed (yes, separately from cloning the actual lc0 repository). Meson also requires python and Ninja. -Backend support includes (in theory) any CBLAS-compatible library for CPU usage, such as OpenBLAS or Intel's MKL. For GPUs, OpenCL and CUDA+cudnn are supported. +Backend support includes (in theory) any CBLAS-compatible library for CPU usage, such as OpenBLAS or Intel's DNNL or MKL. For GPUs, OpenCL and CUDA+cudnn are supported, while DX-12 can be used in Windows 10 with latest drivers. + +Finally, lc0 requires a compiler supporting C++17. Minimal versions seem to be g++ v8.0, clang v4.0 (with C++17 stdlib) or Visual Studio 2017. Given those basics, the OS and backend specific instructions are below. 
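The stricter header check above exists because the integer-flag validation added in #1156 parses option values with std::from_chars from <charconv>, and on Ubuntu 18.04 that header needs the libstdc++ shipped with GCC 8 or later (clang falls back to the same GCC headers). A minimal standalone sketch of that parsing pattern, mirroring the error cases handled in optionsparser.cc but not taken from the lc0 sources (the example value is arbitrary):

    #include <charconv>
    #include <iostream>
    #include <string>
    #include <system_error>

    int main() {
      const std::string value = "12345";
      int parsed = 0;
      const char* const end = value.data() + value.size();
      // from_chars reports both an error code and where parsing stopped.
      const auto [ptr, err] = std::from_chars(value.data(), end, parsed);
      if (err == std::errc::invalid_argument) {
        std::cout << "invalid format\n";
      } else if (err == std::errc::result_out_of_range) {
        std::cout << "out of range\n";
      } else if (ptr != end) {
        std::cout << "trailing characters\n";
      } else {
        std::cout << "parsed " << parsed << "\n";
      }
    }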
From fbbdb59a13890e8071210b55d078dbc4b42be6aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Wed, 8 Apr 2020 16:10:20 -0400 Subject: [PATCH 094/151] Update Readme section for Ubuntu 16.04: gcc-8, ninja (#1184) --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 48f0b78766..6864730997 100644 --- a/README.md +++ b/README.md @@ -91,14 +91,14 @@ Make sure that `~/.local/bin` is in your `PATH` environment variable. You can no #### Ubuntu 16.04 -For Ubuntu 16.04 you need the latest version of meson and clang-6.0 before performing the steps above: +For Ubuntu 16.04 you need the latest version of meson, ninja and also gcc-8.0 before performing the steps above: - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - - sudo apt-add-repository 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-6.0 main' + sudo add-apt-repository ppa:ubuntu-toolchain-r/test sudo apt-get update - sudo apt-get install clang-6.0 ninja-build protobuf-compiler libprotobuf-dev + sudo apt-get install gcc-8 g++-8 protobuf-compiler libprotobuf-dev pip3 install meson --user - CC=clang-6.0 CXX=clang++-6.0 INSTALL_PREFIX=~/.local ./build.sh + pip3 install ninja --user + CC=gcc-8 CXX=g++-8 INSTALL_PREFIX=~/.local ./build.sh Make sure that `~/.local/bin` is in your `PATH` environment variable. You can now type `lc0 --help` and start. From 9bc6be0a2942f3fa99dab7a54c4c0b643b7f0230 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 8 Apr 2020 23:29:02 +0300 Subject: [PATCH 095/151] make it possible to call backend without a net (#1179) * make it possible to call backend without a net * do not check optional net in multiplexing backends Co-authored-by: borg323 --- src/neural/blas/network_blas.cc | 9 +++++++-- src/neural/cuda/network_cudnn.cc | 13 ++++++++++--- src/neural/dx/network_dx.cc | 6 +++++- src/neural/factory.cc | 13 ++++++++----- src/neural/factory.h | 20 +++++++++++--------- src/neural/loader.cc | 4 ++-- src/neural/network_check.cc | 9 +++++---- src/neural/network_demux.cc | 12 +++++++----- src/neural/network_mux.cc | 12 +++++++----- src/neural/network_random.cc | 6 +++--- src/neural/network_rr.cc | 12 +++++++----- src/neural/network_tf_cc.cc | 17 +++++++++++++---- src/neural/opencl/network_opencl.cc | 8 ++++++-- 13 files changed, 91 insertions(+), 50 deletions(-) diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc index 71721f4bc8..f6255859b8 100644 --- a/src/neural/blas/network_blas.cc +++ b/src/neural/blas/network_blas.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018-2019 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -515,8 +515,13 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, } template -std::unique_ptr MakeBlasNetwork(const WeightsFile& weights, +std::unique_ptr MakeBlasNetwork(const std::optional& w, const OptionsDict& options) { + if (!w) { + throw Exception("The " + std::string(use_eigen ? 
"eigen" : "blas") + + " backend requires a network file."); + } + const WeightsFile& weights = *w; if (weights.format().network_format().network() != pblczero::NetworkFormat::NETWORK_CLASSICAL_WITH_HEADFORMAT && weights.format().network_format().network() != diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 4759cd9eed..de7d101b0b 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -864,8 +864,15 @@ void CudnnNetworkComputation::ComputeBlocking() { } template -std::unique_ptr MakeCudnnNetwork(const WeightsFile& weights, +std::unique_ptr MakeCudnnNetwork(const std::optional& w, const OptionsDict& options) { + if (!w) { + throw Exception( + "The cudnn" + + std::string(std::is_same::value ? "-fp16" : "") + + " backend requires a network file."); + } + const WeightsFile& weights = *w; if (weights.format().network_format().network() != pblczero::NetworkFormat::NETWORK_CLASSICAL_WITH_HEADFORMAT && weights.format().network_format().network() != @@ -903,8 +910,8 @@ std::unique_ptr MakeCudnnNetwork(const WeightsFile& weights, return std::make_unique>(weights, options); } -std::unique_ptr MakeCudnnNetworkAuto(const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr MakeCudnnNetworkAuto( + const std::optional& weights, const OptionsDict& options) { int gpu_id = options.GetOrDefault("gpu", 0); cudaDeviceProp deviceProp = {}; // No error checking here, this will be repeated later. diff --git a/src/neural/dx/network_dx.cc b/src/neural/dx/network_dx.cc index 8aaae82729..c1792f6898 100644 --- a/src/neural/dx/network_dx.cc +++ b/src/neural/dx/network_dx.cc @@ -1065,8 +1065,12 @@ InputsOutputsDx::~InputsOutputsDx() { if (moves_left_) delete[] op_moves_left_mem_final_; } -std::unique_ptr MakeDxNetwork(const WeightsFile& weights, +std::unique_ptr MakeDxNetwork(const std::optional& w, const OptionsDict& options) { + if (!w) { + throw Exception("The dx12 backend requires a network file."); + } + const WeightsFile& weights = *w; return std::make_unique(weights, options); } diff --git a/src/neural/factory.cc b/src/neural/factory.cc index b288845aaa..03c69ef682 100644 --- a/src/neural/factory.cc +++ b/src/neural/factory.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -85,9 +85,9 @@ std::vector NetworkFactory::GetBackendsList() const { return result; } -std::unique_ptr NetworkFactory::Create(const std::string& network, - const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr NetworkFactory::Create( + const std::string& network, const std::optional& weights, + const OptionsDict& options) { CERR << "Creating backend [" << network << "]..."; for (const auto& factory : factories_) { if (factory.name == network) { @@ -123,7 +123,10 @@ std::unique_ptr NetworkFactory::LoadNetwork( } else { CERR << "Loading weights file from: " << net_path; } - const WeightsFile weights = LoadWeightsFromFile(net_path); + std::optional weights; + if (!net_path.empty()) { + weights = LoadWeightsFromFile(net_path); + } OptionsDict network_options(&options); network_options.AddSubdictFromString(backend_options); diff --git a/src/neural/factory.h b/src/neural/factory.h index f9021d4d34..7c22e62825 100644 --- a/src/neural/factory.h +++ b/src/neural/factory.h @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,9 @@ #pragma once #include +#include #include + #include "neural/network.h" #include "neural/loader.h" #include "utils/optionsdict.h" @@ -39,7 +41,7 @@ namespace lczero { class NetworkFactory { public: using FactoryFunc = std::function( - const WeightsFile&, const OptionsDict&)>; + const std::optional&, const OptionsDict&)>; static NetworkFactory* Get(); @@ -61,7 +63,7 @@ class NetworkFactory { // Creates a backend given name and config. std::unique_ptr Create(const std::string& network, - const WeightsFile&, + const std::optional&, const OptionsDict& options); // Helper function to load the network from the options. Returns nullptr @@ -109,12 +111,12 @@ class NetworkFactory { friend class Register; }; -#define REGISTER_NETWORK_WITH_COUNTER2(name, func, priority, counter) \ - namespace { \ - static NetworkFactory::Register regH38fhs##counter( \ - name, \ - [](const WeightsFile& w, const OptionsDict& o) { return func(w, o); }, \ - priority); \ +#define REGISTER_NETWORK_WITH_COUNTER2(name, func, priority, counter) \ + namespace { \ + static NetworkFactory::Register regH38fhs##counter( \ + name, [](const std::optional& w, const OptionsDict& o) { \ + return func(w, o); \ + }, priority); \ } #define REGISTER_NETWORK_WITH_COUNTER(name, func, priority, counter) \ REGISTER_NETWORK_WITH_COUNTER2(name, func, priority, counter) diff --git a/src/neural/loader.cc b/src/neural/loader.cc index 1a77de88ae..955f90033b 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018-2019 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -232,7 +232,7 @@ std::string DiscoverWeightsFile() { } } - throw Exception("Network weights file not found."); + LOGFILE << "Network weights file not found."; return {}; } diff --git a/src/neural/network_check.cc b/src/neural/network_check.cc index 975569023b..7354007508 100644 --- a/src/neural/network_check.cc +++ b/src/neural/network_check.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -239,7 +239,8 @@ class CheckNetwork : public Network { static constexpr double kDefaultAbsoluteTolerance = 1e-5; static constexpr double kDefaultRelativeTolerance = 1e-4; - CheckNetwork(const WeightsFile& weights, const OptionsDict& options) { + CheckNetwork(const std::optional& weights, + const OptionsDict& options) { params_.mode = kDefaultMode; params_.absolute_tolerance = kDefaultAbsoluteTolerance; params_.relative_tolerance = kDefaultRelativeTolerance; @@ -343,8 +344,8 @@ class CheckNetwork : public Network { NetworkCapabilities capabilities_; }; -std::unique_ptr MakeCheckNetwork(const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr MakeCheckNetwork( + const std::optional& weights, const OptionsDict& options) { return std::make_unique(weights, options); } diff --git a/src/neural/network_demux.cc b/src/neural/network_demux.cc index a6c272f093..11c098a8cc 100644 --- a/src/neural/network_demux.cc +++ b/src/neural/network_demux.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -102,7 +102,8 @@ class DemuxingComputation : public NetworkComputation { class DemuxingNetwork : public Network { public: - DemuxingNetwork(const WeightsFile& weights, const OptionsDict& options) { + DemuxingNetwork(const std::optional& weights, + const OptionsDict& options) { minimum_split_size_ = options.GetOrDefault("minimum-split-size", 0); const auto parents = options.ListSubdicts(); if (parents.empty()) { @@ -117,7 +118,8 @@ class DemuxingNetwork : public Network { } } - void AddBackend(const std::string& name, const WeightsFile& weights, + void AddBackend(const std::string& name, + const std::optional& weights, const OptionsDict& opts) { const int nn_threads = opts.GetOrDefault("threads", 1); const std::string backend = opts.GetOrDefault("backend", name); @@ -235,8 +237,8 @@ void DemuxingComputation::ComputeBlocking() { dataready_cv_.wait(lock, [this]() { return dataready_ == 0; }); } -std::unique_ptr MakeDemuxingNetwork(const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr MakeDemuxingNetwork( + const std::optional& weights, const OptionsDict& options) { return std::make_unique(weights, options); } diff --git a/src/neural/network_mux.cc b/src/neural/network_mux.cc index 3c8b0ff109..e8d6ec71d3 100644 --- a/src/neural/network_mux.cc +++ b/src/neural/network_mux.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -88,7 +88,8 @@ class MuxingComputation : public NetworkComputation { class MuxingNetwork : public Network { public: - MuxingNetwork(const WeightsFile& weights, const OptionsDict& options) { + MuxingNetwork(const std::optional& weights, + const OptionsDict& options) { // int threads, int max_batch) //: network_(std::move(network)), max_batch_(max_batch) { @@ -105,7 +106,8 @@ class MuxingNetwork : public Network { } } - void AddBackend(const std::string& name, const WeightsFile& weights, + void AddBackend(const std::string& name, + const std::optional& weights, const OptionsDict& opts) { const int nn_threads = opts.GetOrDefault("threads", 1); const int max_batch = opts.GetOrDefault("max_batch", 256); @@ -222,8 +224,8 @@ void MuxingComputation::ComputeBlocking() { dataready_cv_.wait(lock, [this]() { return dataready_; }); } -std::unique_ptr MakeMuxingNetwork(const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr MakeMuxingNetwork( + const std::optional& weights, const OptionsDict& options) { return std::make_unique(weights, options); } diff --git a/src/neural/network_random.cc b/src/neural/network_random.cc index 639e6bcfe1..5b4a2661bb 100644 --- a/src/neural/network_random.cc +++ b/src/neural/network_random.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -131,8 +131,8 @@ class RandomNetwork : public Network { }; } // namespace -std::unique_ptr MakeRandomNetwork(const WeightsFile& /*weights*/, - const OptionsDict& options) { +std::unique_ptr MakeRandomNetwork( + const std::optional& /*weights*/, const OptionsDict& options) { return std::make_unique(options); } diff --git a/src/neural/network_rr.cc b/src/neural/network_rr.cc index 34946fec59..513631d0e7 100644 --- a/src/neural/network_rr.cc +++ b/src/neural/network_rr.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,8 @@ namespace { class RoundRobinNetwork : public Network { public: - RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) { + RoundRobinNetwork(const std::optional& weights, + const OptionsDict& options) { const auto parents = options.ListSubdicts(); if (parents.empty()) { // If options are empty, or multiplexer configured in root object, @@ -51,7 +52,8 @@ class RoundRobinNetwork : public Network { } } - void AddBackend(const std::string& name, const WeightsFile& weights, + void AddBackend(const std::string& name, + const std::optional& weights, const OptionsDict& opts) { const std::string backend = opts.GetOrDefault("backend", name); @@ -82,8 +84,8 @@ class RoundRobinNetwork : public Network { NetworkCapabilities capabilities_; }; -std::unique_ptr MakeRoundRobinNetwork(const WeightsFile& weights, - const OptionsDict& options) { +std::unique_ptr MakeRoundRobinNetwork( + const std::optional& weights, const OptionsDict& options) { return std::make_unique(weights, options); } diff --git a/src/neural/network_tf_cc.cc b/src/neural/network_tf_cc.cc index 8b2c11e75c..26b8096904 100644 --- a/src/neural/network_tf_cc.cc +++ b/src/neural/network_tf_cc.cc @@ -25,6 +25,10 @@ Program grant you additional permission to convey the resulting work. */ +// Hack around c++ version incompatibility. +#include +#undef ABSL_HAVE_STD_STRING_VIEW + #include #include #include @@ -436,8 +440,14 @@ std::unique_ptr TFNetwork::NewComputation() { } template -std::unique_ptr MakeTFNetwork(const WeightsFile& weights, +std::unique_ptr MakeTFNetwork(const std::optional& w, const OptionsDict& options) { + if (!w) { + throw Exception("The " + + std::string(CPU ? "tensorflow-cc-cpu" : "tensorflow-cc") + + " backend requires a network file."); + } + const WeightsFile& weights = *w; if (weights.format().network_format().network() != pblczero::NetworkFormat::NETWORK_CLASSICAL_WITH_HEADFORMAT && weights.format().network_format().network() != @@ -464,9 +474,8 @@ std::unique_ptr MakeTFNetwork(const WeightsFile& weights, " is not supported by Tensorflow C++ backend."); } return std::make_unique>( - weights, options, - weights.format().network_format().value() == - pblczero::NetworkFormat::VALUE_WDL); + weights, options, weights.format().network_format().value() == + pblczero::NetworkFormat::VALUE_WDL); } REGISTER_NETWORK("tensorflow-cc-cpu", MakeTFNetwork, 90) diff --git a/src/neural/opencl/network_opencl.cc b/src/neural/opencl/network_opencl.cc index 3c8b9e945e..95f193d48f 100644 --- a/src/neural/opencl/network_opencl.cc +++ b/src/neural/opencl/network_opencl.cc @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. 
- Copyright (C) 2018-2019 The LCZero Authors + Copyright (C) 2018-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -418,8 +418,12 @@ class OpenCLNetwork : public Network { bool moves_left_; }; -std::unique_ptr MakeOpenCLNetwork(const WeightsFile& weights, +std::unique_ptr MakeOpenCLNetwork(const std::optional& w, const OptionsDict& options) { + if (!w) { + throw Exception("The opencl backend requires a network file."); + } + const WeightsFile& weights = *w; if (weights.format().network_format().network() != pblczero::NetworkFormat::NETWORK_CLASSICAL_WITH_HEADFORMAT && weights.format().network_format().network() != From 7bb893fec8c281d6f263e5b50024ee50e5542d0a Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Thu, 9 Apr 2020 00:38:01 +0200 Subject: [PATCH 096/151] Refactor time manager code a bit, to allow alternative time managers. (#1163) * WIP * Some more changes. * Don't ignore move-overhead parameter for `go movetime` searches. * Added a comment. --- meson.build | 2 + src/engine.cc | 15 +- src/engine.h | 1 + src/mcts/stoppers/common.cc | 177 ++++++++++++++++++++++ src/mcts/stoppers/common.h | 51 +++++++ src/mcts/stoppers/factory.cc | 278 +++-------------------------------- src/mcts/stoppers/factory.h | 18 +-- src/mcts/stoppers/legacy.cc | 153 +++++++++++++++++++ src/mcts/stoppers/legacy.h | 37 +++++ src/mcts/stoppers/timemgr.h | 5 +- src/selfplay/game.cc | 1 + 11 files changed, 457 insertions(+), 281 deletions(-) create mode 100644 src/mcts/stoppers/common.cc create mode 100644 src/mcts/stoppers/common.h create mode 100644 src/mcts/stoppers/legacy.cc create mode 100644 src/mcts/stoppers/legacy.h diff --git a/meson.build b/meson.build index 32e8df1c7d..53fceb8dab 100644 --- a/meson.build +++ b/meson.build @@ -122,7 +122,9 @@ files += [ 'src/mcts/node.cc', 'src/mcts/params.cc', 'src/mcts/search.cc', + 'src/mcts/stoppers/common.cc', 'src/mcts/stoppers/factory.cc', + 'src/mcts/stoppers/legacy.cc', 'src/mcts/stoppers/stoppers.cc', 'src/mcts/stoppers/timemgr.cc', 'src/neural/cache.cc', diff --git a/src/engine.cc b/src/engine.cc index 05319a6290..e76d5cede1 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -73,9 +73,7 @@ MoveList StringsToMovelist(const std::vector& moves, EngineController::EngineController(std::unique_ptr uci_responder, const OptionsDict& options) - : options_(options), - uci_responder_(std::move(uci_responder)), - time_manager_(MakeLegacyTimeManager()) {} + : options_(options), uci_responder_(std::move(uci_responder)) {} void EngineController::PopulateOptions(OptionsParser* options) { using namespace std::placeholders; @@ -144,7 +142,7 @@ void EngineController::NewGame() { cache_.Clear(); search_.reset(); tree_.reset(); - time_manager_->ResetGame(); + CreateFreshTimeManager(); current_position_.reset(); UpdateFromUciOptions(); } @@ -171,7 +169,11 @@ void EngineController::SetupPosition( std::vector moves; for (const auto& move : moves_str) moves.emplace_back(move); const bool is_same_game = tree_->ResetToPosition(fen, moves); - if (!is_same_game) time_manager_->ResetGame(); + if (!is_same_game) CreateFreshTimeManager(); +} + +void EngineController::CreateFreshTimeManager() { + time_manager_ = MakeTimeManager(options_); } namespace { @@ -249,8 +251,7 @@ void EngineController::Go(const GoParams& params) { responder = std::make_unique(std::move(responder)); } - auto stopper = - time_manager_->GetStopper(options_, params, tree_->HeadPosition()); + auto stopper 
= time_manager_->GetStopper(params, tree_->HeadPosition()); search_ = std::make_unique( *tree_, network_.get(), std::move(responder), StringsToMovelist(params.searchmoves, tree_->HeadPosition().GetBoard()), diff --git a/src/engine.h b/src/engine.h index 0c4537aef4..b2d518eeae 100644 --- a/src/engine.h +++ b/src/engine.h @@ -80,6 +80,7 @@ class EngineController { void SetupPosition(const std::string& fen, const std::vector& moves); void ResetMoveTimer(); + void CreateFreshTimeManager(); const OptionsDict& options_; diff --git a/src/mcts/stoppers/common.cc b/src/mcts/stoppers/common.cc new file mode 100644 index 0000000000..a4e076b430 --- /dev/null +++ b/src/mcts/stoppers/common.cc @@ -0,0 +1,177 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "src/mcts/stoppers/common.h" + +namespace lczero { + +const OptionId kNNCacheSizeId{ + "nncache", "NNCacheSize", + "Number of positions to store in a memory cache. A large cache can speed " + "up searching, but takes memory."}; + +namespace { + +const OptionId kRamLimitMbId{ + "ramlimit-mb", "RamLimitMb", + "Maximum memory usage for the engine, in megabytes. The estimation is very " + "rough, and can be off by a lot. For example, multiple visits to a " + "terminal node counted several times, and the estimation assumes that all " + "positions have 30 possible moves. When set to 0, no RAM limit is " + "enforced."}; +const OptionId kMinimumKLDGainPerNodeId{ + "minimum-kldgain-per-node", "MinimumKLDGainPerNode", + "If greater than 0 search will abort unless the last " + "KLDGainAverageInterval nodes have an average gain per node of at least " + "this much."}; +const OptionId kKLDGainAverageIntervalId{ + "kldgain-average-interval", "KLDGainAverageInterval", + "Used to decide how frequently to evaluate the average KLDGainPerNode to " + "check the MinimumKLDGainPerNode, if specified."}; +const OptionId kSmartPruningFactorId{ + "smart-pruning-factor", "SmartPruningFactor", + "Do not spend time on the moves which cannot become bestmove given the " + "remaining time to search. When no other move can overtake the current " + "best, the search stops, saving the time. Values greater than 1 stop less " + "promising moves from being considered even earlier. Values less than 1 " + "causes hopeless moves to still have some attention. 
When set to 0, smart " + "pruning is deactivated."}; +const OptionId kMinimumSmartPruningBatchesId{ + "smart-pruning-minimum-batches", "SmartPruningMinimumBatches", + "Only allow smart pruning to stop search after at least this many batches " + "have been evaluated. It may be useful to have this value greater than the " + "number of search threads in use."}; +const OptionId kNodesAsPlayoutsId{ + "nodes-as-playouts", "NodesAsPlayouts", + "Treat UCI `go nodes` command as referring to playouts instead of visits."}; + +} // namespace + +void PopulateCommonStopperOptions(RunType for_what, OptionsParser* options) { + options->Add(kKLDGainAverageIntervalId, 1, 10000000) = 100; + options->Add(kMinimumKLDGainPerNodeId, 0.0f, 1.0f) = 0.0f; + options->Add(kSmartPruningFactorId, 0.0f, 10.0f) = + (for_what == RunType::kUci ? 1.33f : 0.00f); + options->Add(kMinimumSmartPruningBatchesId, 0, 10000) = 0; + options->Add(kNodesAsPlayoutsId) = false; + + if (for_what == RunType::kUci) { + options->Add(kRamLimitMbId, 0, 100000000) = 0; + options->HideOption(kNodesAsPlayoutsId); + } +} + +// Parameters needed for selfplay and uci, but not benchmark nor infinite mode. +void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, + const OptionsDict& options) { + // KLD gain. + const auto min_kld_gain = options.Get(kMinimumKLDGainPerNodeId); + if (min_kld_gain > 0.0f) { + stopper->AddStopper(std::make_unique( + min_kld_gain, options.Get(kKLDGainAverageIntervalId))); + } + + // Should be last in the chain. + const auto smart_pruning_factor = options.Get(kSmartPruningFactorId); + if (smart_pruning_factor > 0.0f) { + stopper->AddStopper(std::make_unique( + smart_pruning_factor, options.Get(kMinimumSmartPruningBatchesId))); + } +} + +namespace { +// Stoppers for uci mode only. +void PopulateCommonUciStoppers(ChainedSearchStopper* stopper, + const OptionsDict& options, + const GoParams& params, int64_t move_overhead) { + const bool infinite = params.infinite || params.ponder; + + // RAM limit watching stopper. + const auto cache_size_mb = options.Get(kNNCacheSizeId); + const int ram_limit = options.Get(kRamLimitMbId); + if (ram_limit) { + stopper->AddStopper( + std::make_unique(cache_size_mb, ram_limit)); + } + + // "go nodes" stopper. + if (params.nodes) { + if (options.Get(kNodesAsPlayoutsId)) { + stopper->AddStopper(std::make_unique(*params.nodes)); + } else { + stopper->AddStopper(std::make_unique(*params.nodes)); + } + } + + // "go movetime" stopper. + if (params.movetime && !infinite) { + stopper->AddStopper( + std::make_unique(*params.movetime - move_overhead)); + } + + // "go depth" stopper. + if (params.depth) { + stopper->AddStopper(std::make_unique(*params.depth)); + } + + // Add internal search tree stoppers when we want to automatically stop. 
+ if (!infinite) PopulateIntrinsicStoppers(stopper, options); +} + +class CommonTimeManager : public TimeManager { + public: + CommonTimeManager(std::unique_ptr child_mgr, + const OptionsDict& options, int64_t move_overhead) + : child_mgr_(std::move(child_mgr)), + options_(options), + move_overhead_(move_overhead) {} + + private: + std::unique_ptr GetStopper(const GoParams& params, + const Position& position) override { + auto result = std::make_unique(); + if (child_mgr_) + result->AddStopper(child_mgr_->GetStopper(params, position)); + PopulateCommonUciStoppers(result.get(), options_, params, move_overhead_); + return result; + } + + const std::unique_ptr child_mgr_; + const OptionsDict& options_; + const int64_t move_overhead_; +}; + +} // namespace + +std::unique_ptr MakeCommonTimeManager( + std::unique_ptr child_manager, const OptionsDict& options, + int64_t move_overhead) { + return std::make_unique(std::move(child_manager), options, + move_overhead); +} + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/common.h b/src/mcts/stoppers/common.h new file mode 100644 index 0000000000..dc9e98d846 --- /dev/null +++ b/src/mcts/stoppers/common.h @@ -0,0 +1,51 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +#include "mcts/stoppers/stoppers.h" +#include "utils/optionsdict.h" +#include "utils/optionsparser.h" + +namespace lczero { + +enum class RunType { kUci, kSelfplay }; +void PopulateCommonStopperOptions(RunType for_what, OptionsParser* options); + +// Option ID for a cache size. It's used from multiple places and there's no +// really nice place to declare, so let it be here. +extern const OptionId kNNCacheSizeId; + +// Populates KLDGain and SmartPruning stoppers. 
+void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, + const OptionsDict& options); + +std::unique_ptr MakeCommonTimeManager( + std::unique_ptr child_manager, const OptionsDict& options, + int64_t move_overhead); + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index 215093853f..ed58f7b4e2 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -30,289 +30,53 @@ #include #include "factory.h" +#include "mcts/stoppers/legacy.h" #include "mcts/stoppers/stoppers.h" +#include "utils/exception.h" namespace lczero { - -const OptionId kNNCacheSizeId{ - "nncache", "NNCacheSize", - "Number of positions to store in a memory cache. A large cache can speed " - "up searching, but takes memory."}; - namespace { -const OptionId kRamLimitMbId{ - "ramlimit-mb", "RamLimitMb", - "Maximum memory usage for the engine, in megabytes. The estimation is very " - "rough, and can be off by a lot. For example, multiple visits to a " - "terminal node counted several times, and the estimation assumes that all " - "positions have 30 possible moves. When set to 0, no RAM limit is " - "enforced."}; const OptionId kMoveOverheadId{ "move-overhead", "MoveOverheadMs", "Amount of time, in milliseconds, that the engine subtracts from it's " "total available time (to compensate for slow connection, interprocess " "communication, etc)."}; -const OptionId kSlowMoverId{ - "slowmover", "Slowmover", - "Budgeted time for a move is multiplied by this value, causing the engine " - "to spend more time (if value is greater than 1) or less time (if the " - "value is less than 1)."}; -const OptionId kTimeMidpointMoveId{ - "time-midpoint-move", "TimeMidpointMove", - "The move where the time budgeting algorithm guesses half of all " - "games to be completed by. Half of the time allocated for the first move " - "is allocated at approximately this move."}; -const OptionId kTimeSteepnessId{ - "time-steepness", "TimeSteepness", - "\"Steepness\" of the function the time budgeting algorithm uses to " - "consider when games are completed. Lower values leave more time for " - "the endgame, higher values use more time for each move before the " - "midpoint."}; -const OptionId kSpendSavedTimeId{ - "immediate-time-use", "ImmediateTimeUse", - "Fraction of time saved by smart pruning, which is added to the budget to " - "the next move rather than to the entire game. When 1, all saved time is " - "added to the next move's budget; when 0, saved time is distributed among " - "all future moves."}; -const OptionId kMinimumKLDGainPerNodeId{ - "minimum-kldgain-per-node", "MinimumKLDGainPerNode", - "If greater than 0 search will abort unless the last " - "KLDGainAverageInterval nodes have an average gain per node of at least " - "this much."}; -const OptionId kKLDGainAverageIntervalId{ - "kldgain-average-interval", "KLDGainAverageInterval", - "Used to decide how frequently to evaluate the average KLDGainPerNode to " - "check the MinimumKLDGainPerNode, if specified."}; -const OptionId kSmartPruningFactorId{ - "smart-pruning-factor", "SmartPruningFactor", - "Do not spend time on the moves which cannot become bestmove given the " - "remaining time to search. When no other move can overtake the current " - "best, the search stops, saving the time. Values greater than 1 stop less " - "promising moves from being considered even earlier. Values less than 1 " - "causes hopeless moves to still have some attention. 
When set to 0, smart " - "pruning is deactivated."}; -const OptionId kMinimumSmartPruningBatchesId{ - "smart-pruning-minimum-batches", "SmartPruningMinimumBatches", - "Only allow smart pruning to stop search after at least this many batches " - "have been evaluated. It may be useful to have this value greater than the " - "number of search threads in use."}; -const OptionId kNodesAsPlayoutsId{ - "nodes-as-playouts", "NodesAsPlayouts", - "Treat UCI `go nodes` command as referring to playouts instead of visits."}; +const OptionId kTimeManagerId{"time-manager", "TimeManager", + "Name and config of atime manager."}; } // namespace void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options) { - options->Add(kKLDGainAverageIntervalId, 1, 10000000) = 100; - options->Add(kMinimumKLDGainPerNodeId, 0.0f, 1.0f) = 0.0f; - options->Add(kSmartPruningFactorId, 0.0f, 10.0f) = - (for_what == RunType::kUci ? 1.33f : 0.00f); - options->Add(kMinimumSmartPruningBatchesId, 0, 10000) = 0; - + PopulateCommonStopperOptions(for_what, options); if (for_what == RunType::kUci) { - options->Add(kRamLimitMbId, 0, 100000000) = 0; options->Add(kMoveOverheadId, 0, 100000000) = 200; - options->Add(kSlowMoverId, 0.0f, 100.0f) = 1.0f; - options->Add(kTimeMidpointMoveId, 1.0f, 100.0f) = 51.5f; - options->Add(kTimeSteepnessId, 1.0f, 100.0f) = 7.0f; - options->Add(kSpendSavedTimeId, 0.0f, 1.0f) = 1.0f; - options->Add(kNodesAsPlayoutsId) = false; - - // Hide time curve options. - options->HideOption(kTimeMidpointMoveId); - options->HideOption(kTimeSteepnessId); - options->HideOption(kNodesAsPlayoutsId); + options->Add(kTimeManagerId) = "legacy"; } } -// Parameters needed for selfplay and uci, but not benchmark nor infinite mode. -void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, - const OptionsDict& options) { - // KLD gain. - const auto min_kld_gain = options.Get(kMinimumKLDGainPerNodeId); - if (min_kld_gain > 0.0f) { - stopper->AddStopper(std::make_unique( - min_kld_gain, options.Get(kKLDGainAverageIntervalId))); - } - - // Should be last in the chain. - const auto smart_pruning_factor = options.Get(kSmartPruningFactorId); - if (smart_pruning_factor > 0.0f) { - stopper->AddStopper(std::make_unique( - smart_pruning_factor, options.Get(kMinimumSmartPruningBatchesId))); - } -} - -namespace { -// Stoppers for uci mode only. -void PopulateStoppers(ChainedSearchStopper* stopper, const OptionsDict& options, - const GoParams& params) { - const bool infinite = params.infinite || params.ponder; +std::unique_ptr MakeTimeManager(const OptionsDict& options) { const int64_t move_overhead = options.Get(kMoveOverheadId); - // RAM limit watching stopper. - const auto cache_size_mb = options.Get(kNNCacheSizeId); - const int ram_limit = options.Get(kRamLimitMbId); - if (ram_limit) { - stopper->AddStopper( - std::make_unique(cache_size_mb, ram_limit)); - } - - // "go nodes" stopper. - if (params.nodes) { - if (options.Get(kNodesAsPlayoutsId)) { - stopper->AddStopper(std::make_unique(*params.nodes)); - } else { - stopper->AddStopper(std::make_unique(*params.nodes)); - } - } + OptionsDict tm_options; + tm_options.AddSubdictFromString(options.Get(kTimeManagerId)); + const auto managers = tm_options.ListSubdicts(); - // "go movetime" stopper. 
- if (params.movetime && !infinite) { - stopper->AddStopper( - std::make_unique(*params.movetime - move_overhead)); + std::unique_ptr time_manager; + if (managers.size() != 1) { + throw Exception("Exactly one time manager should be specified, " + + std::to_string(managers.size()) + " specified instead."); } - - // "go depth" stopper. - if (params.depth) { - stopper->AddStopper(std::make_unique(*params.depth)); + if (managers[0] == "legacy") { + time_manager = + MakeLegacyTimeManager(move_overhead, tm_options.GetSubdict("legacy")); } - - // Add internal search tree stoppers when we want to automatically stop. - if (!infinite) PopulateIntrinsicStoppers(stopper, options); -} - -class LegacyStopper : public TimeLimitStopper { - public: - LegacyStopper(int64_t deadline_ms, int64_t* time_piggy_bank) - : TimeLimitStopper(deadline_ms), time_piggy_bank_(time_piggy_bank) {} - virtual void OnSearchDone(const IterationStats& stats) override { - *time_piggy_bank_ += GetTimeLimitMs() - stats.time_since_movestart; + if (!time_manager) { + throw Exception("Unknown time manager: [" + managers[0] + "]"); } + tm_options.CheckAllOptionsRead(""); - private: - int64_t* const time_piggy_bank_; -}; - -float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) { - // An analysis of chess games shows that the distribution of game lengths - // looks like a log-logistic distribution. The mean residual time function - // calculates how many more moves are expected in the game given that we are - // at the current ply. Given that this function can be expensive to compute, - // we calculate the median residual time function instead. This is derived and - // shown to be similar to the mean residual time in "Some Useful Properties of - // Log-Logistic Random Variables for Health Care Simulations" (Clark & - // El-Taha, 2015). - // midpoint: The median length of games. - // steepness: How quickly the function drops off from its maximum value, - // around the midpoint. - const float move = ply / 2.0f; - return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness), - 1 / steepness) - - move; -} - -class LegacyTimeManager : public TimeManager { - public: - void ResetGame() override; - std::unique_ptr GetStopper(const OptionsDict& options, - const GoParams& params, - const Position& position) override; - - private: - std::unique_ptr CreateTimeManagementStopper( - const OptionsDict& options, const GoParams& params, - const Position& position); - // No need to be atomic as only one thread will update it. - int64_t time_spared_ms_ = 0; -}; -} // namespace - -std::unique_ptr MakeLegacyTimeManager() { - return std::make_unique(); -} - -void LegacyTimeManager::ResetGame() { time_spared_ms_ = 0; } - -std::unique_ptr LegacyTimeManager::CreateTimeManagementStopper( - const OptionsDict& options, const GoParams& params, - const Position& position) { - const bool is_black = position.IsBlackToMove(); - const std::optional& time = (is_black ? params.btime : params.wtime); - // If no time limit is given, don't stop on this condition. - if (params.infinite || params.ponder || !time) return nullptr; - - const int64_t move_overhead = options.Get(kMoveOverheadId); - const std::optional& inc = is_black ? params.binc : params.winc; - const int increment = inc ? std::max(int64_t(0), *inc) : 0; - - // How to scale moves time. 
- const float slowmover = options.Get(kSlowMoverId); - const float time_curve_midpoint = options.Get(kTimeMidpointMoveId); - const float time_curve_steepness = options.Get(kTimeSteepnessId); - - float movestogo = ComputeEstimatedMovesToGo( - position.GetGamePly(), time_curve_midpoint, time_curve_steepness); - - // If the number of moves remaining until the time control are less than - // the estimated number of moves left in the game, then use the number of - // moves until the time control instead. - if (params.movestogo && - *params.movestogo > 0 && // Ignore non-standard uci command. - *params.movestogo < movestogo) { - movestogo = *params.movestogo; - } - - // Total time, including increments, until time control. - auto total_moves_time = - std::max(0.0f, *time + increment * (movestogo - 1) - move_overhead); - - // If there is time spared from previous searches, the `time_to_squander` part - // of it will be used immediately, remove that from planning. - int time_to_squander = 0; - if (time_spared_ms_ > 0) { - time_to_squander = time_spared_ms_ * options.Get(kSpendSavedTimeId); - time_spared_ms_ -= time_to_squander; - total_moves_time -= time_to_squander; - } - - // Evenly split total time between all moves. - float this_move_time = total_moves_time / movestogo; - - // Only extend thinking time with slowmover if smart pruning can potentially - // reduce it. - constexpr int kSmartPruningToleranceMs = 200; - if (slowmover < 1.0 || - this_move_time * slowmover > kSmartPruningToleranceMs) { - this_move_time *= slowmover; - // If time is planned to be overused because of slowmover, remove excess - // of that time from spared time. - time_spared_ms_ -= this_move_time * (slowmover - 1); - } - - LOGFILE << "Budgeted time for the move: " << this_move_time << "ms(+" - << time_to_squander << "ms to squander). Remaining time " << *time - << "ms(-" << move_overhead << "ms overhead)"; - // Use `time_to_squander` time immediately. - this_move_time += time_to_squander; - - // Make sure we don't exceed current time limit with what we calculated. - auto deadline = - std::min(static_cast(this_move_time), *time - move_overhead); - return std::make_unique(deadline, &time_spared_ms_); -} - -std::unique_ptr LegacyTimeManager::GetStopper( - const OptionsDict& options, const GoParams& params, - const Position& position) { - auto result = std::make_unique(); - - // Time management stopper. - result->AddStopper(CreateTimeManagementStopper(options, params, position)); - // All the standard stoppers (go nodes, RAM limit, smart pruning, etc). - PopulateStoppers(result.get(), options, params); - return result; + return MakeCommonTimeManager(std::move(time_manager), options, move_overhead); } } // namespace lczero diff --git a/src/mcts/stoppers/factory.h b/src/mcts/stoppers/factory.h index 13ec147119..a8b0c15c7e 100644 --- a/src/mcts/stoppers/factory.h +++ b/src/mcts/stoppers/factory.h @@ -1,6 +1,6 @@ /* This file is part of Leela Chess Zero. - Copyright (C) 2019 The LCZero Authors + Copyright (C) 2019-2020 The LCZero Authors Leela Chess is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,27 +27,17 @@ #pragma once -#include "mcts/stoppers/stoppers.h" +#include "mcts/stoppers/common.h" #include "mcts/stoppers/timemgr.h" #include "utils/optionsdict.h" #include "utils/optionsparser.h" namespace lczero { -// Option ID for a cache size. It's used from multiple places and there's no -// really nice place to declare, so let it be here. 
-extern const OptionId kNNCacheSizeId; - -enum class RunType { kUci, kSelfplay }; - // Populates UCI/command line flags with time management options. void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options); -// Creates a time management ("Legacy" because it's planned to be replaced). -std::unique_ptr MakeLegacyTimeManager(); - -// Populates KLDGain and SmartPruning stoppers. -void PopulateIntrinsicStoppers(ChainedSearchStopper* stopper, - const OptionsDict& options); +// Creates a new time manager for a new search. +std::unique_ptr MakeTimeManager(const OptionsDict& dict); } // namespace lczero diff --git a/src/mcts/stoppers/legacy.cc b/src/mcts/stoppers/legacy.cc new file mode 100644 index 0000000000..2ad2c764b7 --- /dev/null +++ b/src/mcts/stoppers/legacy.cc @@ -0,0 +1,153 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "mcts/stoppers/stoppers.h" + +namespace lczero { + +namespace { + +class LegacyStopper : public TimeLimitStopper { + public: + LegacyStopper(int64_t deadline_ms, int64_t* time_piggy_bank) + : TimeLimitStopper(deadline_ms), time_piggy_bank_(time_piggy_bank) {} + virtual void OnSearchDone(const IterationStats& stats) override { + *time_piggy_bank_ += GetTimeLimitMs() - stats.time_since_movestart; + } + + private: + int64_t* const time_piggy_bank_; +}; + +class LegacyTimeManager : public TimeManager { + public: + LegacyTimeManager(int64_t move_overhead, const OptionsDict& params) + : move_overhead_(move_overhead), + slowmover_(params.GetOrDefault("slowmover", 1.0f)), + time_curve_midpoint_( + params.GetOrDefault("midpoint-move", 51.5f)), + time_curve_steepness_(params.GetOrDefault("steepness", 7.0f)), + spend_saved_time_(params.GetOrDefault("immediate-use", 1.0f)) {} + std::unique_ptr GetStopper(const GoParams& params, + const Position& position) override; + + private: + const int64_t move_overhead_; + const float slowmover_; + const float time_curve_midpoint_; + const float time_curve_steepness_; + const float spend_saved_time_; + // No need to be atomic as only one thread will update it. + int64_t time_spared_ms_ = 0; +}; + +float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) { + // An analysis of chess games shows that the distribution of game lengths + // looks like a log-logistic distribution. 
The mean residual time function + // calculates how many more moves are expected in the game given that we are + // at the current ply. Given that this function can be expensive to compute, + // we calculate the median residual time function instead. This is derived and + // shown to be similar to the mean residual time in "Some Useful Properties of + // Log-Logistic Random Variables for Health Care Simulations" (Clark & + // El-Taha, 2015). + // midpoint: The median length of games. + // steepness: How quickly the function drops off from its maximum value, + // around the midpoint. + const float move = ply / 2.0f; + return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness), + 1 / steepness) - + move; +} + +std::unique_ptr LegacyTimeManager::GetStopper( + const GoParams& params, const Position& position) { + const bool is_black = position.IsBlackToMove(); + const std::optional& time = (is_black ? params.btime : params.wtime); + // If no time limit is given, don't stop on this condition. + if (params.infinite || params.ponder || !time) return nullptr; + + const std::optional& inc = is_black ? params.binc : params.winc; + const int increment = inc ? std::max(int64_t(0), *inc) : 0; + + float movestogo = ComputeEstimatedMovesToGo( + position.GetGamePly(), time_curve_midpoint_, time_curve_steepness_); + + // If the number of moves remaining until the time control are less than + // the estimated number of moves left in the game, then use the number of + // moves until the time control instead. + if (params.movestogo && + *params.movestogo > 0 && // Ignore non-standard uci command. + *params.movestogo < movestogo) { + movestogo = *params.movestogo; + } + + // Total time, including increments, until time control. + auto total_moves_time = + std::max(0.0f, *time + increment * (movestogo - 1) - move_overhead_); + + // If there is time spared from previous searches, the `time_to_squander` part + // of it will be used immediately, remove that from planning. + int time_to_squander = 0; + if (time_spared_ms_ > 0) { + time_to_squander = time_spared_ms_ * spend_saved_time_; + time_spared_ms_ -= time_to_squander; + total_moves_time -= time_to_squander; + } + + // Evenly split total time between all moves. + float this_move_time = total_moves_time / movestogo; + + // Only extend thinking time with slowmover if smart pruning can potentially + // reduce it. + constexpr int kSmartPruningToleranceMs = 200; + if (slowmover_ < 1.0 || + this_move_time * slowmover_ > kSmartPruningToleranceMs) { + this_move_time *= slowmover_; + // If time is planned to be overused because of slowmover, remove excess + // of that time from spared time. + time_spared_ms_ -= this_move_time * (slowmover_ - 1); + } + + LOGFILE << "Budgeted time for the move: " << this_move_time << "ms(+" + << time_to_squander << "ms to squander). Remaining time " << *time + << "ms(-" << move_overhead_ << "ms overhead)"; + // Use `time_to_squander` time immediately. + this_move_time += time_to_squander; + + // Make sure we don't exceed current time limit with what we calculated. 
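+  // For example, with the option defaults (midpoint 51.5, steepness 7.0,
+  // slowmover 1.0, 200 ms move overhead), no increment and no spared time:
+  // at the start position with 60 s on the clock, movestogo evaluates to
+  // roughly 51.5, so about (60000 - 200) / 51.5 ~= 1161 ms is budgeted for
+  // the move, and the clamp below leaves that value unchanged.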
+ auto deadline = + std::min(static_cast(this_move_time), *time - move_overhead_); + return std::make_unique(deadline, &time_spared_ms_); +} + +} // namespace + +std::unique_ptr MakeLegacyTimeManager(int64_t move_overhead, + const OptionsDict& params) { + return std::make_unique(move_overhead, params); +} +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/legacy.h b/src/mcts/stoppers/legacy.h new file mode 100644 index 0000000000..9fff527063 --- /dev/null +++ b/src/mcts/stoppers/legacy.h @@ -0,0 +1,37 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +#include "utils/optionsdict.h" + +namespace lczero { + +std::unique_ptr MakeLegacyTimeManager(int64_t move_overhead, + const OptionsDict& params); + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/timemgr.h b/src/mcts/stoppers/timemgr.h index 6db412b793..ba8554041f 100644 --- a/src/mcts/stoppers/timemgr.h +++ b/src/mcts/stoppers/timemgr.h @@ -30,6 +30,7 @@ #include #include #include + #include "chess/uciloop.h" #include "utils/optionsdict.h" @@ -87,10 +88,8 @@ class SearchStopper { class TimeManager { public: virtual ~TimeManager() = default; - virtual void ResetGame() = 0; virtual std::unique_ptr GetStopper( - const OptionsDict& options, const GoParams& params, - const Position& position) = 0; + const GoParams& params, const Position& position) = 0; }; } // namespace lczero diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index 84b7f025e3..c5611868fd 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -29,6 +29,7 @@ #include +#include "mcts/stoppers/common.h" #include "mcts/stoppers/factory.h" #include "mcts/stoppers/stoppers.h" #include "neural/writer.h" From fe9a38c596cf83ea53c7b3baf6fbb4d4ea37e905 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 9 Apr 2020 08:38:33 +1000 Subject: [PATCH 097/151] Add moves left effect scaling factors. (#1175) * Add moves left effect scaling factors. * Update to use child Q instead of parent Q for scaling. 
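In effect, the per-child moves-left adjustment in the search.cc hunk below
becomes the following (a sketch using only names that appear in that hunk;
with the default factors from params.cc, a = 1 and b = c = 0, the scale term
is 1):

    float scale = a + b * std::abs(Q) + c * Q * Q;
    M = std::clamp(m_slope * (child_m - parent_m), -m_cap, m_cap) *
        std::copysign(1.0f, -Q) * scale;

Note that Q here is the child's Q (second bullet above) rather than the
parent's node_q used previously.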
--- src/mcts/params.cc | 21 +++++++++++++++++++++ src/mcts/params.h | 11 +++++++++++ src/mcts/search.cc | 8 ++++++-- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index e2cc22cd4c..17eb3c4aef 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -218,6 +218,18 @@ const OptionId SearchParams::kMovesLeftSlopeId{ "based on how many moves the move is estimated to shorten/lengthen the " "game. The move difference is multiplied with the slope and capped at " "MovesLeftMaxEffect."}; +const OptionId SearchParams::kMovesLeftConstantFactorId{ + "moves-left-constant-factor", "MovesLeftConstantFactor", + "A simple multiplier to the moves left effect, can be set to 0 to only use " + "an effect scaled by Q."}; +const OptionId SearchParams::kMovesLeftScaledFactorId{ + "moves-left-scaled-factor", "MovesLeftScaledFactor", + "A factor which is multiplied by the absolute Q of parent node and the " + "base moves left effect."}; +const OptionId SearchParams::kMovesLeftQuadraticFactorId{ + "moves-left-quadratic-factor", "MovesLeftQuadraticFactor", + "A factor which is multiplied by the square of Q of parent node and the " + "base moves left effect."}; const OptionId SearchParams::kShortSightednessId{ "short-sightedness", "ShortSightedness", "Used to focus more on short term gains over long term."}; @@ -293,6 +305,9 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMovesLeftMaxEffectId, 0.0f, 1.0f) = 0.0f; options->Add(kMovesLeftThresholdId, 0.0f, 1.0f) = 1.0f; options->Add(kMovesLeftSlopeId, 0.0f, 1.0f) = 0.001f; + options->Add(kMovesLeftConstantFactorId, 0.0f, 1.0f) = 1.0f; + options->Add(kMovesLeftScaledFactorId, 0.0f, 1.0f) = 0.0f; + options->Add(kMovesLeftQuadraticFactorId, 0.0f, 1.0f) = 0.0f; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; options->Add(kMaxConcurrentSearchersId, 0, 128) = 1; @@ -306,6 +321,9 @@ void SearchParams::Populate(OptionsParser* options) { options->HideOption(kLogLiveStatsId); options->HideOption(kDisplayCacheUsageId); options->HideOption(kRootHasOwnCpuctParamsId); + options->HideOption(kMovesLeftConstantFactorId); + options->HideOption(kMovesLeftScaledFactorId); + options->HideOption(kMovesLeftQuadraticFactorId); } SearchParams::SearchParams(const OptionsDict& options) @@ -346,6 +364,9 @@ SearchParams::SearchParams(const OptionsDict& options) kMovesLeftMaxEffect(options.Get(kMovesLeftMaxEffectId)), kMovesLeftThreshold(options.Get(kMovesLeftThresholdId)), kMovesLeftSlope(options.Get(kMovesLeftSlopeId)), + kMovesLeftConstantFactor(options.Get(kMovesLeftConstantFactorId)), + kMovesLeftScaledFactor(options.Get(kMovesLeftScaledFactorId)), + kMovesLeftQuadraticFactor(options.Get(kMovesLeftQuadraticFactorId)), kShortSightedness(options.Get(kShortSightednessId)), kDisplayCacheUsage(options.Get(kDisplayCacheUsageId)), kMaxConcurrentSearchers(options.Get(kMaxConcurrentSearchersId)), diff --git a/src/mcts/params.h b/src/mcts/params.h index f547e316b6..4ed15a71f8 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -96,6 +96,11 @@ class SearchParams { float GetMovesLeftMaxEffect() const { return kMovesLeftMaxEffect; } float GetMovesLeftThreshold() const { return kMovesLeftThreshold; } float GetMovesLeftSlope() const { return kMovesLeftSlope; } + float GetMovesLeftConstantFactor() const { return kMovesLeftConstantFactor; } + float GetMovesLeftScaledFactor() const { return kMovesLeftScaledFactor; } + float GetMovesLeftQuadraticFactor() const { + return 
kMovesLeftQuadraticFactor;
+  }
   bool GetDisplayCacheUsage() const { return kDisplayCacheUsage; }
   int GetMaxConcurrentSearchers() const { return kMaxConcurrentSearchers; }
   float GetSidetomoveDrawScore() const { return kDrawScoreSidetomove; }
@@ -143,6 +148,9 @@
   static const OptionId kHistoryFillId;
   static const OptionId kMovesLeftMaxEffectId;
   static const OptionId kMovesLeftThresholdId;
+  static const OptionId kMovesLeftConstantFactorId;
+  static const OptionId kMovesLeftScaledFactorId;
+  static const OptionId kMovesLeftQuadraticFactorId;
   static const OptionId kMovesLeftSlopeId;
   static const OptionId kShortSightednessId;
   static const OptionId kDisplayCacheUsageId;
@@ -186,6 +194,9 @@
   const float kMovesLeftMaxEffect;
   const float kMovesLeftThreshold;
   const float kMovesLeftSlope;
+  const float kMovesLeftConstantFactor;
+  const float kMovesLeftScaledFactor;
+  const float kMovesLeftQuadraticFactor;
   const float kShortSightedness;
   const bool kDisplayCacheUsage;
   const int kMaxConcurrentSearchers;
diff --git a/src/mcts/search.cc b/src/mcts/search.cc
index bb4c3c65a3..358a900c3c 100644
--- a/src/mcts/search.cc
+++ b/src/mcts/search.cc
@@ -1047,6 +1047,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
       }
     }

+    const float Q = child.GetQ(fpu, draw_score, params_.GetLogitQ());
     float M = 0.0f;
     if (do_moves_left_adjustment) {
       const float m_slope = params_.GetMovesLeftSlope();
@@ -1054,10 +1055,13 @@
       const float parent_m = node->GetM();
       const float child_m = child.GetM(parent_m);
       M = std::clamp(m_slope * (child_m - parent_m), -m_cap, m_cap) *
-          std::copysign(1.0f, node_q);
+          std::copysign(1.0f, -Q);
+      const float a = params_.GetMovesLeftConstantFactor();
+      const float b = params_.GetMovesLeftScaledFactor();
+      const float c = params_.GetMovesLeftQuadraticFactor();
+      M *= a + b * std::abs(Q) + c * Q * Q;
     }

-    const float Q = child.GetQ(fpu, draw_score, params_.GetLogitQ());
     const float score = child.GetU(puct_mult) + Q + M;
     if (score > best) {
       second_best = best;
From 9c12df09fd96c4660f19d92a60491c21914b3df5 Mon Sep 17 00:00:00 2001
From: Tilps
Date: Thu, 9 Apr 2020 15:30:50 +1000
Subject: [PATCH 098/151] Add input format type 3 support to encoder/training data/search. (#1162)

* Add input format type 3 support to encoder.
* No horizontal transform for castling.
* wopos.
* Point at updated submodule.
* Propagate transform a bit further.
* Fix test.
* Another try optimizing TransposeBitsInBytes
* cleanup some deadcode.
* Transform probabilities. Cache hits currently broken.
* Fix the cache hit case.
* Some fixes for crash in prefetch path.
* Send transform in the training data for rescorer to use.
* Centralize some bit manipulation into bititer.h
* Fix encoding enpassant for format 3.
* Some basic testing.
* Some basic testing of an endgame position.
* Bug fix with flip transform and test that caught it. Also some extra u's on constants to be safe.
* A test for king on diagonal transpose.
* Formatting.
* Formatting.
* Formatting.
* Fix bug.
* Add an enum to improve readability.
* Review feedback.
* Save some lines to make the function less bulky looking.
* Make enum names less 'common' to reduce collision concerns --- libs/lczero-common | 2 +- src/benchmark/backendbench.cc | 2 +- src/chess/bitboard.cc | 23 +- src/chess/bitboard.h | 13 +- src/mcts/node.cc | 48 ++-- src/mcts/search.cc | 48 ++-- src/mcts/search.h | 3 +- src/neural/encoder.cc | 141 +++++++++++- src/neural/encoder.h | 6 +- src/neural/encoder_test.cc | 404 +++++++++++++++++++++++++++++++++- src/utils/bititer.h | 35 +++ 11 files changed, 665 insertions(+), 60 deletions(-) diff --git a/libs/lczero-common b/libs/lczero-common index 5b8667e4ab..a6539d613c 160000 --- a/libs/lczero-common +++ b/libs/lczero-common @@ -1 +1 @@ -Subproject commit 5b8667e4ab51e18b2ea26ac221723d6dd8f95533 +Subproject commit a6539d613cf9076a0423b776dd5d03fc2ec0c3ac diff --git a/src/benchmark/backendbench.cc b/src/benchmark/backendbench.cc index 5fb7bbee94..951e03962e 100644 --- a/src/benchmark/backendbench.cc +++ b/src/benchmark/backendbench.cc @@ -75,7 +75,7 @@ void BackendBenchmark::Run() { for (int k = 0; k < i; k++) { computation->AddInput(EncodePositionForNN( network->GetCapabilities().input_format, - tree.GetPositionHistory(), 8, FillEmptyHistory::ALWAYS)); + tree.GetPositionHistory(), 8, FillEmptyHistory::ALWAYS, nullptr)); } computation->ComputeBlocking(); } diff --git a/src/chess/bitboard.cc b/src/chess/bitboard.cc index 3a58290604..0399556fbf 100644 --- a/src/chess/bitboard.cc +++ b/src/chess/bitboard.cc @@ -281,6 +281,19 @@ const int kKingCastleIndex = kMoveToIdx[BoardSquare("e1").as_int() * 64 + BoardSquare("h1").as_int()]; const int kQueenCastleIndex = kMoveToIdx[BoardSquare("e1").as_int() * 64 + BoardSquare("a1").as_int()]; + +BoardSquare Transform(BoardSquare sq, int transform) { + if ((transform & FlipTransform) != 0) { + sq.set(sq.row(), 7 - sq.col()); + } + if ((transform & MirrorTransform) != 0) { + sq.set(7 - sq.row(), sq.col()); + } + if ((transform & TransposeTransform) != 0) { + sq.set(7 - sq.col(), 7 - sq.row()); + } + return sq; +} } // namespace Move::Move(const std::string& str, bool black) { @@ -321,6 +334,14 @@ uint16_t Move::as_packed_int() const { } } -uint16_t Move::as_nn_index() const { return kMoveToIdx[as_packed_int()]; } +uint16_t Move::as_nn_index(int transform) const { + if (transform == 0) { + return kMoveToIdx[as_packed_int()]; + } + Move transformed = *this; + transformed.SetTo(Transform(to(), transform)); + transformed.SetFrom(Transform(from(), transform)); + return transformed.as_nn_index(0); +} } // namespace lczero diff --git a/src/chess/bitboard.h b/src/chess/bitboard.h index 3f478240ae..63e77559b1 100644 --- a/src/chess/bitboard.h +++ b/src/chess/bitboard.h @@ -165,14 +165,7 @@ class BitBoard { bool intersects(const BitBoard& other) const { return board_ & other.board_; } // Flips black and white side of a board. - void Mirror() { - board_ = (board_ & 0x00000000FFFFFFFF) << 32 | - (board_ & 0xFFFFFFFF00000000) >> 32; - board_ = (board_ & 0x0000FFFF0000FFFF) << 16 | - (board_ & 0xFFFF0000FFFF0000) >> 16; - board_ = - (board_ & 0x00FF00FF00FF00FF) << 8 | (board_ & 0xFF00FF00FF00FF00) >> 8; - } + void Mirror() { board_ = ReverseBytesInBytes(board_); } bool operator==(const BitBoard& other) const { return board_ == other.board_; @@ -261,7 +254,9 @@ class Move { uint16_t as_packed_int() const; // 0 .. 1857, to use in neural networks. - uint16_t as_nn_index() const; + // Transform is a bit field which describes a transform to be applied to the + // the move before converting it to an index. 
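+  // For example, as_nn_index(FlipTransform) looks the move up under its
+  // horizontally mirrored counterpart, e.g. e2e4 is indexed as d2d4.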
+ uint16_t as_nn_index(int transform) const; explicit operator bool() const { return data_ != 0; } bool operator==(const Move& other) { return data_ == other.data_; } diff --git a/src/mcts/node.cc b/src/mcts/node.cc index db43d43085..0fd185e4c4 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -321,16 +321,6 @@ void Node::ReleaseChildrenExceptOne(Node* node_to_save) { if (!child_) edges_ = EdgeList(); // Clear edges list. } -namespace { -// Reverse bits in every byte of a number -uint64_t ReverseBitsInBytes(uint64_t v) { - v = ((v >> 1) & 0x5555555555555555ull) | ((v & 0x5555555555555555ull) << 1); - v = ((v >> 2) & 0x3333333333333333ull) | ((v & 0x3333333333333333ull) << 2); - v = ((v >> 4) & 0x0F0F0F0F0F0F0F0Full) | ((v & 0x0F0F0F0F0F0F0F0Full) << 4); - return v; -} -} // namespace - V5TrainingData Node::GetV5TrainingData( GameResult game_result, const PositionHistory& history, FillEmptyHistory fill_empty_history, @@ -342,6 +332,15 @@ V5TrainingData Node::GetV5TrainingData( result.version = 5; result.input_format = input_format; + // Populate planes. + int transform; + InputPlanes planes = EncodePositionForNN(input_format, history, 8, + fill_empty_history, &transform); + int plane_idx = 0; + for (auto& plane : result.planes) { + plane = ReverseBitsInBytes(planes[plane_idx++].mask); + } + // Populate probabilities. auto total_n = GetChildrenVisits(); // Prevent garbage/invalid training data from being uploaded to server. @@ -355,18 +354,10 @@ V5TrainingData Node::GetV5TrainingData( -1); // Set moves probabilities according to their relative amount of visits. for (const auto& child : Edges()) { - result.probabilities[child.edge()->GetMove().as_nn_index()] = + result.probabilities[child.edge()->GetMove().as_nn_index(transform)] = total_n > 0 ? child.GetN() / static_cast(total_n) : 1; } - // Populate planes. - InputPlanes planes = - EncodePositionForNN(input_format, history, 8, fill_empty_history); - int plane_idx = 0; - for (auto& plane : result.planes) { - plane = ReverseBitsInBytes(planes[plane_idx++].mask); - } - const auto& position = history.Last(); const auto& castlings = position.GetBoard().castlings(); // Populate castlings. @@ -374,7 +365,9 @@ V5TrainingData Node::GetV5TrainingData( uint8_t queen_side = 1; uint8_t king_side = 1; // If frc trained, send the bit mask representing rook position. - if (input_format == pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE) { + if (input_format == pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE || + input_format == + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { queen_side <<= castlings.queenside_rook(); king_side <<= castlings.kingside_rook(); } @@ -385,8 +378,19 @@ V5TrainingData Node::GetV5TrainingData( result.castling_them_oo = castlings.they_can_00() ? king_side : 0; // Other params. - result.side_to_move = position.IsBlackToMove() ? 1 : 0; - result.deprecated_move_count = 0; + if (input_format == + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { + result.side_to_move = position.GetBoard().en_passant().as_int() >> 56; + if ((transform & FlipTransform) != 0) { + result.side_to_move = ReverseBitsInBytes(result.side_to_move); + } + // Send transform in deprecated move count so rescorer can reverse it to + // calculate the actual move list from the input data. + result.deprecated_move_count = transform; + } else { + result.side_to_move = position.IsBlackToMove() ? 1 : 0; + result.deprecated_move_count = 0; + } result.rule50_count = position.GetNoCaptureNoPawnPly(); // Game result. 
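Since the transform is only recorded here (in deprecated_move_count) and not
undone, a consumer such as the rescorer has to invert it itself. A minimal
sketch of that inversion, assuming the BoardTransform flags and bit helpers
added to src/utils/bititer.h later in this patch (UndoTransform is a
hypothetical helper, not part of the patch, and is assumed to live inside
namespace lczero so the names resolve):

    #include "utils/bititer.h"

    uint64_t UndoTransform(uint64_t plane_mask, int transform) {
      // Invert in reverse order of application; each step is an involution.
      if (transform & TransposeTransform) plane_mask = TransposeBitsInBytes(plane_mask);
      if (transform & MirrorTransform) plane_mask = ReverseBytesInBytes(plane_mask);
      if (transform & FlipTransform) plane_mask = ReverseBitsInBytes(plane_mask);
      return plane_mask;
    }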
diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 358a900c3c..9465a0aecd 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -275,7 +275,8 @@ std::vector Search::GetVerboseStats(Node* node) const { oss << std::left << std::setw(5) << edge.GetMove(is_black_to_move).as_string(); - oss << " (" << std::setw(4) << edge.GetMove().as_nn_index() << ")"; + // TODO: should this be displaying transformed index? + oss << " (" << std::setw(4) << edge.GetMove().as_nn_index(0) << ")"; oss << " N: " << std::right << std::setw(7) << edge.GetN() << " (+" << std::setw(2) << edge.GetNInFlight() << ") "; @@ -899,7 +900,9 @@ void SearchWorker::GatherMinibatch() { // Only send non-terminal nodes to a neural network. if (!node->IsTerminal()) { picked_node.nn_queried = true; - picked_node.is_cache_hit = AddNodeToComputation(node, true); + int transform; + picked_node.is_cache_hit = AddNodeToComputation(node, true, &transform); + picked_node.probability_transform = transform; } } @@ -1183,17 +1186,31 @@ void SearchWorker::ExtendNode(Node* node) { } // Returns whether node was already in cache. -bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) { +bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached, + int* transform_out) { const auto hash = history_.HashLast(params_.GetCacheHistoryLength() + 1); // If already in cache, no need to do anything. if (add_if_cached) { - if (computation_->AddInputByHash(hash)) return true; + if (computation_->AddInputByHash(hash)) { + if (transform_out) { + *transform_out = TransformForPosition( + search_->network_->GetCapabilities().input_format, history_); + } + return true; + } } else { - if (search_->cache_->ContainsKey(hash)) return true; + if (search_->cache_->ContainsKey(hash)) { + if (transform_out) { + *transform_out = TransformForPosition( + search_->network_->GetCapabilities().input_format, history_); + } + return true; + } } + int transform; auto planes = EncodePositionForNN(search_->network_->GetCapabilities().input_format, - history_, 8, params_.GetHistoryFill()); + history_, 8, params_.GetHistoryFill(), &transform); std::vector moves; @@ -1201,7 +1218,7 @@ bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) { // Legal moves are known, use them. moves.reserve(node->GetNumEdges()); for (const auto& edge : node->Edges()) { - moves.emplace_back(edge.GetMove().as_nn_index()); + moves.emplace_back(edge.GetMove().as_nn_index(transform)); } } else { // Cache pseudolegal moves. A bit of a waste, but faster. @@ -1210,11 +1227,12 @@ bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) { moves.reserve(pseudolegal_moves.size()); for (auto iter = pseudolegal_moves.begin(), end = pseudolegal_moves.end(); iter != end; ++iter) { - moves.emplace_back(iter->as_nn_index()); + moves.emplace_back(iter->as_nn_index(transform)); } } computation_->AddInput(hash, std::move(planes), std::move(moves)); + if (transform_out) *transform_out = transform; return false; } @@ -1255,7 +1273,7 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget, bool is_odd_depth) { // We are in a leaf, which is not yet being processed. if (!node || node->GetNStarted() == 0) { - if (AddNodeToComputation(node, false)) { + if (AddNodeToComputation(node, false, nullptr)) { // Make it return 0 to make it not use the slot, so that the function // tries hard to find something to cache even among unpopular moves. 
// In practice that slows things down a lot though, as it's not always @@ -1372,14 +1390,16 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process, // Calculate maximum first. float max_p = -std::numeric_limits::infinity(); for (auto edge : node->Edges()) { - max_p = - std::max(max_p, computation_->GetPVal(idx_in_computation, - edge.GetMove().as_nn_index())); + max_p = std::max(max_p, computation_->GetPVal( + idx_in_computation, + edge.GetMove().as_nn_index( + node_to_process->probability_transform))); } float total = 0.0; for (auto edge : node->Edges()) { - float p = - computation_->GetPVal(idx_in_computation, edge.GetMove().as_nn_index()); + float p = computation_->GetPVal( + idx_in_computation, + edge.GetMove().as_nn_index(node_to_process->probability_transform)); // Perform softmax and take into account policy softmax temperature T. // Note that we want to calculate (exp(p-max_p))^(1/T) = exp((p-max_p)/T). p = FastExp((p - max_p) / params_.GetPolicySoftmaxTemp()); diff --git a/src/mcts/search.h b/src/mcts/search.h index 86bbed8633..59b1bdce0e 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -285,6 +285,7 @@ class SearchWorker { bool nn_queried = false; bool is_cache_hit = false; bool is_collision = false; + int probability_transform = 0; static NodeToProcess Collision(Node* node, uint16_t depth, int collision_count) { @@ -304,7 +305,7 @@ class SearchWorker { NodeToProcess PickNodeToExtend(int collision_limit); void ExtendNode(Node* node); - bool AddNodeToComputation(Node* node, bool add_if_cached); + bool AddNodeToComputation(Node* node, bool add_if_cached, int* transform_out); int PrefetchIntoCache(Node* node, int budget, bool is_odd_depth); void FetchSingleNodeResult(NodeToProcess* node_to_process, int idx_in_computation); diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index ee1797475c..5386580ed9 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -35,17 +35,108 @@ namespace { const int kMoveHistory = 8; const int kPlanesPerBoard = 13; const int kAuxPlaneBase = kPlanesPerBoard * kMoveHistory; + +int CompareTransposing(BitBoard board, int initial_transform) { + uint64_t value = board.as_int(); + if ((initial_transform & FlipTransform) != 0) { + value = ReverseBitsInBytes(value); + } + if ((initial_transform & MirrorTransform) != 0) { + value = ReverseBytesInBytes(value); + } + auto alternative = TransposeBitsInBytes(value); + if (value < alternative) return -1; + if (value > alternative) return 1; + return 0; +} + +int ChooseTransform(const ChessBoard& board) { + // If there are any castling options no transform is valid. + // Even using FRC rules, king and queen side castle moves are not symmetrical. + if (!board.castlings().no_legal_castle()) { + return 0; + } + auto our_king = (board.kings() & board.ours()).as_int(); + int transform = NoTransform; + if ((our_king & 0x0F0F0F0F0F0F0F0FULL) != 0) { + transform |= FlipTransform; + our_king = ReverseBitsInBytes(our_king); + } + // If there are any pawns only horizontal flip is valid. + if (board.pawns().as_int() != 0) { + return transform; + } + if ((our_king & 0xFFFFFFFF00000000ULL) != 0) { + transform |= MirrorTransform; + our_king = ReverseBytesInBytes(our_king); + } + // Our king is now always in bottom right quadrant. + // Transpose for king in top right triangle, or if on diagonal whichever has + // the smaller integer value for each test scenario. 
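+  // (0xE0C08000ULL covers h2, g3, h3, f4, g4 and h4, i.e. the quadrant squares
+  // strictly above the h1-a8 diagonal; 0x10204080ULL covers the diagonal
+  // squares h1, g2, f3 and e4 themselves.)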
+ if ((our_king & 0xE0C08000ULL) != 0) { + transform |= TransposeTransform; + } else if ((our_king & 0x10204080ULL) != 0) { + auto outcome = CompareTransposing(board.ours() | board.theirs(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.ours(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.kings(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.queens(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.rooks(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.knights(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + outcome = CompareTransposing(board.bishops(), transform); + if (outcome == -1) return transform; + if (outcome == 1) return transform | TransposeTransform; + // If all piece types are symmetrical and ours is symmetrical and + // ours+theirs is symmetrical, everything is symmetrical, so transpose is a + // no-op. + } + return transform; +} } // namespace +int TransformForPosition(pblczero::NetworkFormat::InputFormat input_format, + const PositionHistory& history) { + if (input_format != + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { + return 0; + } + const ChessBoard& board = history.Last().GetBoard(); + return ChooseTransform(board); +} + InputPlanes EncodePositionForNN( pblczero::NetworkFormat::InputFormat input_format, const PositionHistory& history, int history_planes, - FillEmptyHistory fill_empty_history) { + FillEmptyHistory fill_empty_history, int* transform_out) { InputPlanes result(kAuxPlaneBase + 8); + int transform = 0; + // Canonicalization format needs to stop early to avoid applying transform in + // history across incompatible transitions. It is also more canonical since + // history before these points is not relevant to the final result. + bool stop_early = + input_format == pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION; + // When stopping early, we want to know if castlings has changed, so capture + // it for the first board. + ChessBoard::Castlings castlings; { const ChessBoard& board = history.Last().GetBoard(); const bool we_are_black = board.flipped(); + if (input_format == + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { + transform = ChooseTransform(board); + } switch (input_format) { case pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE: { // "Legacy" input planes with: @@ -62,8 +153,10 @@ InputPlanes EncodePositionForNN( break; } - case pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE: { - // - Plane 104 for positions of rooks (both white and black) which have + case pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE: + case pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION: { + // - Plane 104 for positions of rooks (both white and black) which + // have // a-side (queenside) castling right. // - Plane 105 for positions of rooks (both white and black) which have // h-side (kingside) castling right. 
@@ -78,18 +171,24 @@ InputPlanes EncodePositionForNN( << cast.kingside_rook(); break; } - default: throw Exception("Unsupported input plane encoding " + std::to_string(input_format)); }; - if (we_are_black) result[kAuxPlaneBase + 4].SetAll(); + if (input_format == + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { + result[kAuxPlaneBase + 4].mask = board.en_passant().as_int(); + } else { + if (we_are_black) result[kAuxPlaneBase + 4].SetAll(); + } result[kAuxPlaneBase + 5].Fill(history.Last().GetNoCaptureNoPawnPly()); // Plane kAuxPlaneBase + 6 used to be movecount plane, now it's all zeros. // Plane kAuxPlaneBase + 7 is all ones to help NN find board edges. result[kAuxPlaneBase + 7].SetAll(); + if (stop_early) { + castlings = board.castlings(); + } } - bool flip = false; int history_idx = history.GetLength() - 1; for (int i = 0; i < std::min(history_planes, kMoveHistory); @@ -98,6 +197,14 @@ InputPlanes EncodePositionForNN( history.GetPositionAt(history_idx < 0 ? 0 : history_idx); const ChessBoard& board = flip ? position.GetThemBoard() : position.GetBoard(); + // Castling changes can't be repeated, so we can stop early. + if (stop_early && board.castlings().as_int() != castlings.as_int()) break; + // Enpassants can't be repeated, but we do need to always send the current + // position. + if (stop_early && history_idx != history.GetLength() - 1 && + !board.en_passant().empty()) { + break; + } if (history_idx < 0 && fill_empty_history == FillEmptyHistory::NO) break; // Board may be flipped so compare with position.GetBoard(). if (history_idx < 0 && fill_empty_history == FillEmptyHistory::FEN_ONLY && @@ -136,8 +243,28 @@ InputPlanes EncodePositionForNN( } } if (history_idx > 0) flip = !flip; + // If no capture no pawn is 0, the previous was start of game, capture or + // pawn push, so no need to go back further if stopping early. + if (stop_early && position.GetNoCaptureNoPawnPly() == 0) break; } - + if (transform != NoTransform) { + // Transform all masks. + for (int i = 0; i <= kAuxPlaneBase + 4; i++) { + auto v = result[i].mask; + if (v == 0 || v == ~0ULL) continue; + if ((transform & FlipTransform) != 0) { + v = ReverseBitsInBytes(v); + } + if ((transform & MirrorTransform) != 0) { + v = ReverseBytesInBytes(v); + } + if ((transform & TransposeTransform) != 0) { + v = TransposeBitsInBytes(v); + } + result[i].mask = v; + } + } + if (transform_out) *transform_out = transform; return result; } diff --git a/src/neural/encoder.h b/src/neural/encoder.h index 22f4e0fed4..25d74812e5 100644 --- a/src/neural/encoder.h +++ b/src/neural/encoder.h @@ -35,10 +35,14 @@ namespace lczero { enum class FillEmptyHistory { NO, FEN_ONLY, ALWAYS }; +// Returns the transform that would be used in EncodePositionForNN. +int TransformForPosition(pblczero::NetworkFormat::InputFormat input_format, + const PositionHistory& history); + // Encodes the last position in history for the neural network request. 
InputPlanes EncodePositionForNN( pblczero::NetworkFormat::InputFormat input_format, const PositionHistory& history, int history_planes, - FillEmptyHistory fill_empty_history); + FillEmptyHistory fill_empty_history, int* transform_out); } // namespace lczero diff --git a/src/neural/encoder_test.cc b/src/neural/encoder_test.cc index 0eb9f2184d..d41074479f 100644 --- a/src/neural/encoder_test.cc +++ b/src/neural/encoder_test.cc @@ -32,7 +32,7 @@ TEST(EncodePositionForNN, EncodeStartPosition) { InputPlanes encoded_planes = EncodePositionForNN(pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, - history, 8, FillEmptyHistory::NO); + history, 8, FillEmptyHistory::NO, nullptr); InputPlane our_pawns_plane = encoded_planes[0]; auto our_pawns_mask = 0ull; @@ -72,6 +72,14 @@ TEST(EncodePositionForNN, EncodeStartPosition) { 1ull << (8 * their_king_row + their_king_col)); EXPECT_EQ(their_king_plane.value, 1.0f); + // Start of game, no history. + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + // Auxiliary planes // It's the start of the game, so all castlings should be allowed. @@ -97,6 +105,184 @@ TEST(EncodePositionForNN, EncodeStartPosition) { EXPECT_EQ(all_ones_plane.value, 1.0f); } +TEST(EncodePositionForNN, EncodeStartPositionFormat2) { + ChessBoard board; + PositionHistory history; + board.SetFromFen(ChessBoard::kStartposFen); + history.Reset(board, 0, 1); + + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CASTLING_PLANE, history, 8, + FillEmptyHistory::NO, nullptr); + + InputPlane our_pawns_plane = encoded_planes[0]; + auto our_pawns_mask = 0ull; + for (auto i = 0; i < 8; i++) { + // First pawn is at square a2 (position 8) + // Last pawn is at square h2 (position 8 + 7 = 15) + our_pawns_mask |= 1ull << (8 + i); + } + EXPECT_EQ(our_pawns_plane.mask, our_pawns_mask); + EXPECT_EQ(our_pawns_plane.value, 1.0f); + + InputPlane our_knights_plane = encoded_planes[1]; + EXPECT_EQ(our_knights_plane.mask, (1ull << 1) | (1ull << 6)); + EXPECT_EQ(our_knights_plane.value, 1.0f); + + InputPlane our_bishops_plane = encoded_planes[2]; + EXPECT_EQ(our_bishops_plane.mask, (1ull << 2) | (1ull << 5)); + EXPECT_EQ(our_bishops_plane.value, 1.0f); + + InputPlane our_rooks_plane = encoded_planes[3]; + EXPECT_EQ(our_rooks_plane.mask, 1ull | (1ull << 7)); + EXPECT_EQ(our_rooks_plane.value, 1.0f); + + InputPlane our_queens_plane = encoded_planes[4]; + EXPECT_EQ(our_queens_plane.mask, 1ull << 3); + EXPECT_EQ(our_queens_plane.value, 1.0f); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 4); + EXPECT_EQ(our_king_plane.value, 1.0f); + + // Sanity check opponent's pieces + InputPlane their_king_plane = encoded_planes[11]; + auto their_king_row = 7; + auto their_king_col = 4; + EXPECT_EQ(their_king_plane.mask, + 1ull << (8 * their_king_row + their_king_col)); + EXPECT_EQ(their_king_plane.value, 1.0f); + + // Start of game, no history. + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + + // Auxiliary planes + + // Queen side castling at game start. + InputPlane can_castle_plane = encoded_planes[13 * 8 + 0]; + EXPECT_EQ(can_castle_plane.mask, 1ull | (1ull << 56)); + EXPECT_EQ(can_castle_plane.value, 1.0f); + // king side castling at game start. 
+ can_castle_plane = encoded_planes[13 * 8 + 1]; + EXPECT_EQ(can_castle_plane.mask, 1ull << 7 | (1ull << 63)); + EXPECT_EQ(can_castle_plane.value, 1.0f); + + // Zeroed castling planes. + InputPlane zeroed_castling_plane = encoded_planes[13 * 8 + 2]; + EXPECT_EQ(zeroed_castling_plane.mask, 0ull); + zeroed_castling_plane = encoded_planes[13 * 8 + 3]; + EXPECT_EQ(zeroed_castling_plane.mask, 0ull); + + InputPlane we_are_black_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(we_are_black_plane.mask, 0ull); + + InputPlane fifty_move_counter_plane = encoded_planes[13 * 8 + 5]; + EXPECT_EQ(fifty_move_counter_plane.mask, kAllSquaresMask); + EXPECT_EQ(fifty_move_counter_plane.value, 0.0f); + + // We no longer encode the move count, so that plane should be all zeros + InputPlane zeroed_move_count_plane = encoded_planes[13 * 8 + 6]; + EXPECT_EQ(zeroed_move_count_plane.mask, 0ull); + + InputPlane all_ones_plane = encoded_planes[13 * 8 + 7]; + EXPECT_EQ(all_ones_plane.mask, kAllSquaresMask); + EXPECT_EQ(all_ones_plane.value, 1.0f); +} + +TEST(EncodePositionForNN, EncodeStartPositionFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen(ChessBoard::kStartposFen); + history.Reset(board, 0, 1); + + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + InputPlane our_pawns_plane = encoded_planes[0]; + auto our_pawns_mask = 0ull; + for (auto i = 0; i < 8; i++) { + // First pawn is at square a2 (position 8) + // Last pawn is at square h2 (position 8 + 7 = 15) + our_pawns_mask |= 1ull << (8 + i); + } + EXPECT_EQ(our_pawns_plane.mask, our_pawns_mask); + EXPECT_EQ(our_pawns_plane.value, 1.0f); + + InputPlane our_knights_plane = encoded_planes[1]; + EXPECT_EQ(our_knights_plane.mask, (1ull << 1) | (1ull << 6)); + EXPECT_EQ(our_knights_plane.value, 1.0f); + + InputPlane our_bishops_plane = encoded_planes[2]; + EXPECT_EQ(our_bishops_plane.mask, (1ull << 2) | (1ull << 5)); + EXPECT_EQ(our_bishops_plane.value, 1.0f); + + InputPlane our_rooks_plane = encoded_planes[3]; + EXPECT_EQ(our_rooks_plane.mask, 1ull | (1ull << 7)); + EXPECT_EQ(our_rooks_plane.value, 1.0f); + + InputPlane our_queens_plane = encoded_planes[4]; + EXPECT_EQ(our_queens_plane.mask, 1ull << 3); + EXPECT_EQ(our_queens_plane.value, 1.0f); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 4); + EXPECT_EQ(our_king_plane.value, 1.0f); + + // Sanity check opponent's pieces + InputPlane their_king_plane = encoded_planes[11]; + auto their_king_row = 7; + auto their_king_col = 4; + EXPECT_EQ(their_king_plane.mask, + 1ull << (8 * their_king_row + their_king_col)); + EXPECT_EQ(their_king_plane.value, 1.0f); + + // Start of game, no history. + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + + // Auxiliary planes + + // Queen side castling at game start. + InputPlane can_castle_plane = encoded_planes[13 * 8 + 0]; + EXPECT_EQ(can_castle_plane.mask, 1ull | (1ull << 56)); + EXPECT_EQ(can_castle_plane.value, 1.0f); + // king side castling at game start. + can_castle_plane = encoded_planes[13 * 8 + 1]; + EXPECT_EQ(can_castle_plane.mask, 1ull << 7 | (1ull << 63)); + EXPECT_EQ(can_castle_plane.value, 1.0f); + + // Zeroed castling planes. 
+ InputPlane zeroed_castling_plane = encoded_planes[13 * 8 + 2]; + EXPECT_EQ(zeroed_castling_plane.mask, 0ull); + zeroed_castling_plane = encoded_planes[13 * 8 + 3]; + EXPECT_EQ(zeroed_castling_plane.mask, 0ull); + + InputPlane enpassant_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(enpassant_plane.mask, 0ull); + + InputPlane fifty_move_counter_plane = encoded_planes[13 * 8 + 5]; + EXPECT_EQ(fifty_move_counter_plane.mask, kAllSquaresMask); + EXPECT_EQ(fifty_move_counter_plane.value, 0.0f); + + // We no longer encode the move count, so that plane should be all zeros + InputPlane zeroed_move_count_plane = encoded_planes[13 * 8 + 6]; + EXPECT_EQ(zeroed_move_count_plane.mask, 0ull); + + InputPlane all_ones_plane = encoded_planes[13 * 8 + 7]; + EXPECT_EQ(all_ones_plane.mask, kAllSquaresMask); + EXPECT_EQ(all_ones_plane.value, 1.0f); +} + TEST(EncodePositionForNN, EncodeFiftyMoveCounter) { ChessBoard board; PositionHistory history; @@ -108,7 +294,7 @@ TEST(EncodePositionForNN, EncodeFiftyMoveCounter) { InputPlanes encoded_planes = EncodePositionForNN(pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, - history, 8, FillEmptyHistory::NO); + history, 8, FillEmptyHistory::NO, nullptr); InputPlane we_are_black_plane = encoded_planes[13 * 8 + 4]; EXPECT_EQ(we_are_black_plane.mask, kAllSquaresMask); @@ -123,7 +309,7 @@ TEST(EncodePositionForNN, EncodeFiftyMoveCounter) { encoded_planes = EncodePositionForNN(pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, - history, 8, FillEmptyHistory::NO); + history, 8, FillEmptyHistory::NO, nullptr); we_are_black_plane = encoded_planes[13 * 8 + 4]; EXPECT_EQ(we_are_black_plane.mask, 0ull); @@ -133,6 +319,218 @@ TEST(EncodePositionForNN, EncodeFiftyMoveCounter) { EXPECT_EQ(fifty_move_counter_plane.value, 2.0f); } +TEST(EncodePositionForNN, EncodeFiftyMoveCounterFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen(ChessBoard::kStartposFen); + history.Reset(board, 0, 1); + + // 1. Nf3 + history.Append(Move("g1f3", false)); + + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + InputPlane enpassant_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(enpassant_plane.mask, 0ull); + + InputPlane fifty_move_counter_plane = encoded_planes[13 * 8 + 5]; + EXPECT_EQ(fifty_move_counter_plane.mask, kAllSquaresMask); + EXPECT_EQ(fifty_move_counter_plane.value, 1.0f); + + // 1. 
Nf3 Nf6 + history.Append(Move("g8f6", true)); + + encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + enpassant_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(enpassant_plane.mask, 0ull); + + fifty_move_counter_plane = encoded_planes[13 * 8 + 5]; + EXPECT_EQ(fifty_move_counter_plane.mask, kAllSquaresMask); + EXPECT_EQ(fifty_move_counter_plane.value, 2.0f); +} + +TEST(EncodePositionForNN, EncodeEndGameFormat1) { + ChessBoard board; + PositionHistory history; + board.SetFromFen("3r4/4k3/8/1K6/8/8/8/8 w - - 0 1"); + history.Reset(board, 0, 1); + + int transform; + InputPlanes encoded_planes = + EncodePositionForNN(pblczero::NetworkFormat::INPUT_CLASSICAL_112_PLANE, + history, 8, FillEmptyHistory::NO, &transform); + + EXPECT_EQ(transform, NoTransform); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 33); + EXPECT_EQ(our_king_plane.value, 1.0f); + InputPlane their_king_plane = encoded_planes[11]; + EXPECT_EQ(their_king_plane.mask, 1ull << 52); + EXPECT_EQ(their_king_plane.value, 1.0f); +} + +TEST(EncodePositionForNN, EncodeEndGameFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen("3r4/4k3/8/1K6/8/8/8/8 w - - 0 1"); + history.Reset(board, 0, 1); + + int transform; + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, &transform); + + EXPECT_EQ(transform, FlipTransform | MirrorTransform | TransposeTransform); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 12); + EXPECT_EQ(our_king_plane.value, 1.0f); + InputPlane their_king_plane = encoded_planes[11]; + EXPECT_EQ(their_king_plane.mask, 1ull << 38); + EXPECT_EQ(their_king_plane.value, 1.0f); +} + +TEST(EncodePositionForNN, EncodeEndGameKingOnDiagonalFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen("3r4/4k3/2K5/8/8/8/8/8 w - - 0 1"); + history.Reset(board, 0, 1); + + int transform; + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, &transform); + + // After mirroring transforms, our king is on diagonal and other pieces are + // all below the diagonal, so transposing will increase the value of ours | + // theirs. + EXPECT_EQ(transform, FlipTransform | MirrorTransform); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 21); + EXPECT_EQ(our_king_plane.value, 1.0f); + InputPlane their_king_plane = encoded_planes[11]; + EXPECT_EQ(their_king_plane.mask, 1ull << 11); + EXPECT_EQ(their_king_plane.value, 1.0f); +} + +TEST(EncodePositionForNN, EncodeEnpassantFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen(ChessBoard::kStartposFen); + history.Reset(board, 0, 1); + // Move to en passant. + history.Append(Move("e2e4", false)); + history.Append(Move("g2g3", false)); + history.Append(Move("e4e5", false)); + history.Append(Move("f2f4", false)); + + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + InputPlane enpassant_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(enpassant_plane.mask, 1ull << 61); + + // Pawn move, no history. 
+ for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + + // Boring move. + history.Append(Move("g1f3", false)); + + encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + // No more en passant bit. + enpassant_plane = encoded_planes[13 * 8 + 4]; + EXPECT_EQ(enpassant_plane.mask, 0ull); + + // Previous was en passant, no history. + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + + // Another boring move. + history.Append(Move("g1f3", false)); + + encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, nullptr); + + // Should be one plane of history. + for (int i = 0; i < 7; i++) { + for (int j = 0; j < 13; j++) { + InputPlane zeroed_history = encoded_planes[13 + i * 13 + j]; + // 13th plane of first layer is repeats and there are none, so it should + // be empty. + if (i == 0 && j < 12) { + EXPECT_NE(zeroed_history.mask, 0ull); + } else { + EXPECT_EQ(zeroed_history.mask, 0ull); + } + } + } +} + +TEST(EncodePositionForNN, EncodeEarlyGameFlipFormat3) { + ChessBoard board; + PositionHistory history; + board.SetFromFen(ChessBoard::kStartposFen); + history.Reset(board, 0, 1); + // Move to break castling and king offside. + history.Append(Move("e2e4", false)); + history.Append(Move("e2e4", false)); + history.Append(Move("e1e2", false)); + history.Append(Move("e1e2", false)); + history.Append(Move("e2d3", false)); + // Their king offside, but not ours. + + int transform; + InputPlanes encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, &transform); + + EXPECT_EQ(transform, NoTransform); + + InputPlane our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 12); + EXPECT_EQ(our_king_plane.value, 1.0f); + InputPlane their_king_plane = encoded_planes[11]; + EXPECT_EQ(their_king_plane.mask, 1ull << 43); + EXPECT_EQ(their_king_plane.value, 1.0f); + + history.Append(Move("e2e3", false)); + + // Our king offside, but theirs is not. + encoded_planes = EncodePositionForNN( + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION, history, 8, + FillEmptyHistory::NO, &transform); + + EXPECT_EQ(transform, FlipTransform); + + our_king_plane = encoded_planes[5]; + EXPECT_EQ(our_king_plane.mask, 1ull << 20); + EXPECT_EQ(our_king_plane.value, 1.0f); + their_king_plane = encoded_planes[11]; + EXPECT_EQ(their_king_plane.mask, 1ull << 43); + EXPECT_EQ(their_king_plane.value, 1.0f); +} + } // namespace lczero int main(int argc, char** argv) { diff --git a/src/utils/bititer.h b/src/utils/bititer.h index 7372706827..f464c0d11f 100644 --- a/src/utils/bititer.h +++ b/src/utils/bititer.h @@ -52,6 +52,41 @@ inline unsigned long GetLowestBit(std::uint64_t value) { #endif } +enum BoardTransform { + NoTransform = 0, + // Horizontal mirror, ReverseBitsInBytes + FlipTransform = 1, + // Vertical mirror, ReverseBytesInBytes + MirrorTransform = 2, + // Diagonal transpose A1 to H8, TransposeBitsInBytes. 
+ TransposeTransform = 4, +}; + +inline uint64_t ReverseBitsInBytes(uint64_t v) { + v = ((v >> 1) & 0x5555555555555555ull) | ((v & 0x5555555555555555ull) << 1); + v = ((v >> 2) & 0x3333333333333333ull) | ((v & 0x3333333333333333ull) << 2); + v = ((v >> 4) & 0x0F0F0F0F0F0F0F0Full) | ((v & 0x0F0F0F0F0F0F0F0Full) << 4); + return v; +} + +inline uint64_t ReverseBytesInBytes(uint64_t v) { + v = (v & 0x00000000FFFFFFFF) << 32 | (v & 0xFFFFFFFF00000000) >> 32; + v = (v & 0x0000FFFF0000FFFF) << 16 | (v & 0xFFFF0000FFFF0000) >> 16; + v = (v & 0x00FF00FF00FF00FF) << 8 | (v & 0xFF00FF00FF00FF00) >> 8; + return v; +} + +// Transpose across the diagonal connecting bit 7 to bit 56. +inline uint64_t TransposeBitsInBytes(uint64_t v) { + v = (v & 0xAA00AA00AA00AA00ULL) >> 9 | (v & 0x0055005500550055ULL) << 9 | + (v & 0x55AA55AA55AA55AAULL); + v = (v & 0xCCCC0000CCCC0000ULL) >> 18 | (v & 0x0000333300003333ULL) << 18 | + (v & 0x3333CCCC3333CCCCULL); + v = (v & 0xF0F0F0F000000000ULL) >> 36 | (v & 0x000000000F0F0F0FULL) << 36 | + (v & 0x0F0F0F0FF0F0F0F0ULL); + return v; +} + // Iterates over all set bits of the value, lower to upper. The value of // dereferenced iterator is bit number (lower to upper, 0 bazed) template From 2065319ac88299a13234904b9db04b0c93aa5ce2 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Thu, 9 Apr 2020 08:32:45 +0300 Subject: [PATCH 099/151] update readme and a meson error message (#1185) * update readme and a meson error message * further readme update Co-authored-by: borg323 --- README.md | 11 ++++++++--- meson.build | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6864730997..8058eb018a 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Aside from the git submodule, lc0 requires the Meson build system and at least o Backend support includes (in theory) any CBLAS-compatible library for CPU usage, such as OpenBLAS or Intel's DNNL or MKL. For GPUs, OpenCL and CUDA+cudnn are supported, while DX-12 can be used in Windows 10 with latest drivers. -Finally, lc0 requires a compiler supporting C++17. Minimal versions seem to be g++ v8.0, clang v4.0 (with C++17 stdlib) or Visual Studio 2017. +Finally, lc0 requires a compiler supporting C++17. Minimal versions seem to be g++ v8.0, clang v5.0 (with C++17 stdlib) or Visual Studio 2017. Given those basics, the OS and backend specific instructions are below. @@ -84,7 +84,8 @@ in ("Deep Learning"). For Ubuntu 18.04 you need the latest version of meson, g++-8 and clang-6.0 before performing the steps above: - sudo apt-get install gcc-8 g++-8 clang-6.0 ninja-build pkg-config protobuf-compiler libprotobuf-dev meson + sudo apt-get install gcc-8 g++-8 clang-6.0 ninja-build pkg-config + pip3 install meson --user CC=clang-6.0 CXX=clang++-6.0 INSTALL_PREFIX=~/.local ./build.sh Make sure that `~/.local/bin` is in your `PATH` environment variable. You can now type `lc0 --help` and start. @@ -95,13 +96,17 @@ For Ubuntu 16.04 you need the latest version of meson, ninja and also gcc-8.0 be sudo add-apt-repository ppa:ubuntu-toolchain-r/test sudo apt-get update - sudo apt-get install gcc-8 g++-8 protobuf-compiler libprotobuf-dev + sudo apt-get install gcc-8 g++-8 pip3 install meson --user pip3 install ninja --user CC=gcc-8 CXX=g++-8 INSTALL_PREFIX=~/.local ./build.sh Make sure that `~/.local/bin` is in your `PATH` environment variable. You can now type `lc0 --help` and start. +If you want to build with clang-6.0 you still need g++-8 for the library. 
Replace the last line above with: + + sudo apt-get install clang-6.0 + CC=clang-6.0 CXX=clang++-6.0 INSTALL_PREFIX=~/.local ./build.sh #### openSUSE (all versions) diff --git a/meson.build b/meson.build index 53fceb8dab..bdb922c403 100644 --- a/meson.build +++ b/meson.build @@ -22,7 +22,7 @@ cc = meson.get_compiler('cpp') if not cc.has_header('optional') or not cc.has_header('string_view') or not cc.has_header('charconv') error('Lc0 requires a compiler supporting C++17, for example g++ v8.0, ' + - 'clang v4.0 or later (with C++17 stdlib) and Visual Studio 2017 or ' + + 'clang v5.0 or later (with C++17 stdlib) and Visual Studio 2017 or ' + 'later.') endif From 944c1bd16c86a42e5b21d4c30fd5e7cdacd66a8e Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 9 Apr 2020 19:26:37 +1000 Subject: [PATCH 100/151] Update changelog in preparation for 0.25.0-rc1 (#1186) --- changelog.txt | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index c556411c8f..9d31062c04 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,69 @@ -v0.24.0-rc1 (2020-02-23) +v0.25.0-rc1 (2020-04-09) +~~~~~~~ + +* Now requires a c++17 supporting compilation environment to build. +* Support for Moves Left Head based networks. Includes options to adjust search + to favour shorter/longer wins/losses based on the moves left head output. +* Mate score reporting is now possible, and move selection will prefer shorter + mates over longer ones when they are proven. +* Training now outputs v5 format data. This passes the moves left information + back to training. This also includes support for multiple sub formats, + including the existing standard, a new variant which can encode FRC960 + castling, and also a further extension of that which tries to make training + data cannonical, so there aren't multiple positions that are trivially + equivalent with different network inputs. +* Benchmark now includes a suite of 34 positions to test by default instead of + just start position. +* Tensorflow backend works once more, almost just as hard to compile as it used + to be though. +* `--noise` flag is gone, use `--noise-epsilon=0.25` to get the old behavior. +* Some bug fixes related to drawscore. +* Selfplay mode now defaults to the same value as match play for + `--root-has-own-cpuct-params` (true). +* Windows build script has been modernized. +* Separate Eigen backend option for CPU. +* Random backend no longer requires a network. +* Random backend supports producing training data of any input format sub type. +* Integer parameters now give better error messages when given invalid values. + +v0.24.1 (2020-03-15) +~~~~~~~ + +* Fix issues where logitq was being passed as drawscore and logitq wasn't + passed to some GetQ calls. Causing major performance issues when either + setting was non-default. + +v0.24.0 (2020-03-11) +~~~~~~~ + +* New parameter `--max-out-of-order-evals-factor` replaces + `--max-out-of-order-evals` that was introduced in v0.24.0-rc3 and provides + the factor to multiply the maximum batch size to set maximum number + out-of-order evals per batch. The default value of 1.0 keeps the behavior + of previous releases. +* Bug fix for hangs with very early stop command from non-conforming UCI hosts. + +v0.24.0-rc3 (2020-03-08) +~~~~~~~~~~~ + +* New parameter `--max-out-of-order-evals` to set maximum number out-of-order + evals per batch (was equal to the batch size before). +* It's now possible to embed networks into the binary. 
It allows easier builds + of .apk for Android. +* New parameter `--smart-pruning-minimum-batches` to only allow smart pruning + to stop after at least k batches, preventing insta-moves on slow backends. + +v0.24.0-rc2 (2020-03-01) +~~~~~~~~~~~ + +* All releases are now bundled with network id591226 (and the file date is old + enough so it has a lower priority than networks that you already may have + in your directory). +* Added a 'backendbench' mode to benchmark NN evaluation performance without + search. +* Android builds are added to the official releases. + +v0.24.0-rc1 (2020-02-23) ~~~~~~~~~~~ * Introduced DirectX12 backend. From 4123ae8ee4bf39a7edece32ceae8001a886ccbe3 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 9 Apr 2020 20:54:04 +1000 Subject: [PATCH 101/151] Add missed change log entry. (#1188) --- changelog.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog.txt b/changelog.txt index 9d31062c04..b3770fd805 100644 --- a/changelog.txt +++ b/changelog.txt @@ -20,6 +20,8 @@ * Some bug fixes related to drawscore. * Selfplay mode now defaults to the same value as match play for `--root-has-own-cpuct-params` (true). +* Some advanced time management parameters are now accessed via the new + `--time-manager` parameter instead of individual parameters. * Windows build script has been modernized. * Separate Eigen backend option for CPU. * Random backend no longer requires a network. From 86633cf2e4122448d5e8ef50c6c14de6d0bcabd1 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 9 Apr 2020 20:54:15 +1000 Subject: [PATCH 102/151] Bump version to 0.26.0-dev for branching 0.25. (#1187) --- src/version.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.inc b/src/version.inc index 04b7196f08..66af37067a 100644 --- a/src/version.inc +++ b/src/version.inc @@ -1,4 +1,4 @@ #define LC0_VERSION_MAJOR 0 -#define LC0_VERSION_MINOR 25 +#define LC0_VERSION_MINOR 26 #define LC0_VERSION_PATCH 0 #define LC0_VERSION_POSTFIX "dev" From f54fa0934d3beda5baecf5b97e7b305c9ec2bc83 Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 10 Apr 2020 00:29:28 +1000 Subject: [PATCH 103/151] Increase maximum maximum collision events. (#1189) --- src/mcts/params.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 17eb3c4aef..48bf323e86 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -285,7 +285,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kFpuValueAtRootId, -100.0f, 100.0f) = 1.0f; options->Add(kCacheHistoryLengthId, 0, 7) = 0; options->Add(kPolicySoftmaxTempId, 0.1f, 10.0f) = 1.607f; - options->Add(kMaxCollisionEventsId, 1, 1024) = 32; + options->Add(kMaxCollisionEventsId, 1, 65536) = 32; options->Add(kMaxCollisionVisitsId, 1, 1000000) = 9999; options->Add(kOutOfOrderEvalId) = true; options->Add(kMaxOutOfOrderEvalsId, 0.0f, 100.0f) = 1.0f; From a56709c503c002f708f7e4cf399ca19d239df442 Mon Sep 17 00:00:00 2001 From: Tilps Date: Mon, 13 Apr 2020 09:53:24 +1000 Subject: [PATCH 104/151] Increase possible range for moves left hidden parameters. 
(#1198) --- src/mcts/params.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 48bf323e86..29f247c53e 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -305,9 +305,9 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kMovesLeftMaxEffectId, 0.0f, 1.0f) = 0.0f; options->Add(kMovesLeftThresholdId, 0.0f, 1.0f) = 1.0f; options->Add(kMovesLeftSlopeId, 0.0f, 1.0f) = 0.001f; - options->Add(kMovesLeftConstantFactorId, 0.0f, 1.0f) = 1.0f; - options->Add(kMovesLeftScaledFactorId, 0.0f, 1.0f) = 0.0f; - options->Add(kMovesLeftQuadraticFactorId, 0.0f, 1.0f) = 0.0f; + options->Add(kMovesLeftConstantFactorId, -1.0f, 1.0f) = 1.0f; + options->Add(kMovesLeftScaledFactorId, -1.0f, 1.0f) = 0.0f; + options->Add(kMovesLeftQuadraticFactorId, -1.0f, 1.0f) = 0.0f; options->Add(kShortSightednessId, 0.0f, 1.0f) = 0.0f; options->Add(kDisplayCacheUsageId) = false; options->Add(kMaxConcurrentSearchersId, 0, 128) = 1; From 1e649cb43111dd078e9fcadb76dafb255dbfa867 Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 16 Apr 2020 17:46:42 +1000 Subject: [PATCH 105/151] Rename training data variables for clarity, and fix bug with training data exposed. (#1203) * Rename training data variables for clarity, and fix bug with training data exposed. * Review feedback. --- src/mcts/node.cc | 13 ++++++++----- src/neural/writer.h | 11 +++++++++-- src/selfplay/game.cc | 6 +++++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 0fd185e4c4..41f8a0c365 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -380,16 +380,19 @@ V5TrainingData Node::GetV5TrainingData( // Other params. if (input_format == pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { - result.side_to_move = position.GetBoard().en_passant().as_int() >> 56; + result.side_to_move_or_enpassant = + position.GetBoard().en_passant().as_int() >> 56; if ((transform & FlipTransform) != 0) { - result.side_to_move = ReverseBitsInBytes(result.side_to_move); + result.side_to_move_or_enpassant = + ReverseBitsInBytes(result.side_to_move_or_enpassant); } // Send transform in deprecated move count so rescorer can reverse it to // calculate the actual move list from the input data. - result.deprecated_move_count = transform; + result.invariance_info = + transform | (position.IsBlackToMove() ? (1u << 7) : 0u); } else { - result.side_to_move = position.IsBlackToMove() ? 1 : 0; - result.deprecated_move_count = 0; + result.side_to_move_or_enpassant = position.IsBlackToMove() ? 1 : 0; + result.invariance_info = 0; } result.rule50_count = position.GetNoCaptureNoPawnPly(); diff --git a/src/neural/writer.h b/src/neural/writer.h index 337209656a..c926983174 100644 --- a/src/neural/writer.h +++ b/src/neural/writer.h @@ -26,7 +26,9 @@ */ #include + #include + #include "utils/cppattributes.h" #pragma once @@ -44,9 +46,14 @@ struct V5TrainingData { uint8_t castling_us_oo; uint8_t castling_them_ooo; uint8_t castling_them_oo; - uint8_t side_to_move; + // For input type 3 contains enpassant column as a mask. + uint8_t side_to_move_or_enpassant; uint8_t rule50_count; - uint8_t deprecated_move_count; // left in to keep 8 int8 fields. + // For input type 3 contains a bit field indicating the transform that was + // used and the original side to move info. + // Side to move is in the top bit, transform in the lower bits. + // In versions prior to v5 this spot contained an unused move count field. 
+ uint8_t invariance_info; int8_t result; float root_q; float best_q; diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index c5611868fd..36723f4584 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -281,7 +281,11 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const { // different approach. float m_estimate = training_data_.back().best_m + training_data_.size() - 1; for (auto chunk : training_data_) { - const bool black_to_move = chunk.side_to_move; + bool black_to_move = chunk.side_to_move_or_enpassant; + if (chunk.input_format == + pblczero::NetworkFormat::INPUT_112_WITH_CANONICALIZATION) { + black_to_move = (chunk.invariance_info & (1u << 7)) != 0; + } if (game_result_ == GameResult::WHITE_WON) { chunk.result = black_to_move ? -1 : 1; } else if (game_result_ == GameResult::BLACK_WON) { From bcb9eb1687ef7b426880fa47ec88c5214147408f Mon Sep 17 00:00:00 2001 From: Alexis Olson Date: Fri, 17 Apr 2020 13:05:08 -0500 Subject: [PATCH 106/151] Rename no_capture variable and functions to rule50 (#1207) --- src/chess/board.cc | 8 ++++---- src/chess/board.h | 4 ++-- src/chess/position.cc | 24 ++++++++++++------------ src/chess/position.h | 8 ++++---- src/mcts/node.cc | 2 +- src/mcts/search.cc | 4 ++-- src/neural/encoder.cc | 4 ++-- src/syzygy/syzygy.cc | 6 +++--- 8 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/chess/board.cc b/src/chess/board.cc index e72ed2f914..400b3ac965 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -969,7 +969,7 @@ MoveList ChessBoard::GenerateLegalMoves() const { return result; } -void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply, +void ChessBoard::SetFromFen(const std::string& fen, int* rule50_ply, int* moves) { Clear(); int row = 7; @@ -980,10 +980,10 @@ void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply, string who_to_move; string castlings; string en_passant; - int no_capture_halfmoves; + int rule50_halfmoves; int total_moves; fen_str >> board >> who_to_move >> castlings >> en_passant >> - no_capture_halfmoves >> total_moves; + rule50_halfmoves >> total_moves; if (!fen_str) throw Exception("Bad fen string: " + fen); @@ -1096,7 +1096,7 @@ void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply, if (who_to_move == "b" || who_to_move == "B") { Mirror(); } - if (no_capture_ply) *no_capture_ply = no_capture_halfmoves; + if (rule50_ply) *rule50_ply = rule50_halfmoves; if (moves) *moves = total_moves; } diff --git a/src/chess/board.h b/src/chess/board.h index f53278f801..939f533581 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -67,10 +67,10 @@ class ChessBoard { static const BitBoard kPawnMask; // Sets position from FEN string. - // If @no_capture_ply and @moves are not nullptr, they are filled with number + // If @rule50_ply and @moves are not nullptr, they are filled with number // of moves without capture and number of full moves since the beginning of // the game. - void SetFromFen(const std::string& fen, int* no_capture_ply = nullptr, + void SetFromFen(const std::string& fen, int* rule50_ply = nullptr, int* moves = nullptr); // Nullifies the whole structure. 
void Clear(); diff --git a/src/chess/position.cc b/src/chess/position.cc index e747398053..15873952f4 100644 --- a/src/chess/position.cc +++ b/src/chess/position.cc @@ -32,17 +32,17 @@ namespace lczero { Position::Position(const Position& parent, Move m) - : no_capture_ply_(parent.no_capture_ply_ + 1), + : rule50_ply_(parent.rule50_ply_ + 1), ply_count_(parent.ply_count_ + 1) { them_board_ = parent.us_board_; - const bool capture = them_board_.ApplyMove(m); + const bool is_zeroing = them_board_.ApplyMove(m); us_board_ = them_board_; us_board_.Mirror(); - if (capture) no_capture_ply_ = 0; + if (is_zeroing) rule50_ply_ = 0; } -Position::Position(const ChessBoard& board, int no_capture_ply, int game_ply) - : no_capture_ply_(no_capture_ply), repetitions_(0), ply_count_(game_ply) { +Position::Position(const ChessBoard& board, int rule50_ply, int game_ply) + : rule50_ply_(rule50_ply), repetitions_(0), ply_count_(game_ply) { us_board_ = board; them_board_ = board; them_board_.Mirror(); @@ -67,17 +67,17 @@ GameResult PositionHistory::ComputeGameResult() const { } if (!board.HasMatingMaterial()) return GameResult::DRAW; - if (Last().GetNoCaptureNoPawnPly() >= 100) return GameResult::DRAW; + if (Last().GetRule50Ply() >= 100) return GameResult::DRAW; if (Last().GetGamePly() >= 450) return GameResult::DRAW; if (Last().GetRepetitions() >= 2) return GameResult::DRAW; return GameResult::UNDECIDED; } -void PositionHistory::Reset(const ChessBoard& board, int no_capture_ply, +void PositionHistory::Reset(const ChessBoard& board, int rule50_ply, int game_ply) { positions_.clear(); - positions_.emplace_back(board, no_capture_ply, game_ply); + positions_.emplace_back(board, rule50_ply, game_ply); } void PositionHistory::Append(Move m) { @@ -91,14 +91,14 @@ void PositionHistory::Append(Move m) { int PositionHistory::ComputeLastMoveRepetitions() const { const auto& last = positions_.back(); // TODO(crem) implement hash/cache based solution. - if (last.GetNoCaptureNoPawnPly() < 4) return 0; + if (last.GetRule50Ply() < 4) return 0; for (int idx = positions_.size() - 3; idx >= 0; idx -= 2) { const auto& pos = positions_[idx]; if (pos.GetBoard() == last.GetBoard()) { return 1 + pos.GetRepetitions(); } - if (pos.GetNoCaptureNoPawnPly() < 2) return 0; + if (pos.GetRule50Ply() < 2) return 0; } return 0; } @@ -107,7 +107,7 @@ bool PositionHistory::DidRepeatSinceLastZeroingMove() const { for (auto iter = positions_.rbegin(), end = positions_.rend(); iter != end; ++iter) { if (iter->GetRepetitions() > 0) return true; - if (iter->GetNoCaptureNoPawnPly() == 0) return false; + if (iter->GetRule50Ply() == 0) return false; } return false; } @@ -119,7 +119,7 @@ uint64_t PositionHistory::HashLast(int positions) const { if (!positions--) break; hash = HashCat(hash, iter->Hash()); } - return HashCat(hash, Last().GetNoCaptureNoPawnPly()); + return HashCat(hash, Last().GetRule50Ply()); } } // namespace lczero diff --git a/src/chess/position.h b/src/chess/position.h index f8a90067e1..8f35ec10d8 100644 --- a/src/chess/position.h +++ b/src/chess/position.h @@ -38,7 +38,7 @@ class Position { // From parent position and move. Position(const Position& parent, Move m); // From particular position. 
- Position(const ChessBoard& board, int no_capture_ply, int game_ply); + Position(const ChessBoard& board, int rule50_ply, int game_ply); uint64_t Hash() const; bool IsBlackToMove() const { return us_board_.flipped(); } @@ -54,7 +54,7 @@ class Position { void SetRepetitions(int repetitions) { repetitions_ = repetitions; } // Number of ply with no captures and pawn moves. - int GetNoCaptureNoPawnPly() const { return no_capture_ply_; } + int GetRule50Ply() const { return rule50_ply_; } // Gets board from the point of view of player to move. const ChessBoard& GetBoard() const { return us_board_; } @@ -70,7 +70,7 @@ class Position { ChessBoard them_board_; // How many half-moves without capture or pawn move was there. - int no_capture_ply_ = 0; + int rule50_ply_ = 0; // How many repetitions this position had before. For new positions it's 0. int repetitions_; // number of half-moves since beginning of the game. @@ -102,7 +102,7 @@ class PositionHistory { int GetLength() const { return positions_.size(); } // Resets the position to a given state. - void Reset(const ChessBoard& board, int no_capture_ply, int game_ply); + void Reset(const ChessBoard& board, int rule50_ply, int game_ply); // Appends a position to history. void Append(Move m); diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 41f8a0c365..8144638dcf 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -394,7 +394,7 @@ V5TrainingData Node::GetV5TrainingData( result.side_to_move_or_enpassant = position.IsBlackToMove() ? 1 : 0; result.invariance_info = 0; } - result.rule50_count = position.GetNoCaptureNoPawnPly(); + result.rule50_count = position.GetRule50Ply(); // Game result. if (game_result == GameResult::WHITE_WON) { diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 9465a0aecd..0a03c49409 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -1138,7 +1138,7 @@ void SearchWorker::ExtendNode(Node* node) { return; } - if (history_.Last().GetNoCaptureNoPawnPly() >= 100) { + if (history_.Last().GetRule50Ply() >= 100) { node->MakeTerminal(GameResult::DRAW); return; } @@ -1150,7 +1150,7 @@ void SearchWorker::ExtendNode(Node* node) { // Neither by-position or by-rule termination, but maybe it's a TB position. if (search_->syzygy_tb_ && board.castlings().no_legal_castle() && - history_.Last().GetNoCaptureNoPawnPly() == 0 && + history_.Last().GetRule50Ply() == 0 && (board.ours() | board.theirs()).count() <= search_->syzygy_tb_->max_cardinality()) { ProbeState state; diff --git a/src/neural/encoder.cc b/src/neural/encoder.cc index 5386580ed9..f8f914f2f4 100644 --- a/src/neural/encoder.cc +++ b/src/neural/encoder.cc @@ -181,7 +181,7 @@ InputPlanes EncodePositionForNN( } else { if (we_are_black) result[kAuxPlaneBase + 4].SetAll(); } - result[kAuxPlaneBase + 5].Fill(history.Last().GetNoCaptureNoPawnPly()); + result[kAuxPlaneBase + 5].Fill(history.Last().GetRule50Ply()); // Plane kAuxPlaneBase + 6 used to be movecount plane, now it's all zeros. // Plane kAuxPlaneBase + 7 is all ones to help NN find board edges. result[kAuxPlaneBase + 7].SetAll(); @@ -245,7 +245,7 @@ InputPlanes EncodePositionForNN( if (history_idx > 0) flip = !flip; // If no capture no pawn is 0, the previous was start of game, capture or // pawn push, so no need to go back further if stopping early. - if (stop_early && position.GetNoCaptureNoPawnPly() == 0) break; + if (stop_early && position.GetRule50Ply() == 0) break; } if (transform != NoTransform) { // Transform all masks. 
diff --git a/src/syzygy/syzygy.cc b/src/syzygy/syzygy.cc index 90a74c77ce..e2de06a08e 100644 --- a/src/syzygy/syzygy.cc +++ b/src/syzygy/syzygy.cc @@ -1598,7 +1598,7 @@ int SyzygyTablebase::probe_dtz(const Position& pos, ProbeState* result) { int min_DTZ = 0xFFFF; for (const Move& move : pos.GetBoard().GenerateLegalMoves()) { Position next_pos = Position(pos, move); - const bool zeroing = next_pos.GetNoCaptureNoPawnPly() == 0; + const bool zeroing = next_pos.GetRule50Ply() == 0; // For zeroing moves we want the dtz of the move _before_ doing it, // otherwise we will get the dtz of the next move sequence. Search the // position after the move to get the score sign (because even in a winning @@ -1629,7 +1629,7 @@ bool SyzygyTablebase::root_probe(const Position& pos, bool has_repeated, ProbeState result; auto root_moves = pos.GetBoard().GenerateLegalMoves(); // Obtain 50-move counter for the root position - const int cnt50 = pos.GetNoCaptureNoPawnPly(); + const int cnt50 = pos.GetRule50Ply(); // Check whether a position was repeated since the last zeroing move. const bool rep = has_repeated; int dtz; @@ -1640,7 +1640,7 @@ bool SyzygyTablebase::root_probe(const Position& pos, bool has_repeated, for (auto& m : root_moves) { Position next_pos = Position(pos, m); // Calculate dtz for the current move counting from the root position - if (next_pos.GetNoCaptureNoPawnPly() == 0) { + if (next_pos.GetRule50Ply() == 0) { // In case of a zeroing move, dtz is one of -101/-1/0/1/101 const WDLScore wdl = static_cast(-probe_wdl(next_pos, &result)); dtz = dtz_before_zeroing(wdl); From 97837107675c7da971c820f9e4afe5920934cffd Mon Sep 17 00:00:00 2001 From: Tilps Date: Sat, 18 Apr 2020 04:06:00 +1000 Subject: [PATCH 107/151] Fix for starting fens that differ only in 50 move ply. (#1204) --- src/mcts/node.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 8144638dcf..bef82ac8c6 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -471,7 +471,9 @@ bool NodeTree::ResetToPosition(const std::string& starting_fen, int no_capture_ply; int full_moves; starting_board.SetFromFen(starting_fen, &no_capture_ply, &full_moves); - if (gamebegin_node_ && history_.Starting().GetBoard() != starting_board) { + if (gamebegin_node_ && + (history_.Starting().GetBoard() != starting_board || + history_.Starting().GetNoCaptureNoPawnPly() != no_capture_ply)) { // Completely different position. DeallocateTree(); } From 809749bcf02c1268485c6d5108eb57d362de9e26 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Fri, 17 Apr 2020 12:27:14 -0700 Subject: [PATCH 108/151] Fix merge conflict between #1204 and #1207. (#1211) --- src/mcts/node.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index bef82ac8c6..cad8187f3c 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -473,7 +473,7 @@ bool NodeTree::ResetToPosition(const std::string& starting_fen, starting_board.SetFromFen(starting_fen, &no_capture_ply, &full_moves); if (gamebegin_node_ && (history_.Starting().GetBoard() != starting_board || - history_.Starting().GetNoCaptureNoPawnPly() != no_capture_ply)) { + history_.Starting().GetRule50Ply() != no_capture_ply)) { // Completely different position. DeallocateTree(); } From 2f46f4d7451b02a84b75e734827c43fb9d9b126d Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Fri, 17 Apr 2020 13:55:14 -0700 Subject: [PATCH 109/151] Set bounds for parents that cannot win or cannot lose to help find more draws. 
(#1160) Introduces bit fields to Node using uint8_t enums to track bounds and terminal type. --- src/chess/position.cc | 6 +++ src/chess/position.h | 4 +- src/mcts/node.cc | 10 +++- src/mcts/node.h | 25 ++++++++-- src/mcts/search.cc | 111 +++++++++++++++++++++++++++--------------- src/mcts/search.h | 2 + 6 files changed, 113 insertions(+), 45 deletions(-) diff --git a/src/chess/position.cc b/src/chess/position.cc index 15873952f4..d9162357f1 100644 --- a/src/chess/position.cc +++ b/src/chess/position.cc @@ -54,6 +54,12 @@ uint64_t Position::Hash() const { std::string Position::DebugString() const { return us_board_.DebugString(); } +GameResult operator-(const GameResult& res) { + return res == GameResult::BLACK_WON + ? GameResult::WHITE_WON + : res == GameResult::WHITE_WON ? GameResult::BLACK_WON : res; +} + GameResult PositionHistory::ComputeGameResult() const { const auto& board = Last().GetBoard(); auto legal_moves = board.GenerateLegalMoves(); diff --git a/src/chess/position.h b/src/chess/position.h index 8f35ec10d8..988990529b 100644 --- a/src/chess/position.h +++ b/src/chess/position.h @@ -77,7 +77,9 @@ class Position { int ply_count_ = 0; }; -enum class GameResult { UNDECIDED, WHITE_WON, DRAW, BLACK_WON }; +// These are ordered so max() prefers the best result. +enum class GameResult : uint8_t { UNDECIDED, BLACK_WON, DRAW, WHITE_WON }; +GameResult operator-(const GameResult& res); class PositionHistory { public: diff --git a/src/mcts/node.cc b/src/mcts/node.cc index cad8187f3c..ea97edd368 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -214,11 +214,14 @@ std::string Node::DebugString() const { << " Parent:" << parent_ << " Index:" << index_ << " Child:" << child_.get() << " Sibling:" << sibling_.get() << " WL:" << wl_ << " N:" << n_ << " N_:" << n_in_flight_ - << " Edges:" << edges_.size(); + << " Edges:" << edges_.size() + << " Bounds:" << static_cast(lower_bound_) - 2 << "," + << static_cast(upper_bound_) - 2; return oss.str(); } void Node::MakeTerminal(GameResult result, float plies_left, Terminal type) { + SetBounds(result, result); terminal_type_ = type; m_ = plies_left; if (result == GameResult::DRAW) { @@ -257,6 +260,11 @@ void Node::MakeNotTerminal() { } } +void Node::SetBounds(GameResult lower, GameResult upper) { + lower_bound_ = lower; + upper_bound_ = upper; +} + bool Node::TryStartScoreUpdate() { if (n_ == 0 && n_in_flight_ > 0) return false; ++n_in_flight_; diff --git a/src/mcts/node.h b/src/mcts/node.h index 54650e4f1c..217a5ceec2 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -131,10 +131,15 @@ class Node { using Iterator = Edge_Iterator; using ConstIterator = Edge_Iterator; - enum class Terminal : uint8_t { NonTerminal, Terminal, Tablebase }; + enum class Terminal : uint8_t { NonTerminal, EndOfGame, Tablebase }; // Takes pointer to a parent node and own index in a parent. - Node(Node* parent, uint16_t index) : parent_(parent), index_(index) {} + Node(Node* parent, uint16_t index) + : parent_(parent), + index_(index), + terminal_type_(Terminal::NonTerminal), + lower_bound_(GameResult::BLACK_WON), + upper_bound_(GameResult::WHITE_WON) {} // Allocates a new edge and a new node. The node has to be no edges before // that. @@ -166,13 +171,16 @@ class Node { // Returns whether the node is known to be draw/lose/win. 
bool IsTerminal() const { return terminal_type_ != Terminal::NonTerminal; } bool IsTbTerminal() const { return terminal_type_ == Terminal::Tablebase; } + typedef std::pair Bounds; + Bounds GetBounds() const { return {lower_bound_, upper_bound_}; } uint16_t GetNumEdges() const { return edges_.size(); } // Makes the node terminal and sets it's score. void MakeTerminal(GameResult result, float plies_left = 0.0f, - Terminal type = Terminal::Terminal); + Terminal type = Terminal::EndOfGame); // Makes the node not terminal and updates its visits. void MakeNotTerminal(); + void SetBounds(GameResult lower, GameResult upper); // If this node is not in the process of being expanded by another thread // (which can happen only if n==0 and n-in-flight==1), mark the node as @@ -301,9 +309,12 @@ class Node { // Index of this node is parent's edge list. uint16_t index_; - // 1 byte fields. + // Bit fields using parts of uint8_t fields initialized in the constructor. // Whether or not this node end game (with a winning of either sides or draw). - Terminal terminal_type_ = Terminal::NonTerminal; + Terminal terminal_type_ : 2; + // Best and worst result for this node. + GameResult lower_bound_ : 2; + GameResult upper_bound_ : 2; // TODO(mooskagh) Unfriend NodeTree. friend class NodeTree; @@ -372,6 +383,10 @@ class EdgeAndNode { // Whether the node is known to be terminal. bool IsTerminal() const { return node_ ? node_->IsTerminal() : false; } bool IsTbTerminal() const { return node_ ? node_->IsTbTerminal() : false; } + Node::Bounds GetBounds() const { + return node_ ? node_->GetBounds() + : Node::Bounds{GameResult::BLACK_WON, GameResult::WHITE_WON}; + } // Edge related getters. float GetP() const { return edge_->GetP(); } diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 0a03c49409..517090fb29 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -320,7 +320,16 @@ std::vector Search::GetVerboseStats(Node* node) const { } oss << ") "; - if (edge.IsTerminal()) oss << "(T) "; + const auto [edge_lower, edge_upper] = edge.GetBounds(); + oss << (edge_lower == edge_upper + ? "(T) " + : edge_lower == GameResult::DRAW && + edge_upper == GameResult::WHITE_WON + ? "(W) " + : edge_lower == GameResult::BLACK_WON && + edge_upper == GameResult::DRAW + ? "(L) " + : ""); infos.emplace_back(oss.str()); } return infos; @@ -1448,8 +1457,8 @@ void SearchWorker::DoBackupUpdateSingleNode( return; } - // For the first visit to a terminal, maybe convert ancestors to terminal too. - auto can_convert = + // For the first visit to a terminal, maybe update parent bounds too. + auto update_parent_bounds = params_.GetStickyEndgames() && node->IsTerminal() && !node->GetN(); // Backup V value up to a root. After 1 visit, V = Q. @@ -1473,40 +1482,9 @@ void SearchWorker::DoBackupUpdateSingleNode( // Nothing left to do without ancestors to update. if (!p) break; - // Convert parents to terminals except the root or those already converted. - can_convert = can_convert && p != search_->root_node_ && !p->IsTerminal(); - - // A non-winning terminal move needs all other moves to be similar. - auto all_losing = true; - auto found_tb = n->IsTbTerminal(); - float losing_m = 0.0f; - if (can_convert && v <= 0.0f) { - for (const auto& edge : p->Edges()) { - // Default_wl doesn't matter as WL is only used if IsTerminal is true. 
- const auto WL = edge.GetWL(0.0f); - can_convert = can_convert && edge.IsTerminal() && WL <= 0.0f; - if (!can_convert) break; - all_losing = all_losing && WL < 0.0f; - found_tb = found_tb || edge.IsTbTerminal(); - losing_m = std::max(losing_m, edge.GetM(0.0f)); - } - } - - // Convert the parent to a terminal loss if at least one move is winning or - // to a terminal win if all moves are losing; otherwise there's a mix of - // draws and losing, so at best it's a draw. - if (can_convert) { - // Doesn't give the correct distance to mate because siblings are not - // considered but more accurate than doing nothing. This shouldn't - // underestimate the distance to mate since at worst we miss shorter - // moves. - float terminal_m = std::max(losing_m, m) + 1.0f; - p->MakeTerminal( - v > 0.0f ? GameResult::BLACK_WON - : all_losing ? GameResult::WHITE_WON : GameResult::DRAW, - terminal_m, - found_tb ? Node::Terminal::Tablebase : Node::Terminal::Terminal); - } + // Try setting parent bounds except the root or those already terminal. + update_parent_bounds = update_parent_bounds && p != search_->root_node_ && + !p->IsTerminal() && MaybeSetBounds(p, m); // Q will be flipped for opponent. v = -v; @@ -1524,7 +1502,64 @@ void SearchWorker::DoBackupUpdateSingleNode( search_->total_playouts_ += node_to_process.multivisit; search_->cum_depth_ += node_to_process.depth * node_to_process.multivisit; search_->max_depth_ = std::max(search_->max_depth_, node_to_process.depth); -} // namespace lczero +} + +bool SearchWorker::MaybeSetBounds(Node* p, float m) const { + auto losing_m = 0.0f; + auto prefer_tb = false; + + // Determine the maximum (lower, upper) bounds across all children. + // (-1,-1) Loss (initial and lowest bounds) + // (-1, 0) Can't Win + // (-1, 1) Regular node + // ( 0, 0) Draw + // ( 0, 1) Can't Lose + // ( 1, 1) Win (highest bounds) + auto lower = GameResult::BLACK_WON; + auto upper = GameResult::BLACK_WON; + for (const auto& edge : p->Edges()) { + const auto [edge_lower, edge_upper] = edge.GetBounds(); + lower = std::max(edge_lower, lower); + upper = std::max(edge_upper, upper); + + // Checkmate is the best, so short-circuit. + const auto is_tb = edge.IsTbTerminal(); + if (edge_lower == GameResult::WHITE_WON && !is_tb) { + prefer_tb = false; + break; + } else if (edge_upper == GameResult::BLACK_WON) { + // Track the longest loss. + losing_m = std::max(losing_m, edge.GetM(0.0f)); + } + prefer_tb = prefer_tb || is_tb; + } + + // The parent's bounds are flipped from the children (-max(U), -max(L)) + // aggregated as if it was a single child (forced move) of the same bound. + // Loss (-1,-1) -> ( 1, 1) Win + // Can't Win (-1, 0) -> ( 0, 1) Can't Lose + // Regular (-1, 1) -> (-1, 1) Regular + // Draw ( 0, 0) -> ( 0, 0) Draw + // Can't Lose ( 0, 1) -> (-1, 0) Can't Win + // Win ( 1, 1) -> (-1,-1) Loss + + // Nothing left to do for ancestors if the parent would be a regular node. + if (lower == GameResult::BLACK_WON && upper == GameResult::WHITE_WON) { + return false; + } else if (lower == upper) { + // Search can stop at the parent if the bounds can't change anymore, so make + // it terminal preferring shorter wins and longer losses. + p->MakeTerminal( + -upper, + (upper == GameResult::BLACK_WON ? std::max(losing_m, m) : m) + 1.0f, + prefer_tb ? Node::Terminal::Tablebase : Node::Terminal::EndOfGame); + } else { + p->SetBounds(-upper, -lower); + } + + // Bounds were set, so indicate we should check the parent too. + return true; +} // 7. Update the Search's status and progress information. 
//~~~~~~~~~~~~~~~~~~~~ diff --git a/src/mcts/search.h b/src/mcts/search.h index 59b1bdce0e..07478802a6 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -310,6 +310,8 @@ class SearchWorker { void FetchSingleNodeResult(NodeToProcess* node_to_process, int idx_in_computation); void DoBackupUpdateSingleNode(const NodeToProcess& node_to_process); + // Returns whether a node's bounds were set based on its children. + bool MaybeSetBounds(Node* p, float m) const; Search* const search_; // List of nodes to process. From 6135e6bad57460ae88c22d7d35a512a78911dfed Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Sat, 18 Apr 2020 22:30:38 +0300 Subject: [PATCH 110/151] workaround for partial c++17 compiler support (#1210) --- meson.build | 8 +++++++- src/utils/optionsparser.cc | 22 +++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index bdb922c403..b35da097f3 100644 --- a/meson.build +++ b/meson.build @@ -20,12 +20,18 @@ project('lc0', 'cpp', cc = meson.get_compiler('cpp') -if not cc.has_header('optional') or not cc.has_header('string_view') or not cc.has_header('charconv') +if not cc.has_header('optional') or not cc.has_header('string_view') error('Lc0 requires a compiler supporting C++17, for example g++ v8.0, ' + 'clang v5.0 or later (with C++17 stdlib) and Visual Studio 2017 or ' + 'later.') endif +if not cc.has_header('charconv') + warning('Your compiler or library does not have full C++17 support. ' + + 'See the README for compilers that are known to be working. ' + + 'This will become an error in the future.') +endif + if cc.get_id() == 'clang' # Thread safety annotation add_project_arguments('-Wthread-safety', language : 'cpp') diff --git a/src/utils/optionsparser.cc b/src/utils/optionsparser.cc index ab6fa9fb35..e356a87fc4 100644 --- a/src/utils/optionsparser.cc +++ b/src/utils/optionsparser.cc @@ -30,12 +30,17 @@ #include #include #include -#include #include "utils/commandline.h" #include "utils/configfile.h" #include "utils/logging.h" #include "utils/string.h" +#if __has_include() +#include +#else +#define NO_CHARCONV +#endif + namespace lczero { namespace { const int kHelpIndent = 15; @@ -460,6 +465,7 @@ void IntOption::SetVal(OptionsDict* dict, const ValueType& val) const { dict->Set(GetId(), val); } +#ifndef NO_CHARCONV int IntOption::ValidateIntString(const std::string& val) const { int result; const auto end = val.data() + val.size(); @@ -474,6 +480,20 @@ int IntOption::ValidateIntString(const std::string& val) const { return result; } } +#else +int IntOption::ValidateIntString(const std::string& val) const { + char *end; + errno = 0; + int result = std::strtol(val.c_str(), &end, 10); + if (errno == ERANGE) { + throw Exception("Flag '--" + GetLongFlag() + "' is out of range."); + } else if (val.length() == 0 || *end != '\0') { + throw Exception("Flag '--" + GetLongFlag() + "' value is invalid."); + } else { + return result; + } +} +#endif ///////////////////////////////////////////////////////////////// // FloatOption From 186c2a2986df482b3187e0ab4ccbf8d008395fac Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 20 Apr 2020 13:33:51 +0300 Subject: [PATCH 111/151] build improvements (#1194) --- build.cmd | 12 ++++++------ build.sh | 5 +++++ meson.build | 8 +++++++- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/build.cmd b/build.cmd index 50d4f308d0..8342b9bc04 100644 --- a/build.cmd +++ b/build.cmd @@ -25,12 +25,12 @@ 
echo Deleting build directory: rd /s build if exist "C:\Program Files (x86)\Microsoft Visual Studio\2019" ( - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - set MSBuild=msbuild + where /q cl + if errorlevel 1 call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 set backend=vs2019 ) else ( - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 - set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" + where /q cl + if errorlevel 1 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 set backend=vs2017 ) @@ -58,5 +58,5 @@ pause cd build -%MSBuild% /m /p:Configuration=Release /p:Platform=x64 /p:WholeProgramOptimization=true ^ -/p:PreferredToolArchitecture=x64 lc0.sln /filelogger \ No newline at end of file +msbuild /m /p:Configuration=Release /p:Platform=x64 /p:WholeProgramOptimization=true ^ +/p:PreferredToolArchitecture=x64 lc0.sln /filelogger diff --git a/build.sh b/build.sh index eff36d18a3..deca6e99db 100755 --- a/build.sh +++ b/build.sh @@ -16,6 +16,11 @@ esac BUILDDIR=build/${BUILDTYPE} +if ! hash meson 2>/dev/null && [ -x ${HOME}/.local/bin/meson ] +then + export PATH=${PATH}:${HOME}/.local/bin +fi + if [ -f ${BUILDDIR}/build.ninja ] then meson configure ${BUILDDIR} -Dbuildtype=${BUILDTYPE} -Dprefix=${INSTALL_PREFIX:-/usr/local} "$@" diff --git a/meson.build b/meson.build index b35da097f3..d2b130fd45 100644 --- a/meson.build +++ b/meson.build @@ -464,7 +464,13 @@ endif ## ~~~~ # Pick latest from https://wrapdb.mesonbuild.com/zlib and put into # subprojects/zlib.wrap - deps += dependency('zlib', fallback: ['zlib', 'zlib_dep']) + if host_machine.system() == 'windows' + # In several cases where a zlib dependency was detected on windows, it + # caused trouble (crashes or failed builds). Better safe than sorry. + deps += subproject('zlib').get_variable('zlib_dep') + else + deps += dependency('zlib', fallback: ['zlib', 'zlib_dep']) + endif ## ~~~~~~~~ ## Profiler From afd087551bb2523e171bd42f80a49db8cc119aa4 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Tue, 21 Apr 2020 19:36:56 +0300 Subject: [PATCH 112/151] add option to start counting time with 'go' (#1216) --- src/engine.cc | 12 +++++++++++- src/engine.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/engine.cc b/src/engine.cc index e76d5cede1..9146334f57 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -58,6 +58,10 @@ const OptionId kUciChess960{ "Castling moves are encoded as \"king takes rook\"."}; const OptionId kShowWDL{"show-wdl", "UCI_ShowWDL", "Show win, draw and lose probability."}; +const OptionId kStrictUciTiming{"strict-uci-timing", "StrictTiming", + "The UCI host compensates for lag, waits for " + "the 'readyok' reply before sending 'go' and " + "only then starts timing."}; MoveList StringsToMovelist(const std::vector& moves, const ChessBoard& board) { @@ -92,6 +96,9 @@ void EngineController::PopulateOptions(OptionsParser* options) { ConfigFile::PopulateOptions(options); PopulateTimeManagementOptions(RunType::kUci, options); + + options->Add(kStrictUciTiming) = false; + options->HideOption(kStrictUciTiming); } void EngineController::ResetMoveTimer() { @@ -125,6 +132,9 @@ void EngineController::UpdateFromUciOptions() { // Cache size. 
cache_.SetCapacity(options_.Get(kNNCacheSizeId)); + + // Check whether we can update the move timer in "Go". + strict_uci_timing_ = options_.Get(kStrictUciTiming); } void EngineController::EnsureReady() { @@ -217,7 +227,7 @@ void EngineController::Go(const GoParams& params) { // hence have the same start time like this behaves, or should we check start // time hasn't changed since last call to go and capture the new start time // now? - if (!move_start_time_) ResetMoveTimer(); + if (strict_uci_timing_ || !move_start_time_) ResetMoveTimer(); go_params_ = params; std::unique_ptr responder = diff --git a/src/engine.h b/src/engine.h index b2d518eeae..bb1732f0f8 100644 --- a/src/engine.h +++ b/src/engine.h @@ -109,6 +109,9 @@ class EngineController { GoParams go_params_; std::optional move_start_time_; + + // If true we can reset move_start_time_ in "Go". + bool strict_uci_timing_; }; class EngineLoop : public UciLoop { From 2444baf997c09e2117dd937bb4bd273cfcd36326 Mon Sep 17 00:00:00 2001 From: cn4750 Date: Tue, 21 Apr 2020 20:54:41 -0400 Subject: [PATCH 113/151] Fix oversight in #1060 to allow book-length amount of games (#1227) The limit on the `games` setting was not adjusted to allow for the -2 setting. --- src/selfplay/tournament.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 400b406fdb..8d15381016 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -92,7 +92,7 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { SearchParams::Populate(options); options->Add(kShareTreesId) = true; - options->Add(kTotalGamesId, -1, 999999) = -1; + options->Add(kTotalGamesId, -2, 999999) = -1; options->Add(kParallelGamesId, 1, 256) = 8; options->Add(kPlayoutsId, -1, 999999999) = -1; options->Add(kVisitsId, -1, 999999999) = -1; From e60bf41e8b5b6fbe6b8e11bf62f1d7a988f9119f Mon Sep 17 00:00:00 2001 From: Tilps Date: Thu, 23 Apr 2020 00:07:36 +1000 Subject: [PATCH 114/151] Fix castling in pgn parsing for chess 960 start positions. (#1220) * Fix castling in pgn parsing for chess 960 start positions. * Cleanup. * Review feedback. 
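For context on the fix below: lc0 encodes castling internally as "king takes rook" (see the `UCI_Chess960` option text earlier in this series), so a PGN `O-O`/`O-O-O` has to be mapped to a move from the king's file to the castling rook's file on the back rank, not to the fixed e1g1/e1c1 squares that only hold for the classical start position. A minimal, self-contained sketch of that mapping, using plain file indices instead of lc0's `BoardSquare` and a helper name invented for illustration:

```cpp
#include <cstdio>
#include <string>

// Illustration only: build the "king takes rook" castling move given the
// files (0 = a, ..., 7 = h) of the king and of the chosen rook on rank 1.
std::string KingTakesRookCastle(int king_file, int rook_file) {
  const char* files = "abcdefgh";
  return std::string() + files[king_file] + '1' + files[rook_file] + '1';
}

int main() {
  // Classical start position: king on e1, kingside rook on h1 -> e1h1.
  std::printf("%s\n", KingTakesRookCastle(4, 7).c_str());
  // A Chess960 start with the king on b1 and the kingside rook on f1 -> b1f1,
  // which is why hard-coding e1g1/e1c1 broke castling for FRC start positions.
  std::printf("%s\n", KingTakesRookCastle(1, 5).c_str());
}
```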
--- src/chess/pgn.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/chess/pgn.h b/src/chess/pgn.h index e1629d3083..7fae7867d4 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -186,10 +186,14 @@ class PgnReader { } else if (san[0] == 'O' && san.size() > 2 && san[1] == '-' && san[2] == 'O') { Move m; + auto king_board = board.kings() & board.ours(); + BoardSquare king_sq(GetLowestBit(king_board.as_int())); if (san.size() > 4 && san[3] == '-' && san[4] == 'O') { - m = Move(BoardSquare(0, 4), BoardSquare(0, 2)); + m = Move(BoardSquare(0, king_sq.col()), + BoardSquare(0, board.castlings().queenside_rook())); } else { - m = Move(BoardSquare(0, 4), BoardSquare(0, 6)); + m = Move(BoardSquare(0, king_sq.col()), + BoardSquare(0, board.castlings().kingside_rook())); } return m; } From 163d241aecf1a54833fd9359edfd19de2663445b Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:15:55 +0300 Subject: [PATCH 115/151] allow overriding nvcc default c++ compiler (#1217) Co-authored-by: borg323 --- meson.build | 3 +++ meson_options.txt | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/meson.build b/meson.build index d2b130fd45..be7b9d6458 100644 --- a/meson.build +++ b/meson.build @@ -403,6 +403,9 @@ if get_option('build_backends') else cuda_arguments += ['--std=c++14', '-Xcompiler', '-fPIC'] endif + if get_option('nvcc_ccbin') != '' + cuda_arguments += ['-ccbin=' + get_option('nvcc_ccbin')] + endif foreach x : get_option('cudnn_include') cuda_arguments += ['-I', x] endforeach diff --git a/meson_options.txt b/meson_options.txt index 1a2f93f81f..98e60c3e10 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -122,3 +122,8 @@ option('embed', type: 'boolean', value: false, description: 'Use embedded net by default') + +option('nvcc_ccbin', + type: 'string', + value: '', + description: 'Override C++ compiler used by cuda nvcc') From 674de83c44554cf53e8690d927ca17567aab4177 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:16:14 +0300 Subject: [PATCH 116/151] Improve uci input error checking (#1223) Co-authored-by: borg323 --- src/chess/board.cc | 4 ++++ src/chess/board_test.cc | 21 +++++++++++++++++++++ src/chess/uciloop.cc | 2 ++ src/utils/optionsparser.cc | 24 +++++++++++++++++++++--- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/chess/board.cc b/src/chess/board.cc index 400b3ac965..08dde6f8be 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -990,6 +990,7 @@ void ChessBoard::SetFromFen(const std::string& fen, int* rule50_ply, for (char c : board) { if (c == '/') { --row; + if (row < 0) throw Exception("Bad fen string (too many rows): " + fen); col = 0; continue; } @@ -997,6 +998,7 @@ void ChessBoard::SetFromFen(const std::string& fen, int* rule50_ply, col += c - '0'; continue; } + if (col >= 8) throw Exception("Bad fen string (too many columns): " + fen); if (std::isupper(c)) { // White piece. 
@@ -1095,6 +1097,8 @@ void ChessBoard::SetFromFen(const std::string& fen, int* rule50_ply, if (who_to_move == "b" || who_to_move == "B") { Mirror(); + } else if (who_to_move != "w" && who_to_move != "W") { + throw Exception("Bad fen string (side to move): " + fen); } if (rule50_ply) *rule50_ply = rule50_halfmoves; if (moves) *moves = total_moves; diff --git a/src/chess/board_test.cc b/src/chess/board_test.cc index a0995e9456..65b46f44b8 100644 --- a/src/chess/board_test.cc +++ b/src/chess/board_test.cc @@ -2223,6 +2223,27 @@ TEST(ChessBoard, CastlingIsSameMove) { EXPECT_FALSE(board.IsSameMove("e2c2", "e2a2")); } +namespace { +void TestInvalid(std::string fen) { + ChessBoard board; + try { + board.SetFromFen(fen); + FAIL() << "Invalid Fen accepted: " + fen + "\n"; + } catch (...) { + SUCCEED(); + } +} +} // namespace + + +TEST(ChessBoard, InvalidFEN) { + TestInvalid("rnbqkbnr/ppppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"); + TestInvalid("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR/8 w KQkq - 0 1"); + TestInvalid("rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR g KQkq - 0 1"); + TestInvalid("rnbqkbnr/ppp2ppp/4p3/3pP3/8/8/PPPP1PPP/RNBQKBNR w KQkq i6 0 3"); + TestInvalid("rnbqkbnr/ppp2ppp/4p3/3pP3/8/8/PPPP1PPP/RNBQKBNR w KQkq A6 0 3"); +} + } // namespace lczero int main(int argc, char** argv) { diff --git a/src/chess/uciloop.cc b/src/chess/uciloop.cc index c4f11603dd..ee7c5e56df 100644 --- a/src/chess/uciloop.cc +++ b/src/chess/uciloop.cc @@ -116,6 +116,8 @@ int GetNumeric(const std::unordered_map& params, return std::stoi(str); } catch (std::invalid_argument&) { throw Exception("invalid value " + str); + } catch (const std::out_of_range&) { + throw Exception("out of range value " + str); } } diff --git a/src/utils/optionsparser.cc b/src/utils/optionsparser.cc index e356a87fc4..bdaa9a60a4 100644 --- a/src/utils/optionsparser.cc +++ b/src/utils/optionsparser.cc @@ -503,13 +503,25 @@ FloatOption::FloatOption(const OptionId& id, float min, float max) : Option(id), min_(min), max_(max) {} void FloatOption::SetValue(const std::string& value, OptionsDict* dict) { - SetVal(dict, std::stof(value)); + try { + SetVal(dict, std::stof(value)); + } catch (std::invalid_argument&) { + throw Exception("invalid value " + value); + } catch (const std::out_of_range&) { + throw Exception("out of range value " + value); + } } bool FloatOption::ProcessLongFlag(const std::string& flag, const std::string& value, OptionsDict* dict) { if (flag == GetLongFlag()) { - SetVal(dict, std::stof(value)); + try { + SetVal(dict, std::stof(value)); + } catch (std::invalid_argument&) { + throw Exception("invalid value " + value); + } catch (const std::out_of_range&) { + throw Exception("out of range value " + value); + } return true; } return false; @@ -518,7 +530,13 @@ bool FloatOption::ProcessLongFlag(const std::string& flag, bool FloatOption::ProcessShortFlagWithValue(char flag, const std::string& value, OptionsDict* dict) { if (flag == GetShortFlag()) { - SetVal(dict, std::stof(value)); + try { + SetVal(dict, std::stof(value)); + } catch (std::invalid_argument&) { + throw Exception("invalid value " + value); + } catch (const std::out_of_range&) { + throw Exception("out of range value " + value); + } return true; } return false; From c65895d7d71b6983c2d86096042b3a5d541b7ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Wed, 22 Apr 2020 21:57:47 +0300 Subject: [PATCH 117/151] Update q to cp conversion formula (#1193) --- src/mcts/params.cc | 1 + src/mcts/search.cc | 6 ++++-- 2 files changed, 5 
insertions(+), 2 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 29f247c53e..f9b6d64d0f 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -295,6 +295,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kPerPvCountersId) = false; std::vector score_type = {"centipawn", "centipawn_with_drawscore", + "centipawn_2019", "centipawn_2018", "win_percentage", "Q", diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 517090fb29..b54ca5a0c5 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -145,9 +145,11 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) { std::round(edge.GetM(0.0f)) / 2 + (edge.IsTbTerminal() ? 101 : 1), wl); } else if (score_type == "centipawn_with_drawscore") { - uci_info.score = 295 * q / (1 - 0.976953126 * std::pow(q, 14)); + uci_info.score = 90 * tan(1.5637541897 * q); } else if (score_type == "centipawn") { - uci_info.score = 295 * wl / (1 - 0.976953126 * std::pow(q, 14)); + uci_info.score = 90 * tan(1.5637541897 * wl); + } else if (score_type == "centipawn_2019") { + uci_info.score = 295 * wl / (1 - 0.976953126 * std::pow(wl, 14)); } else if (score_type == "centipawn_2018") { uci_info.score = 290.680623072 * tan(1.548090806 * wl); } else if (score_type == "win_percentage") { From 1eb0d56e44b11082e4de634e78fb22328108aeef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Leandro=20=C3=81lvarez=20Gonz=C3=A1lez?= Date: Wed, 22 Apr 2020 15:39:45 -0400 Subject: [PATCH 118/151] Update the rpi section of README (#1190) --- README.md | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8058eb018a..b9a16a5e98 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,8 @@ Now download the lc0 source, if you haven't already done so, following the instr ### Raspberry Pi +You'll need to be running the latest raspbian "buster". + 1. Install OpenBLAS ``` @@ -177,17 +179,10 @@ pip3 install meson pip3 install ninja ``` -3. Install clang +3. Install compiler and standard libraries ``` -wget http://releases.llvm.org/6.0.0/clang+llvm-6.0.0-armv7a-linux-gnueabihf.tar.xz -tar -xf clang+llvm-6.0.0-armv7a-linux-gnueabihf.tar.xz -rm clang+llvm-6.0.0-armv7a-linux-gnueabihf.tar.xz -mv clang+llvm-6.0.0-armv7a-linux-gnueabihf clang_6.0.0 -sudo mv clang_6.0.0 /usr/local -echo 'export PATH=/usr/local/clang_6.0.0/bin:~/.local/bin:$PATH' >> .bashrc -echo 'export LD_LIBRARY_PATH=/usr/local/clang_6.0.0/lib:$LD_LIBRARY_PATH' >> .bashrc -source .bashrc +sudo apt install clang-6.0 libstdc++-8-dev ``` 4. Clone lc0 and compile @@ -196,7 +191,7 @@ source .bashrc git clone https://github.com/LeelaChessZero/lc0.git cd lc0 git submodule update --init --recursive -CC=clang CXX=clang++ ./build.sh -Ddefault_library=static +CC=clang-6.0 CXX=clang++-6.0 ./build.sh -Ddefault_library=static ``` 5. The resulting binary will be in build/release From e0b6ef380c8ac6bfa381d14883435941aef0c982 Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Thu, 23 Apr 2020 09:56:15 +0200 Subject: [PATCH 119/151] Time management refactoring (#1195) * Appended files. * Compiles. * Compiles again. * Make smart pruning use smoothed nps. * Seems to be fully implemented. * Mistype. * One more bug. * Found discrepancy with documentaiton. * Bugfixes. * Don't smooth nps during the first move. * Too large default for timeuse decay. * Bugfix. * Fix build. * Relax defaults a bit. Add fixed to logging. * Remove "smooth" to "smooth-experimental" for now. 
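The "smoothed nps" mentioned in the commit message above is not spelled out in this excerpt (only the plumbing and part of smooth.cc appear below), so the following is a generic exponential-moving-average sketch of the idea rather than lc0's actual implementation; the class name and decay parameter are invented for illustration:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical sketch: keep a decayed average of nodes-per-second samples so
// that one unusually slow or fast interval moves the estimate only gradually.
class SmoothedNps {
 public:
  explicit SmoothedNps(double decay) : decay_(decay) {}
  void Update(int64_t nodes, int64_t millis) {
    if (millis <= 0) return;
    const double sample = 1000.0 * nodes / millis;
    smoothed_ = initialized_ ? decay_ * smoothed_ + (1.0 - decay_) * sample
                             : sample;
    initialized_ = true;
  }
  double Get() const { return smoothed_; }

 private:
  const double decay_;
  double smoothed_ = 0.0;
  bool initialized_ = false;
};

int main() {
  SmoothedNps nps(0.9);
  nps.Update(35000, 1000);  // 35 knps sample.
  nps.Update(20000, 1000);  // A slow second sample only nudges the estimate.
  std::printf("smoothed nps: %.0f\n", nps.Get());
}
```

A stable estimate matters because smart pruning (per the commit message above) and the time budget rely on it; reacting to every per-batch fluctuation would make move times jumpy.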
--- .gitignore | 1 + meson.build | 1 + src/engine.cc | 2 +- src/mcts/search.cc | 8 + src/mcts/search.h | 1 + src/mcts/stoppers/common.cc | 5 +- src/mcts/stoppers/factory.cc | 4 + src/mcts/stoppers/legacy.cc | 43 +++-- src/mcts/stoppers/legacy.h | 3 + src/mcts/stoppers/smooth.cc | 344 ++++++++++++++++++++++++++++++++++ src/mcts/stoppers/smooth.h | 38 ++++ src/mcts/stoppers/stoppers.cc | 4 +- src/mcts/stoppers/timemgr.cc | 9 + src/mcts/stoppers/timemgr.h | 10 +- 14 files changed, 446 insertions(+), 27 deletions(-) create mode 100644 src/mcts/stoppers/smooth.cc create mode 100644 src/mcts/stoppers/smooth.h diff --git a/.gitignore b/.gitignore index c9c30f98f8..3fe357c9e3 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.swp .clang_complete .DS_Store +.clangd/ build/ compile_commands.json CUDA_NN/ diff --git a/meson.build b/meson.build index be7b9d6458..65591353a8 100644 --- a/meson.build +++ b/meson.build @@ -131,6 +131,7 @@ files += [ 'src/mcts/stoppers/common.cc', 'src/mcts/stoppers/factory.cc', 'src/mcts/stoppers/legacy.cc', + 'src/mcts/stoppers/smooth.cc', 'src/mcts/stoppers/stoppers.cc', 'src/mcts/stoppers/timemgr.cc', 'src/neural/cache.cc', diff --git a/src/engine.cc b/src/engine.cc index 9146334f57..3ad1320e4e 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -261,7 +261,7 @@ void EngineController::Go(const GoParams& params) { responder = std::make_unique(std::move(responder)); } - auto stopper = time_manager_->GetStopper(params, tree_->HeadPosition()); + auto stopper = time_manager_->GetStopper(params, *tree_.get()); search_ = std::make_unique( *tree_, network_.get(), std::move(responder), StringsToMovelist(params.searchmoves, tree_->HeadPosition().GetBoard()), diff --git a/src/mcts/search.cc b/src/mcts/search.cc index b54ca5a0c5..fd90a48f00 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -215,6 +215,13 @@ int64_t Search::GetTimeSinceStart() const { .count(); } +int64_t Search::GetTimeSinceFirstBatch() const { + if (!nps_start_time_) return 0; + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - *nps_start_time_) + .count(); +} + // Root is depth 0, i.e. even depth. float Search::GetDrawScore(bool is_odd_depth) const { return (is_odd_depth ? params_.GetOpponentDrawScore() @@ -698,6 +705,7 @@ void Search::PopulateCommonIterationStats(IterationStats* stats) { stats->time_since_movestart = GetTimeSinceStart(); SharedMutex::SharedLock nodes_lock(nodes_mutex_); + stats->time_since_first_batch = GetTimeSinceFirstBatch(); if (!nps_start_time_ && total_playouts_ > 0) { nps_start_time_ = std::chrono::steady_clock::now(); } diff --git a/src/mcts/search.h b/src/mcts/search.h index 07478802a6..b64ee1b14b 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -109,6 +109,7 @@ class Search { EdgeAndNode GetBestRootChildWithTemperature(float temperature) const; int64_t GetTimeSinceStart() const; + int64_t GetTimeSinceFirstBatch() const; void MaybeTriggerStop(const IterationStats& stats, StoppersHints* hints); void MaybeOutputInfo(); void SendUciInfo(); // Requires nodes_mutex_ to be held. 
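A note on the `time_since_first_batch` statistic added in the search.cc hunk above: it is measured from the moment the first playouts are recorded (when `nps_start_time_` is set) rather than from the `go` command, presumably so that backend start-up latency does not depress the nps estimate. A quick numeric illustration with made-up values:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical timings: the first batch came back 100 ms after "go",
  // and 35000 playouts were completed over the following 900 ms.
  const int64_t time_since_movestart_ms = 1000;
  const int64_t time_since_first_batch_ms = 900;
  const int64_t playouts = 35000;

  std::printf("nps counted from 'go':        %.0f\n",
              1000.0 * playouts / time_since_movestart_ms);
  std::printf("nps counted from first batch: %.0f\n",
              1000.0 * playouts / time_since_first_batch_ms);
}
```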
diff --git a/src/mcts/stoppers/common.cc b/src/mcts/stoppers/common.cc index a4e076b430..74cb02c99a 100644 --- a/src/mcts/stoppers/common.cc +++ b/src/mcts/stoppers/common.cc @@ -152,10 +152,9 @@ class CommonTimeManager : public TimeManager { private: std::unique_ptr GetStopper(const GoParams& params, - const Position& position) override { + const NodeTree& tree) override { auto result = std::make_unique(); - if (child_mgr_) - result->AddStopper(child_mgr_->GetStopper(params, position)); + if (child_mgr_) result->AddStopper(child_mgr_->GetStopper(params, tree)); PopulateCommonUciStoppers(result.get(), options_, params, move_overhead_); return result; } diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index ed58f7b4e2..5c79c0326f 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -31,6 +31,7 @@ #include "factory.h" #include "mcts/stoppers/legacy.h" +#include "mcts/stoppers/smooth.h" #include "mcts/stoppers/stoppers.h" #include "utils/exception.h" @@ -70,6 +71,9 @@ std::unique_ptr MakeTimeManager(const OptionsDict& options) { if (managers[0] == "legacy") { time_manager = MakeLegacyTimeManager(move_overhead, tm_options.GetSubdict("legacy")); + } else if (managers[0] == "smooth-experimental") { + time_manager = MakeSmoothTimeManager( + move_overhead, tm_options.GetSubdict("smooth-experimental")); } if (!time_manager) { throw Exception("Unknown time manager: [" + managers[0] + "]"); diff --git a/src/mcts/stoppers/legacy.cc b/src/mcts/stoppers/legacy.cc index 2ad2c764b7..4ad7ec8a39 100644 --- a/src/mcts/stoppers/legacy.cc +++ b/src/mcts/stoppers/legacy.cc @@ -25,10 +25,30 @@ Program grant you additional permission to convey the resulting work. */ +#include "mcts/stoppers/legacy.h" + #include "mcts/stoppers/stoppers.h" namespace lczero { +float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) { + // An analysis of chess games shows that the distribution of game lengths + // looks like a log-logistic distribution. The mean residual time function + // calculates how many more moves are expected in the game given that we are + // at the current ply. Given that this function can be expensive to compute, + // we calculate the median residual time function instead. This is derived and + // shown to be similar to the mean residual time in "Some Useful Properties of + // Log-Logistic Random Variables for Health Care Simulations" (Clark & + // El-Taha, 2015). + // midpoint: The median length of games. + // steepness: How quickly the function drops off from its maximum value, + // around the midpoint. + const float move = ply / 2.0f; + return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness), + 1 / steepness) - + move; +} + namespace { class LegacyStopper : public TimeLimitStopper { @@ -53,7 +73,7 @@ class LegacyTimeManager : public TimeManager { time_curve_steepness_(params.GetOrDefault("steepness", 7.0f)), spend_saved_time_(params.GetOrDefault("immediate-use", 1.0f)) {} std::unique_ptr GetStopper(const GoParams& params, - const Position& position) override; + const NodeTree& tree) override; private: const int64_t move_overhead_; @@ -65,26 +85,9 @@ class LegacyTimeManager : public TimeManager { int64_t time_spared_ms_ = 0; }; -float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) { - // An analysis of chess games shows that the distribution of game lengths - // looks like a log-logistic distribution. 
The mean residual time function - // calculates how many more moves are expected in the game given that we are - // at the current ply. Given that this function can be expensive to compute, - // we calculate the median residual time function instead. This is derived and - // shown to be similar to the mean residual time in "Some Useful Properties of - // Log-Logistic Random Variables for Health Care Simulations" (Clark & - // El-Taha, 2015). - // midpoint: The median length of games. - // steepness: How quickly the function drops off from its maximum value, - // around the midpoint. - const float move = ply / 2.0f; - return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness), - 1 / steepness) - - move; -} - std::unique_ptr LegacyTimeManager::GetStopper( - const GoParams& params, const Position& position) { + const GoParams& params, const NodeTree& tree) { + const Position& position = tree.HeadPosition(); const bool is_black = position.IsBlackToMove(); const std::optional& time = (is_black ? params.btime : params.wtime); // If no time limit is given, don't stop on this condition. diff --git a/src/mcts/stoppers/legacy.h b/src/mcts/stoppers/legacy.h index 9fff527063..8a72d47666 100644 --- a/src/mcts/stoppers/legacy.h +++ b/src/mcts/stoppers/legacy.h @@ -27,10 +27,13 @@ #pragma once +#include "mcts/stoppers/timemgr.h" #include "utils/optionsdict.h" namespace lczero { +float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness); + std::unique_ptr MakeLegacyTimeManager(int64_t move_overhead, const OptionsDict& params); diff --git a/src/mcts/stoppers/smooth.cc b/src/mcts/stoppers/smooth.cc new file mode 100644 index 0000000000..dec6e7e9d5 --- /dev/null +++ b/src/mcts/stoppers/smooth.cc @@ -0,0 +1,344 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "mcts/stoppers/smooth.h" + +#include +#include + +#include "mcts/stoppers/legacy.h" +#include "mcts/stoppers/stoppers.h" +#include "utils/mutex.h" + +namespace lczero { +namespace { + +class Params { + public: + Params(const OptionsDict& /* params */, int64_t move_overhead); + + using MovesLeftEstimator = std::function; + + // Which fraction of the tree is reuse after a full move. Initial guess. + float initial_tree_reuse() const { return initial_tree_reuse_; } + // Do not allow tree reuse expectation to go above this value. 
+  float max_tree_reuse() const { return max_tree_reuse_; }
+  // Number of moves needed to update tree reuse estimation halfway.
+  float tree_reuse_halfupdate_moves() const {
+    return tree_reuse_halfupdate_moves_;
+  }
+  // Number of seconds to update nps estimation halfway.
+  float nps_halfupdate_seconds() const { return nps_halfupdate_seconds_; }
+  // Fraction of the allocated time the engine uses, initial estimation.
+  float initial_smartpruning_timeuse() const {
+    return initial_smartpruning_timeuse_;
+  }
+  // Do not allow timeuse estimation to fall below this.
+  float min_smartpruning_timeuse() const { return min_smartpruning_timeuse_; }
+  // Number of moves to update timeuse estimation halfway.
+  float smartpruning_timeuse_halfupdate_moves() const {
+    return smartpruning_timeuse_halfupdate_moves_;
+  }
+  // Fraction of the total available move time that is allowed to be used for
+  // a single move.
+  float max_single_move_time_fraction() const {
+    return max_single_move_time_fraction_;
+  }
+  // Move overhead.
+  int64_t move_overhead_ms() const { return move_overhead_ms_; }
+  // Returns a function that estimates remaining moves.
+  MovesLeftEstimator moves_left_estimator() const {
+    return moves_left_estimator_;
+  }
+
+ private:
+  const int64_t move_overhead_ms_;
+  const float initial_tree_reuse_;
+  const float max_tree_reuse_;
+  const float tree_reuse_halfupdate_moves_;
+  const float nps_halfupdate_seconds_;
+  const float initial_smartpruning_timeuse_;
+  const float min_smartpruning_timeuse_;
+  const float smartpruning_timeuse_halfupdate_moves_;
+  const float max_single_move_time_fraction_;
+  const MovesLeftEstimator moves_left_estimator_;
+};
+
+Params::MovesLeftEstimator CreateMovesLeftEstimator(const OptionsDict& params) {
+  // The only estimator we have now is MLE-legacy (Moves left estimator).
+  const OptionsDict& mle_dict = params.HasSubdict("mle-legacy")
+                                    ? params.GetSubdict("mle-legacy")
+                                    : params;
+  return [midpoint = mle_dict.GetOrDefault<float>("midpoint", 51.5f),
+          steepness = mle_dict.GetOrDefault<float>("steepness", 7.0f)](
+             const NodeTree& tree) {
+    const auto ply = tree.HeadPosition().GetGamePly();
+    return ComputeEstimatedMovesToGo(ply, midpoint, steepness);
+  };
+}
+
+Params::Params(const OptionsDict& params, int64_t move_overhead)
+    : move_overhead_ms_(move_overhead),
+      initial_tree_reuse_(params.GetOrDefault<float>("init-tree-reuse", 0.5f)),
+      max_tree_reuse_(params.GetOrDefault<float>("max-tree-reuse", 0.8f)),
+      tree_reuse_halfupdate_moves_(
+          params.GetOrDefault<float>("tree-reuse-update-rate", 3.0f)),
+      nps_halfupdate_seconds_(
+          params.GetOrDefault<float>("nps-update-rate", 5.0f)),
+      initial_smartpruning_timeuse_(
+          params.GetOrDefault<float>("init-timeuse", 0.5f)),
+      min_smartpruning_timeuse_(
+          params.GetOrDefault<float>("min-timeuse", 0.2f)),
+      smartpruning_timeuse_halfupdate_moves_(
+          params.GetOrDefault<float>("timeuse-update-rate", 3.0f)),
+      max_single_move_time_fraction_(
+          params.GetOrDefault<float>("max-move-budget", 0.3f)),
+      moves_left_estimator_(CreateMovesLeftEstimator(params)) {}
+
+// Returns the updated value of @from, towards @to by the number of halves
+// equal to number of @steps in @value. E.g. if value=1*step, returns
+// (from+to)/2, if value=2*step, returns (1*from + 3*to)/4, if
+// value=3*step, returns (1*from + 7*to)/8, if value=0, returns from.
+float ExponentialDecay(float from, float to, float step, float value) {
+  return to - (to - from) * std::pow(0.5f, value / step);
+}
+
+class SmoothTimeManager;
+
+class SmoothStopper : public TimeLimitStopper {
+ public:
+  SmoothStopper(int64_t deadline_ms, SmoothTimeManager* manager);
+
+ private:
+  bool ShouldStop(const IterationStats& stats, StoppersHints* hints) override;
+  void OnSearchDone(const IterationStats& stats) override;
+
+  SmoothTimeManager* const manager_;
+};
+
+class SmoothTimeManager : public TimeManager {
+ public:
+  SmoothTimeManager(int64_t move_overhead, const OptionsDict& params)
+      : params_(params, move_overhead) {}
+
+  float UpdateNps(int64_t time_since_movestart_ms,
+                  int64_t nodes_since_movestart) {
+    Mutex::Lock lock(mutex_);
+    if (nps_is_reliable_ && time_since_movestart_ms > last_time_) {
+      const float nps =
+          1000.0f * nodes_since_movestart / time_since_movestart_ms;
+      nps_ = ExponentialDecay(nps_, nps, params_.nps_halfupdate_seconds(),
+                              (time_since_movestart_ms - last_time_) / 1000.0f);
+    } else if (time_since_movestart_ms > 0) {
+      nps_ = 1000.0f * nodes_since_movestart / time_since_movestart_ms;
+    }
+    last_time_ = time_since_movestart_ms;
+    return nps_;
+  }
+
+  void UpdateEndOfMoveStats(int64_t total_move_time, int64_t total_nodes) {
+    Mutex::Lock lock(mutex_);
+    // Whatever is in nps_ after the first move, is truth now.
+    nps_is_reliable_ = true;
+    // How different was this move from an average move
+    const float this_move_time_fraction =
+        avg_ms_per_move_ <= 0.0f ? 0.0f : total_move_time / avg_ms_per_move_;
+    // Update time_use estimation.
+    const float this_move_time_use = total_move_time / move_allocated_time_ms_;
+    // Recompute expected move time for logging.
+    const float expected_move_time = move_allocated_time_ms_ * timeuse_;
+    timeuse_ = ExponentialDecay(timeuse_, this_move_time_use,
+                                params_.smartpruning_timeuse_halfupdate_moves(),
+                                this_move_time_fraction);
+    if (timeuse_ < params_.min_smartpruning_timeuse()) {
+      timeuse_ = params_.min_smartpruning_timeuse();
+    }
+    // Remember final number of nodes for tree reuse estimation.
+    last_move_final_nodes_ = total_nodes;
+
+    LOGFILE << std::fixed
+            << "Updating endmove stats. actual_move_time=" << total_move_time
+            << "ms, allocated_move_time=" << move_allocated_time_ms_
+            << "ms (ratio=" << this_move_time_use
+            << "), expected_move_time=" << expected_move_time
+            << "ms. New time_use=" << timeuse_
+            << ", update_rate=" << this_move_time_fraction
+            << " (avg_move_time=" << avg_ms_per_move_ << "ms).";
+  }
+
+ private:
+  std::unique_ptr<SearchStopper> GetStopper(const GoParams& params,
+                                            const NodeTree& tree) override {
+    const Position& position = tree.HeadPosition();
+    const bool is_black = position.IsBlackToMove();
+    const std::optional<int64_t>& time =
+        (is_black ? params.btime : params.wtime);
+    // If no time limit is given, don't stop on this condition.
+    if (params.infinite || params.ponder || !time) return nullptr;
+
+    Mutex::Lock lock(mutex_);
+
+    const auto current_nodes = tree.GetCurrentHead()->GetN();
+    if (last_move_final_nodes_ && last_time_ && avg_ms_per_move_ >= 0.0f) {
+      UpdateTreeReuseFactor(current_nodes);
+    }
+
+    last_time_ = 0;
+
+    // Get remaining moves estimation.
+    float remaining_moves = params_.moves_left_estimator()(tree);
+
+    // If the number of moves remaining until the time control is less than
+    // the estimated number of moves left in the game, then use the number of
+    // moves until the time control instead.
+ if (params.movestogo && + *params.movestogo > 0 && // Ignore non-standard uci command. + *params.movestogo < remaining_moves) { + remaining_moves = *params.movestogo; + } + + const std::optional& inc = is_black ? params.binc : params.winc; + const int increment = inc ? std::max(int64_t(0), *inc) : 0; + + // Total time, including increments, until time control. + const auto total_remaining_ms = + std::max(0.0f, *time + increment * (remaining_moves - 1) - + params_.move_overhead_ms()); + + // Total remaining nodes that we'll have chance to compute in a game. + const float remaining_game_nodes = total_remaining_ms * nps_ / 1000.0f; + // Total (fresh) nodes, in average, to processed per move. + const float avg_nodes_per_move = remaining_game_nodes / remaining_moves; + // Average time that will be spent per move. + avg_ms_per_move_ = total_remaining_ms / remaining_moves; + // As some part of a tree is usually reused, we can aim to a larger target. + const float nodes_per_move_including_reuse = + avg_nodes_per_move / (1.0f - tree_reuse_); + // Subtract what we already have, and get what we need to compute. + const float move_estimate_nodes = + nodes_per_move_including_reuse - current_nodes; + // This is what time we think will be really spent thinking. + const float expected_movetime_ms = move_estimate_nodes / nps_ * 1000.0f; + // This is what is the actual budget as we hope that the search will be + // shorter due to smart pruning. + move_allocated_time_ms_ = expected_movetime_ms / timeuse_; + + if (move_allocated_time_ms_ > + *time * params_.max_single_move_time_fraction()) { + move_allocated_time_ms_ = *time * params_.max_single_move_time_fraction(); + } + + LOGFILE << std::fixed << "allocated_move_time=" << move_allocated_time_ms_ + << "ms, expected_move_time=" << expected_movetime_ms + << "ms, timeuse=" << timeuse_ + << ", expected_total_nodes=" << nodes_per_move_including_reuse + << "(new=" << move_estimate_nodes << " + reused=" << current_nodes + << "), avg_total_nodes_per_move=" << nodes_per_move_including_reuse + << "(fresh=" << avg_nodes_per_move << ", reuse_rate=" << tree_reuse_ + << "), remaining_game_nodes=" << remaining_game_nodes + << ", remaining_moves=" << remaining_moves + << ", total_remaining_ms=" << total_remaining_ms + << ", nps=" << nps_; + + return std::make_unique(move_allocated_time_ms_, this); + } + + void UpdateTreeReuseFactor(int64_t new_move_nodes) REQUIRES(mutex_) { + // How different was this move from an average move + const float this_move_time_fraction = + avg_ms_per_move_ <= 0.0f ? 0.0f : last_time_ / avg_ms_per_move_; + + const float this_move_tree_reuse = + static_cast(new_move_nodes) / last_move_final_nodes_; + tree_reuse_ = ExponentialDecay(tree_reuse_, this_move_tree_reuse, + params_.tree_reuse_halfupdate_moves(), + this_move_time_fraction); + if (tree_reuse_ > params_.max_tree_reuse()) { + tree_reuse_ = params_.max_tree_reuse(); + } + LOGFILE << std::fixed + << "Updating tree reuse. last_move_nodes=" << last_move_final_nodes_ + << ", this_move_nodes=" << new_move_nodes + << " (tree_reuse=" << this_move_tree_reuse + << "). avg_tree_reuse=" << tree_reuse_ + << ", update_rate=" << this_move_time_fraction + << " (avg_move_time=" << avg_ms_per_move_ + << "ms, actual_move_time=" << last_time_ << "ms)"; + } + + const Params params_; + + Mutex mutex_; + // Fraction of a tree which usually survives a full move (and is reused). + float tree_reuse_ GUARDED_BY(mutex_) = params_.initial_tree_reuse(); + // Current NPS estimation. 
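+  // (Starts from a rough guess; raw measurements overwrite it during the first
+  // move, and ExponentialDecay smoothing only takes over once nps_is_reliable_
+  // is set at the end of that move.)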
+ float nps_ GUARDED_BY(mutex_) = 20000.0f; + // NPS is unreliable until the end of the first move. + bool nps_is_reliable_ GUARDED_BY(mutex_) = false; + // Fraction of a allocated time usually used. + float timeuse_ GUARDED_BY(mutex_) = params_.initial_smartpruning_timeuse(); + + // Average amount of time per move. Used to compute ratio for timeuse and + // tree reuse updates. + float avg_ms_per_move_ GUARDED_BY(mutex_) = 0.0f; + // Total amount of time allocated for the current move. Used to update + // timeuse_ when the move ends. + float move_allocated_time_ms_ GUARDED_BY(mutex_) = 0.0f; + // Total amount of nodes in the end of the previous search. Used to compute + // tree reuse factor when a new search starts. + int64_t last_move_final_nodes_ GUARDED_BY(mutex_) = 0; + // Time of the last report, since the beginning of the move. + int64_t last_time_ GUARDED_BY(mutex_) = 0; + + // According to the recent calculations, how much time should be spent in + // average per move. + float last_expected_movetime_ms_ GUARDED_BY(mutex_) = 0.0f; +}; + +SmoothStopper::SmoothStopper(int64_t deadline_ms, SmoothTimeManager* manager) + : TimeLimitStopper(deadline_ms), manager_(manager) {} + +bool SmoothStopper::ShouldStop(const IterationStats& stats, + StoppersHints* hints) { + const auto nps = manager_->UpdateNps(stats.time_since_first_batch, + stats.nodes_since_movestart); + hints->UpdateEstimatedNps(nps); + return TimeLimitStopper::ShouldStop(stats, hints); +} + +void SmoothStopper::OnSearchDone(const IterationStats& stats) { + manager_->UpdateEndOfMoveStats(stats.time_since_movestart, stats.total_nodes); +} + +} // namespace + +std::unique_ptr MakeSmoothTimeManager(int64_t move_overhead, + const OptionsDict& params) { + return std::make_unique(move_overhead, params); +} + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/smooth.h b/src/mcts/stoppers/smooth.h new file mode 100644 index 0000000000..2e68e83667 --- /dev/null +++ b/src/mcts/stoppers/smooth.h @@ -0,0 +1,38 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +#pragma once + +#include "mcts/stoppers/timemgr.h" +#include "utils/optionsdict.h" + +namespace lczero { + +std::unique_ptr MakeSmoothTimeManager(int64_t move_overhead, + const OptionsDict& params); + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/stoppers.cc b/src/mcts/stoppers/stoppers.cc index 6c3a9cf08b..36ae8a9b72 100644 --- a/src/mcts/stoppers/stoppers.cc +++ b/src/mcts/stoppers/stoppers.cc @@ -203,7 +203,9 @@ bool SmartPruningStopper::ShouldStop(const IterationStats& stats, const auto nodes = stats.nodes_since_movestart + kSmartPruningToleranceNodes; const auto time = stats.time_since_movestart - *first_eval_time_; - const auto nps = 1000LL * nodes / time + 1; + // If nps is populated by someone who knows better, use it. Otherwise use the + // value calculated here. + const auto nps = hints->GetEstimatedNps().value_or(1000LL * nodes / time + 1); const double remaining_time_s = hints->GetEstimatedRemainingTimeMs() / 1000.0; const auto remaining_playouts = diff --git a/src/mcts/stoppers/timemgr.cc b/src/mcts/stoppers/timemgr.cc index 88d952c6cc..caa4461e77 100644 --- a/src/mcts/stoppers/timemgr.cc +++ b/src/mcts/stoppers/timemgr.cc @@ -26,6 +26,7 @@ */ #include "mcts/stoppers/timemgr.h" + #include "mcts/stoppers/stoppers.h" namespace lczero { @@ -47,12 +48,20 @@ int64_t StoppersHints::GetEstimatedRemainingPlayouts() const { return std::max(decltype(remaining_playouts_){1}, remaining_playouts_); } +void StoppersHints::UpdateEstimatedNps(float v) { estimated_nps_ = v; } + +std::optional StoppersHints::GetEstimatedNps() const { + return estimated_nps_; +} + void StoppersHints::Reset() { // Slightly more than 3 years. remaining_time_ms_ = 100000000000; // Type for N in nodes is currently uint32_t, so set limit in order not to // overflow it. remaining_playouts_ = 4000000000; + // NPS is not known. + estimated_nps_.reset(); } } // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/timemgr.h b/src/mcts/stoppers/timemgr.h index ba8554041f..51d15351d0 100644 --- a/src/mcts/stoppers/timemgr.h +++ b/src/mcts/stoppers/timemgr.h @@ -29,9 +29,11 @@ #include #include +#include #include #include "chess/uciloop.h" +#include "mcts/node.h" #include "utils/optionsdict.h" namespace lczero { @@ -40,6 +42,7 @@ namespace lczero { // It is expected that this structure will grow. struct IterationStats { int64_t time_since_movestart = 0; + int64_t time_since_first_batch = 0; int64_t total_nodes = 0; int64_t nodes_since_movestart = 0; int64_t batches_since_movestart = 0; @@ -60,10 +63,13 @@ class StoppersHints { int64_t GetEstimatedRemainingTimeMs() const; void UpdateEstimatedRemainingRemainingPlayouts(int64_t v); int64_t GetEstimatedRemainingPlayouts() const; + void UpdateEstimatedNps(float v); + std::optional GetEstimatedNps() const; private: int64_t remaining_time_ms_; int64_t remaining_playouts_; + std::optional estimated_nps_; }; // Interface for search stopper. 
@@ -88,8 +94,8 @@ class SearchStopper { class TimeManager { public: virtual ~TimeManager() = default; - virtual std::unique_ptr GetStopper( - const GoParams& params, const Position& position) = 0; + virtual std::unique_ptr GetStopper(const GoParams& params, + const NodeTree& tree) = 0; }; } // namespace lczero From aa13c5f4c5fab2bfbe429a5f53aa1cbbbff114ea Mon Sep 17 00:00:00 2001 From: Alexis Olson Date: Thu, 23 Apr 2020 04:41:07 -0500 Subject: [PATCH 120/151] MLH verbose stats - Issue 1200 (#1230) * Add M effect logic to output section * Fix missing prefixes and semicolons * Some fixes. * Slight format improvement? Co-authored-by: Tilps --- src/mcts/search.cc | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index fd90a48f00..0af52a9291 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -261,7 +261,15 @@ std::vector Search::GetVerboseStats(Node* node) const { const float U_coeff = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u)); const bool logit_q = params_.GetLogitQ(); - + const float m_slope = params_.GetMovesLeftSlope(); + const float m_cap = params_.GetMovesLeftMaxEffect(); + const float a = params_.GetMovesLeftConstantFactor(); + const float b = params_.GetMovesLeftScaledFactor(); + const float c = params_.GetMovesLeftQuadraticFactor(); + const bool do_moves_left_adjustment = + network_->GetCapabilities().moves_left != + pblczero::NetworkFormat::MOVES_LEFT_NONE && + (std::abs(node->GetQ(0.0f)) > params_.GetMovesLeftThreshold()); std::vector edges; for (const auto& edge : node->Edges()) edges.push_back(edge); @@ -283,6 +291,12 @@ std::vector Search::GetVerboseStats(Node* node) const { oss << std::left << std::setw(5) << edge.GetMove(is_black_to_move).as_string(); + + float Q = edge.GetQ(fpu, draw_score, logit_q); + float M_effect = do_moves_left_adjustment + ? (std::clamp(m_slope * edge.GetM(0.0f), -m_cap, m_cap) * + std::copysign(1.0f, -Q) * (a + b * std::abs(Q) + c * Q * Q)) + : 0.0f; // TODO: should this be displaying transformed index? oss << " (" << std::setw(4) << edge.GetMove().as_nn_index(0) << ")"; @@ -311,8 +325,8 @@ std::vector Search::GetVerboseStats(Node* node) const { oss << "(U: " << std::setw(6) << std::setprecision(5) << edge.GetU(U_coeff) << ") "; - oss << "(Q+U: " << std::setw(8) << std::setprecision(5) - << edge.GetQ(fpu, draw_score, logit_q) + edge.GetU(U_coeff) << ") "; + oss << "(S: " << std::setw(8) << std::setprecision(5) + << Q + edge.GetU(U_coeff) + M_effect << ") "; oss << "(V: "; std::optional v; From c4c92f6996e49f5a43f7937c563a3ebf3777b465 Mon Sep 17 00:00:00 2001 From: Naphthalin <40385638+Naphthalin@users.noreply.github.com> Date: Thu, 23 Apr 2020 12:50:22 +0200 Subject: [PATCH 121/151] Start TempDecay only after a given number of moves (#1212) * Added TempDecayStartMove for starting temp decay only after a given number of moves. This allows keeping initial game up for a few moves and still use decay. * Doesn't allow temperature to fall below endgame temp during temp decay. Still allows initial temp to be below endgame temp. * Doesn't allow temperature to fall below endgame temp during temp decay. Still allows initial temp to be below endgame temp. 
* Hide temp options * renamed TempDecayStartMove to TempDecayDelayMoves Co-authored-by: Alexis Olson --- src/mcts/params.cc | 19 ++++++++++++++++--- src/mcts/params.h | 4 ++++ src/mcts/search.cc | 17 ++++++++++++----- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index f9b6d64d0f..7b239d0ecd 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -91,9 +91,14 @@ const OptionId SearchParams::kTemperatureId{ "while making the move."}; const OptionId SearchParams::kTempDecayMovesId{ "tempdecay-moves", "TempDecayMoves", - "Reduce temperature for every move from the game start to this number of " - "moves, decreasing linearly from initial temperature to 0. A value of 0 " - "disables tempdecay."}; + "Reduce temperature for every move after the first move, decreasing " + "linearly over this number of moves from initial temperature to 0. " + "A value of 0 disables tempdecay."}; +const OptionId SearchParams::kTempDecayDelayMovesId{ + "tempdecay-delay-moves", "TempDecayDelayMoves", + "Delay the linear decrease of temperature by this number of moves, " + "decreasing linearly from initial temperature to 0. A value of 0 starts " + "tempdecay after the first move."}; const OptionId SearchParams::kTemperatureCutoffMoveId{ "temp-cutoff-move", "TempCutoffMove", "Move number, starting from which endgame temperature is used rather " @@ -268,6 +273,7 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kRootHasOwnCpuctParamsId) = true; options->Add(kTemperatureId, 0.0f, 100.0f) = 0.0f; options->Add(kTempDecayMovesId, 0, 100) = 0; + options->Add(kTempDecayDelayMovesId, 0, 100) = 0; options->Add(kTemperatureCutoffMoveId, 0, 1000) = 0; options->Add(kTemperatureEndgameId, 0.0f, 100.0f) = 0.0f; options->Add(kTemperatureWinpctCutoffId, 0.0f, 100.0f) = 100.0f; @@ -322,6 +328,13 @@ void SearchParams::Populate(OptionsParser* options) { options->HideOption(kLogLiveStatsId); options->HideOption(kDisplayCacheUsageId); options->HideOption(kRootHasOwnCpuctParamsId); + options->HideOption(kTemperatureId); + options->HideOption(kTempDecayMovesId); + options->HideOption(kTempDecayDelayMovesId); + options->HideOption(kTemperatureCutoffMoveId); + options->HideOption(kTemperatureEndgameId); + options->HideOption(kTemperatureWinpctCutoffId); + options->HideOption(kTemperatureVisitOffsetId); options->HideOption(kMovesLeftConstantFactorId); options->HideOption(kMovesLeftScaledFactorId); options->HideOption(kMovesLeftQuadraticFactorId); diff --git a/src/mcts/params.h b/src/mcts/params.h index 4ed15a71f8..0073a6da50 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -59,6 +59,9 @@ class SearchParams { return options_.Get(kTemperatureVisitOffsetId); } int GetTempDecayMoves() const { return options_.Get(kTempDecayMovesId); } + int GetTempDecayDelayMoves() const { + return options_.Get(kTempDecayDelayMovesId); + } int GetTemperatureCutoffMove() const { return options_.Get(kTemperatureCutoffMoveId); } @@ -123,6 +126,7 @@ class SearchParams { static const OptionId kRootHasOwnCpuctParamsId; static const OptionId kTemperatureId; static const OptionId kTempDecayMovesId; + static const OptionId kTempDecayDelayMovesId; static const OptionId kTemperatureCutoffMoveId; static const OptionId kTemperatureEndgameId; static const OptionId kTemperatureWinpctCutoffId; diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 0af52a9291..c9a6f82f37 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -499,15 +499,22 @@ void Search::EnsureBestMoveKnown() 
REQUIRES(nodes_mutex_) float temperature = params_.GetTemperature(); const int cutoff_move = params_.GetTemperatureCutoffMove(); + const int decay_delay_moves = params_.GetTempDecayDelayMoves(); + const int decay_moves = params_.GetTempDecayMoves(); const int moves = played_history_.Last().GetGamePly() / 2; + if (cutoff_move && (moves + 1) >= cutoff_move) { temperature = params_.GetTemperatureEndgame(); - } else if (temperature && params_.GetTempDecayMoves()) { - if (moves >= params_.GetTempDecayMoves()) { + } else if (temperature && decay_moves) { + if (moves >= decay_delay_moves + decay_moves) { temperature = 0.0; - } else { - temperature *= static_cast(params_.GetTempDecayMoves() - moves) / - params_.GetTempDecayMoves(); + } else if (moves >= decay_delay_moves) { + temperature *= static_cast + (decay_delay_moves + decay_moves - moves) / decay_moves; + } + // don't allow temperature to decay below endgame temperature + if (temperature < params_.GetTemperatureEndgame()) { + temperature = params_.GetTemperatureEndgame(); } } From ebbbaf70e6f0aa4bf328107c67931b7f1a76a290 Mon Sep 17 00:00:00 2001 From: Tilps Date: Fri, 24 Apr 2020 10:57:15 +1000 Subject: [PATCH 122/151] Changelog for 0.25.0-rc2. (#1233) * Changelog for 0.25.0-rc2. * Add one more PR to the changelog. --- changelog.txt | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/changelog.txt b/changelog.txt index b3770fd805..733c78375e 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,4 +1,30 @@ -v0.25.0-rc1 (2020-04-09) +v0.25.0-rc2 (2020-04-23) +~~~~~~~ + +* Increased upper limit for maximum collision events. +* Allow negative values for some of the extended moves left head parameters. +* Fix a critical bug in training data generation for input type 3. +* Fix for switching between positions in uci mode that only differ by 50 move + rule in initial fen. +* Some refinements of certainty propagation. +* Better support for c++17 implementations that are missing charconv. +* Option to more accurately apply time management for uci hosts using + cuteseal or similar timing techniques. +* Fix for selfplay mode to allow exactly book length total games. +* Fix for selfplay opening books with castling moves starting from chess960 fens. +* Add build option to override nvcc compiler. +* Improved validity checking for some uci input parameters. +* Updated the Q to CP conversion formula to better fit recent T60 net outputs to + expectations. +* Add a new experimental time manager. +* Bug fix for the Q+U in verbose move stats. It is now called S: and contains + the total score, including any moves left based effect if applicable. +* New temperature decay option to allow to delay the start of decay. +* All temperature options have been hidden by default. +* New optional cuda backend convolution implementation. Off by default for + cudnn-fp16 until an issue with cublas performance on some gpus is resolved. + +v0.25.0-rc1 (2020-04-09) ~~~~~~~ * Now requires a c++17 supporting compilation environment to build. From 9b3c7992aef13a477b084fdaf3304fea5ed9e4b4 Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Fri, 24 Apr 2020 06:27:30 +0530 Subject: [PATCH 123/151] Cuda winograd (#1228) * custom winograd convolution for cuda backends * custom winograd fixes - fix a bug to make it work for non-SE networks - enable by default only with fp32. * address review comments * remove random line in comment * remove unused constants - W,H are hardcoded to 8 - because there are assumptions in the code based on that. 
No point in defining constants. --- src/neural/cuda/common_kernels.cu | 25 ++ src/neural/cuda/fp16_kernels.cu | 48 ++-- src/neural/cuda/kernels.h | 14 + src/neural/cuda/layers.cc | 186 ++++++++++++++ src/neural/cuda/layers.h | 44 ++++ src/neural/cuda/network_cudnn.cc | 154 +++++++---- src/neural/cuda/winograd_helper.inc | 385 ++++++++++++++++++++++++++++ 7 files changed, 793 insertions(+), 63 deletions(-) create mode 100644 src/neural/cuda/winograd_helper.inc diff --git a/src/neural/cuda/common_kernels.cu b/src/neural/cuda/common_kernels.cu index 309b4582e5..ff55b8931d 100644 --- a/src/neural/cuda/common_kernels.cu +++ b/src/neural/cuda/common_kernels.cu @@ -28,6 +28,7 @@ #include #include "cuda_common.h" +#include "winograd_helper.inc" namespace lczero { namespace cudnn_backend { @@ -545,6 +546,8 @@ void PolicyMap(int N, T* output, const T* input, const short* indices, // Template instantiation. template void copyTypeConverted(half* op, float* ip, int N); template void copyTypeConverted(float* op, half* ip, int N); +template void copyTypeConverted(float* op, float* ip, int N); +template void copyTypeConverted(half* op, half* ip, int N); template void batchNorm(float* output, const float* input, const float* skipInput, int N, int C, int H, @@ -588,5 +591,27 @@ template void PolicyMap(int N, half* output, const half* input, const short* indices, int inputSize, int usedSize, int outputSize); +template void FilterTransform(int N, int C, float* transformedFilter, + const float* filter); + +template void InputTransform(int N, int C, float* transformed_input, + const float* input); + +template void OutputTransform( + int N, int C, int se_K, float* output, const float* input, + const float* skip, const float* bias, const float* w1, const float* b1, + const float* w2, const float* b2); + +template void OutputTransform( + int N, int C, int se_K, float* output, const float* input, + const float* skip, const float* bias, const float* w1, const float* b1, + const float* w2, const float* b2); + +template void OutputTransform( + int N, int C, int se_K, float* output, const float* input, + const float* skip, const float* bias, const float* w1, const float* b1, + const float* w2, const float* b2); + + } // namespace cudnn_backend } // namespace lczero diff --git a/src/neural/cuda/fp16_kernels.cu b/src/neural/cuda/fp16_kernels.cu index e832a51417..9e8adbeeae 100644 --- a/src/neural/cuda/fp16_kernels.cu +++ b/src/neural/cuda/fp16_kernels.cu @@ -26,7 +26,7 @@ */ #include "cuda_common.h" - +#include "winograd_helper.inc" namespace lczero { namespace cudnn_backend { @@ -35,8 +35,6 @@ namespace cudnn_backend { // fp16-specific kernels used by certain layers // ///////////////////////////////////////////////////////////////////////////// - - // SE layer implementation using single fused kernel. // N blocks. @@ -44,17 +42,15 @@ namespace cudnn_backend { // 'HWC' input data processed by thread block. // Each thread processes 8x8 elements. // K is the no. of outputs of first fully connected layer (same as no. of inputs -// for second fully connected layer). +// for second fully connected layer). // The kernel assumes K <= C. 
-#define readw1(row, col) (w1[(row)*K + (col)]) -#define readw2(row, col) (w2[(row)*2 * C + (col)]) - template __global__ void SE_Layer_NHWC(half* output, const half* skip, const half* input, const half* w1, const half* b1, const half* w2, - const half* b2, const half *bPrev) { + const half* b2, const half* bPrev) { const int elementsPerThread = 64; // 8x8 board + const int se_K = K; int n = blockIdx.x; int c = threadIdx.x; @@ -68,8 +64,8 @@ __global__ void SE_Layer_NHWC(half* output, const half* skip, const half* input, half bias = 0; if (bPrev) bias = bPrev[c]; - // 1. Global avg (1 avg per thread). - #pragma unroll +// 1. Global avg (1 avg per thread). +#pragma unroll for (int i = 0; i < elementsPerThread; i++) { int localIndex = i * C + c; int inputIndex = n * C * elementsPerThread + localIndex; @@ -87,7 +83,7 @@ __global__ void SE_Layer_NHWC(half* output, const half* skip, const half* input, if (c < K) { S = 0; - #pragma unroll +#pragma unroll for (int i = 0; i < C; i++) { S += sharedData[i] * readw1(i, c); } @@ -104,7 +100,7 @@ __global__ void SE_Layer_NHWC(half* output, const half* skip, const half* input, // 3. Second fully connected layer. S = 0; half B = 0; - #pragma unroll +#pragma unroll for (int i = 0; i < K; i++) { half val = sharedData[i]; S += val * readw2(i, c); @@ -116,8 +112,8 @@ __global__ void SE_Layer_NHWC(half* output, const half* skip, const half* input, // Sigmoid (only on the scale part). S = (half)(1.0f / (1.0f + exp(-(float)(S)))); - // 4. Scale, and add skip connection, perform relu, and write to output. - #pragma unroll +// 4. Scale, and add skip connection, perform relu, and write to output. +#pragma unroll for (int i = 0; i < elementsPerThread; i++) { int localIndex = i * C + c; int inputIndex = n * C * elementsPerThread + localIndex; @@ -180,7 +176,7 @@ bool Se_Fp16_NHWC(int N, int C, int numFc1Out, half* output, const half* skip, <<>>(output, skip, input, w1, b1, w2, b2, bPrev); } else if (C == 320) { SE_Layer_NHWC<320, 64> - <<>>(output, skip, input, w1, b1, w2, b2, bPrev); + <<>>(output, skip, input, w1, b1, w2, b2, bPrev); } else { // TODO: support other channel counts. 
return false; @@ -193,5 +189,27 @@ bool Se_Fp16_NHWC(int N, int C, int numFc1Out, half* output, const half* skip, return true; } +template void FilterTransform(int N, int C, half* transformedFilter, + const half* filter); + + +template void InputTransform(int N, int C, half* transformed_input, + const half* input); + +template void OutputTransform( + int N, int C, int se_K, half* output, const half* input, const half* skip, + const half* bias, const half* w1, const half* b1, const half* w2, + const half* b2); + +template void OutputTransform( + int N, int C, int se_K, half* output, const half* input, const half* skip, + const half* bias, const half* w1, const half* b1, const half* w2, + const half* b2); + +template void OutputTransform( + int N, int C, int se_K, half* output, const half* input, const half* skip, + const half* bias, const half* w1, const half* b1, const half* w2, + const half* b2); + } // namespace cudnn_backend } // namespace lczero diff --git a/src/neural/cuda/kernels.h b/src/neural/cuda/kernels.h index 6fce88e31f..eba4ad6c6e 100644 --- a/src/neural/cuda/kernels.h +++ b/src/neural/cuda/kernels.h @@ -82,5 +82,19 @@ template void PolicyMap(int N, T* output, const T* input, const short* indices, int inputSize, int usedSize, int outputSize); + +// Custom winograd helper functions +template +void FilterTransform(int N, int C, T* transformedFilter, const T* filter); + +template +void InputTransform(int N, int C, T* transformedInput, const T* input); + +template +void OutputTransform(int N, int C, int se_K, T* output, const T* input, + const T* skip, const T* bias, const T* w1, const T* b1, + const T* w2, const T* b2); + + } // namespace cudnn_backend } // namespace lczero diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc index 6c2f7094a1..44c7bfbc27 100644 --- a/src/neural/cuda/layers.cc +++ b/src/neural/cuda/layers.cc @@ -31,6 +31,8 @@ #include "cuda_common.h" #include "kernels.h" namespace lczero { +//void dumpTensor(void* memory, int elements, const char* message, bool fp16 = false); + namespace cudnn_backend { // Use Single kernel for entire SE operation. @@ -704,6 +706,186 @@ PolicyMapLayer::~PolicyMapLayer() { ReportCUDAErrors(cudaFree(weights_)); } +template +FusedWinogradConvSELayer::FusedWinogradConvSELayer( + BaseLayer* ip, int C, int H, int W, int Cin, bool relu, bool bias, + bool skip_add, bool se, int se_k) + : BaseLayer(C, H, W, ip), + c_input_(Cin), + use_relu_(relu), + use_bias_(bias), + skip_add_(skip_add), + has_se_(se), + se_k_(se_k) { + // Allocate memory for weights (filter tensor) and biases. 
+ const size_t weight_size = sizeof(DataType) * c_input_ * C * 3 * 3; + ReportCUDAErrors(cudaMalloc(&weights_, weight_size)); + + if (use_bias_) { + const size_t blas_size = sizeof(DataType) * C; + ReportCUDAErrors(cudaMalloc(&biases_, blas_size)); + } + + // 6x6 transformed filter size, for 3x3 convolution + ReportCUDAErrors(cudaMalloc(&transformed_weights_, weight_size * 4)); + + if (has_se_) { + const size_t num_weights1 = C * se_k_; + const size_t num_weights2 = num_weights1 * 2; + const size_t num_biases1 = se_k_; + const size_t num_biases2 = 2 * C; + + const size_t weight_size1 = sizeof(DataType) * num_weights1; + const size_t weight_size2 = sizeof(DataType) * num_weights2; + const size_t biases_size1 = sizeof(DataType) * num_biases1; + const size_t biases_size2 = sizeof(DataType) * num_biases2; + + ReportCUDAErrors(cudaMalloc(&w1_, weight_size1)); + ReportCUDAErrors(cudaMalloc(&w2_, weight_size2)); + ReportCUDAErrors(cudaMalloc(&b1_, biases_size1)); + ReportCUDAErrors(cudaMalloc(&b2_, biases_size2)); + } +} + +template +void FusedWinogradConvSELayer::LoadWeights(float* pfilter, + float* pBias, + void* scratch) { + const size_t weight_size = sizeof(float) * c_input_ * C * 3 * 3; + const size_t blas_size = sizeof(float) * C; + + // first copy from CPU memory to scratch space in GPU memory + // and then do the type conversion using a kernel + assert(scratch); + ReportCUDAErrors( + cudaMemcpy(scratch, pfilter, weight_size, cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)weights_, (float*)scratch, C * c_input_ * 3 * 3); + + if (pBias) { + ReportCUDAErrors( + cudaMemcpy(scratch, pBias, blas_size, cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)biases_, (float*)scratch, C); + } + + // run winograd transform kernel for the filter + FilterTransform(C, c_input_, transformed_weights_, weights_); +} + +// TODO: Do this on the GPU to improve network load time! +static inline void CpuTranspose(float* op, float* ip, size_t rows, size_t cols) { + for (size_t i = 0; i < rows; i++) + for (size_t j = 0; j < cols; j++) op[j * rows + i] = ip[i * cols + j]; +} + +template +void FusedWinogradConvSELayer::LoadSEWeights(float* w1, float* b1, + float* w2, float* b2, + void* scratch) { + const size_t num_weights1 = C * se_k_; + const size_t num_weights2 = num_weights1 * 2; + const size_t num_biases1 = se_k_; + const size_t num_biases2 = 2 * C; + + // The shader uses transposed weight matrices. 
+ std::vector temp_transposed(num_weights2); + + CpuTranspose(temp_transposed.data(), w1, se_k_, C); + ReportCUDAErrors(cudaMemcpy(scratch, temp_transposed.data(), num_weights1*sizeof(float), + cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)w1_, (float*)scratch, num_weights1); + + CpuTranspose(temp_transposed.data(), w2, 2 * C, se_k_); + ReportCUDAErrors(cudaMemcpy(scratch, temp_transposed.data(), + num_weights2 * sizeof(float), + cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)w2_, (float*)scratch, num_weights2); + + + + ReportCUDAErrors(cudaMemcpy(scratch, b1, num_biases1 * sizeof(float), + cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)b1_, (float*)scratch, num_biases1); + + ReportCUDAErrors(cudaMemcpy(scratch, b2, num_biases2 * sizeof(float), + cudaMemcpyHostToDevice)); + copyTypeConverted((DataType*)b2_, (float*)scratch, num_biases2); +} + + +void cublasRowMjaorMatrixMul(const half* A, const half* B, half* Out, int M, + int N, int K, int batchSize, cublasHandle_t cublas, + int algo = -1) { + half halfOne = (half)1.0f; + half halfZero = (half)0.0f; + + // dimensions of matrix A = M x K + // dimensions of matrix B = K x N + // dimensions of output = M x N + + // cublas supports only col major output + // to multiply row major matrices, use the trick below + ReportCUBLASErrors(cublasGemmStridedBatchedEx( + cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &halfOne, B, CUDA_R_16F, N, + N * K, A, CUDA_R_16F, K, K * M, &halfZero, Out, CUDA_R_16F, N, N * M, + batchSize, CUDA_R_16F, cublasGemmAlgo_t(algo))); +} + +void cublasRowMjaorMatrixMul(const float* A, const float* B, float* Out, int M, + int N, int K, int batchSize, cublasHandle_t cublas, + int algo = -1) { + float floatOne = 1.0f; + float floatZero = 0.0f; + ReportCUBLASErrors(cublasGemmStridedBatchedEx( + cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &floatOne, B, CUDA_R_32F, N, + N * K, A, CUDA_R_32F, K, K * M, &floatZero, Out, CUDA_R_32F, N, N * M, + batchSize, CUDA_R_32F, cublasGemmAlgo_t(algo))); +} + +template +void FusedWinogradConvSELayer::Eval( + int N, DataType* output, const DataType* input, const DataType* input2, + void* scratch, size_t scratch_size, cudnnHandle_t /*cudnn*/, + cublasHandle_t cublas) { + + // Split the scratch space into two parts - use first part for holding + // transformed input and second part for transformed output. 
+ DataType* transformed_input = (DataType*)scratch; + DataType* transformed_output = + transformed_input + scratch_size / (2 * sizeof(DataType)); + + InputTransform(N, C, transformed_input, input); + + cublasRowMjaorMatrixMul(transformed_input, transformed_weights_, transformed_output, N*4, C, c_input_, 36, cublas); + + if (has_se_ && use_relu_ && use_bias_ && skip_add_) + OutputTransform( + N, C, se_k_, output, transformed_output, input2, biases_, w1_, b1_, w2_, + b2_); + else if (!has_se_ && use_relu_ && use_bias_ && !skip_add_) + OutputTransform( + N, C, 0, output, transformed_output, nullptr, biases_, nullptr, nullptr, + nullptr, nullptr); + else if (!has_se_ && use_relu_ && use_bias_ && skip_add_) + OutputTransform( + N, C, 0, output, transformed_output, input2, biases_, nullptr, nullptr, + nullptr, nullptr); + else + throw Exception("unsupported network type!"); +} + +template +FusedWinogradConvSELayer::~FusedWinogradConvSELayer() { + ReportCUDAErrors(cudaFree(weights_)); + ReportCUDAErrors(cudaFree(transformed_weights_)); + if (use_bias_) ReportCUDAErrors(cudaFree(biases_)); + if (has_se_) { + ReportCUDAErrors(cudaFree(w1_)); + ReportCUDAErrors(cudaFree(w2_)); + ReportCUDAErrors(cudaFree(b1_)); + ReportCUDAErrors(cudaFree(b2_)); + } +} + // Template instantiation. template class ConvLayer; template class ConvLayer; @@ -720,6 +902,10 @@ template class SELayer; template class PolicyMapLayer; template class PolicyMapLayer; +template class FusedWinogradConvSELayer; +template class FusedWinogradConvSELayer; + + // Misc error handling stuff. void CudnnError(cudnnStatus_t status, const char* file, const int& line) { if (status != CUDNN_STATUS_SUCCESS) { diff --git a/src/neural/cuda/layers.h b/src/neural/cuda/layers.h index 7886557d74..239fd3e621 100644 --- a/src/neural/cuda/layers.h +++ b/src/neural/cuda/layers.h @@ -200,5 +200,49 @@ class SELayer : public BaseLayer { bool addPrevLayerBias_; }; + +// Multi-pass Winograd Conv fused with (optional) SE +template +class FusedWinogradConvSELayer : public BaseLayer { + using BaseLayer::C; + using BaseLayer::H; + using BaseLayer::W; + using BaseLayer::GetC; + using BaseLayer::GetH; + using BaseLayer::GetW; + using BaseLayer::nhwc_; + + public: + FusedWinogradConvSELayer(BaseLayer* ip, int C, int H, int W, + int Cin, bool relu, bool bias, bool skipAdd, + bool se, int se_k); + + ~FusedWinogradConvSELayer(); + void LoadWeights(float* pfilter, float* pBias, void* scratch); + void LoadSEWeights(float* w1, float* b1, float* w2, float* b2, void *scratch); + void Eval(int N, DataType* output, const DataType* input, + const DataType* input2, + void* scratch, size_t scratch_size, + cudnnHandle_t cudnn, cublasHandle_t cublas) override; + + private: + const int c_input_; + const bool use_relu_; + const bool use_bias_; + const bool skip_add_; + const bool has_se_; + const int se_k_; + + DataType* biases_ = nullptr; + DataType* weights_ = nullptr; + DataType* transformed_weights_ = nullptr; // After winograd transform. + + // Weights and Biases for (optional) SE. 
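+  // (w1_ and w2_ are stored transposed by LoadSEWeights; see CpuTranspose in
+  // layers.cc.)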
+ DataType* w1_; + DataType* w2_; + DataType* b1_; + DataType* b2_; +}; + } // namespace cudnn_backend } // namespace lczero diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index de7d101b0b..875a8c67b5 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -49,7 +49,7 @@ static constexpr int kNumOutputPolicy = 1858; #if 0 // debug code to dump allocation in GPU memory -void dumpTensor(void *memory, int elements, char *message, bool fp16 = false) +void dumpTensor(void *memory, int elements, const char *message, bool fp16 = false) { printf("\n%s\n", message); int elementSize = (int) (fp16 ? sizeof(half) : sizeof(float)); @@ -266,11 +266,26 @@ class CudnnNetwork : public Network { // Override if forced from backend option if (!options.IsDefault("nhwc")) nhwc_ = options.Get("nhwc"); - - if (nhwc_) - ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH)); } + // Always try to set tensor math (won't have any effect on GPUs that don't + // support it). + ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH)); + + // Check if we want to enable our custom winograd ? + constexpr bool fp16 = std::is_same::value; + + // Use our custom winograd impl for fp32. + // TODO: Enable this for fp16 too once it's tested to be faster. + use_custom_winograd_ = !fp16; + + // Override if set in backend-opts. + if (!options.IsDefault("custom_winograd")) + use_custom_winograd_ = options.Get("custom_winograd"); + + // Winograd needs nchw tensor layout. + if (use_custom_winograd_) nhwc_ = false; + const int kNumInputPlanes = kInputPlanes; const int kNumFilters = weights.input.biases.size(); @@ -295,9 +310,7 @@ class CudnnNetwork : public Network { const int maxChannels = std::max(kInputPlanes, kNumFilters); - const cudnnDataType_t datatype = std::is_same::value - ? CUDNN_DATA_HALF - : CUDNN_DATA_FLOAT; + const cudnnDataType_t datatype = fp16 ? CUDNN_DATA_HALF : CUDNN_DATA_FLOAT; const cudnnTensorFormat_t layout = nhwc_ ? CUDNN_TENSOR_NHWC : CUDNN_TENSOR_NCHW; @@ -328,6 +341,14 @@ class CudnnNetwork : public Network { const int maxWeightSize = 128 * 1024 * 1024; if (scratch_size_ < maxWeightSize) scratch_size_ = maxWeightSize; + if (use_custom_winograd_) { + // Need additional space for transformed input/outputs which are 36/16 + // times size (4x4 block transformed into 6x6). + const size_t transformed_tensor_size = + (size_t)(max_batch_size_ * kNumFilters * 64 * (36.0 / 16.0)); + scratch_size_ = std::max(scratch_size_, 2 * transformed_tensor_size); + } + ReportCUDAErrors(cudaMalloc(&scratch_mem_, scratch_size_)); #ifdef DEBUG_RAW_NPS CERR << "allocated " << scratch_size_ << " bytes for scratch memory"; @@ -346,35 +367,61 @@ class CudnnNetwork : public Network { // Residual block. for (size_t block = 0; block < weights.residual.size(); block++) { - auto conv1 = std::make_unique>( - getLastLayer(), kNumFilters, 8, 8, 3, kNumFilters, true, true); - conv1->LoadWeights(&weights.residual[block].conv1.weights[0], - &weights.residual[block].conv1.biases[0], - scratch_mem_); - network_.emplace_back(std::move(conv1)); - - // Relu and bias of second convolution is handled by SELayer. - bool useReluAndBias = weights.residual[block].has_se ? false : true; - - auto conv2 = std::make_unique>( - getLastLayer(), kNumFilters, 8, 8, 3, kNumFilters, useReluAndBias, - useReluAndBias); - conv2->LoadWeights( - &weights.residual[block].conv2.weights[0], - useReluAndBias ? 
&weights.residual[block].conv2.biases[0] : nullptr, - scratch_mem_); - network_.emplace_back(std::move(conv2)); - - if (weights.residual[block].has_se) { - int numFCOut = weights.residual[block].se.b1.size(); - auto se = std::make_unique>(getLastLayer(), numFCOut, - false); - se->LoadWeights(&weights.residual[block].se.w1[0], - &weights.residual[block].se.b1[0], - &weights.residual[block].se.w2[0], - &weights.residual[block].se.b2[0], - &weights.residual[block].conv2.biases[0], scratch_mem_); - network_.emplace_back(std::move(se)); + if (use_custom_winograd_) { + auto conv1 = std::make_unique>( + getLastLayer(), kNumFilters, 8, 8, kNumFilters, true, true, false, + false, 0); + conv1->LoadWeights(&weights.residual[block].conv1.weights[0], + &weights.residual[block].conv1.biases[0], + scratch_mem_); + network_.emplace_back(std::move(conv1)); + + bool has_se = weights.residual[block].has_se; + int se_k = weights.residual[block].se.b1.size(); + auto conv2 = std::make_unique>( + getLastLayer(), kNumFilters, 8, 8, kNumFilters, true, true, true, + has_se, se_k); + conv2->LoadWeights(&weights.residual[block].conv2.weights[0], + &weights.residual[block].conv2.biases[0], + scratch_mem_); + if (has_se) + conv2->LoadSEWeights(&weights.residual[block].se.w1[0], + &weights.residual[block].se.b1[0], + &weights.residual[block].se.w2[0], + &weights.residual[block].se.b2[0], scratch_mem_); + network_.emplace_back(std::move(conv2)); + } else { + auto conv1 = std::make_unique>( + getLastLayer(), kNumFilters, 8, 8, 3, kNumFilters, true, true); + conv1->LoadWeights(&weights.residual[block].conv1.weights[0], + &weights.residual[block].conv1.biases[0], + scratch_mem_); + network_.emplace_back(std::move(conv1)); + + // Relu and bias of second convolution is handled by SELayer. + bool useReluAndBias = weights.residual[block].has_se ? false : true; + + auto conv2 = std::make_unique>( + getLastLayer(), kNumFilters, 8, 8, 3, kNumFilters, useReluAndBias, + useReluAndBias); + conv2->LoadWeights( + &weights.residual[block].conv2.weights[0], + useReluAndBias ? &weights.residual[block].conv2.biases[0] : nullptr, + scratch_mem_); + network_.emplace_back(std::move(conv2)); + + if (weights.residual[block].has_se) { + int numFCOut = weights.residual[block].se.b1.size(); + auto se = std::make_unique>(getLastLayer(), + numFCOut, false); + se->LoadWeights(&weights.residual[block].se.w1[0], + &weights.residual[block].se.b1[0], + &weights.residual[block].se.w2[0], + &weights.residual[block].se.b2[0], + &weights.residual[block].conv2.biases[0], + scratch_mem_); + network_.emplace_back(std::move(se)); + } } } @@ -547,22 +594,30 @@ class CudnnNetwork : public Network { scratch_mem_, scratch_size_, cudnn_, cublas_); // conv1 - // For SE Resnet, skip connection is added after SE (and bias is added as - // part of SE). - if (has_se_) { - network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, - scratch_mem_, scratch_size_, cudnn_, - cublas_); // conv2 - } else { + if (use_custom_winograd_) { network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], tensor_mem_[2], scratch_mem_, scratch_size_, cudnn_, cublas_); // conv2 - } + } else { + // For SE Resnet, skip connection is added after SE (and bias is added + // as part of SE). 
+ if (has_se_) { + network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], + nullptr, scratch_mem_, scratch_size_, cudnn_, + cublas_); // conv2 + } else { + network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], + tensor_mem_[2], scratch_mem_, scratch_size_, + cudnn_, + cublas_); // conv2 + } - if (has_se_) { - network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], - tensor_mem_[2], scratch_mem_, scratch_size_, cudnn_, - cublas_); // SE layer + if (has_se_) { + network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], + tensor_mem_[2], scratch_mem_, scratch_size_, + cudnn_, + cublas_); // SE layer + } } } @@ -759,6 +814,9 @@ class CudnnNetwork : public Network { bool nhwc_; // do we want to use nhwc layout (fastest with fp16 with tensor // cores) + bool use_custom_winograd_; // Custom winograd convolution implementation for + // convolutions of the residual tower. + // Currently only one NN Eval can happen a time (we can fix this if needed // by allocating more memory). mutable std::mutex lock_; diff --git a/src/neural/cuda/winograd_helper.inc b/src/neural/cuda/winograd_helper.inc new file mode 100644 index 0000000000..ebb1d95d4f --- /dev/null +++ b/src/neural/cuda/winograd_helper.inc @@ -0,0 +1,385 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. 
+*/ + +namespace lczero { +namespace cudnn_backend { + +template +__device__ __forceinline__ void matrixMul_gpu_serial(T* c, const T* a, + const T* b) { +#pragma unroll + for (int i = 0; i < M; ++i) +#pragma unroll + for (int j = 0; j < N; ++j) { + T S = 0; +#pragma unroll + for (int k = 0; k < K; ++k) S += a[i * K + k] * b[k * N + j]; + c[i * N + j] = S; + } +} + +template +__device__ __forceinline__ void FilterTransform4x4(T* transformed_filter, + const T* filter) { + // transform applied to filter (of size 3x3) + T G[6 * 3] = {1.0f / 4, 0, 0, -1.0f / 6, -1.0f / 6, + -1.0f / 6, -1.0f / 6, 1.0f / 6, -1.0f / 6, 1.0f / 24, + 1.0f / 12, 1.0f / 6, 1.0f / 24, -1.0f / 12, 1.0f / 6, + 0, 0, 1}; + + T Gt[3 * 6] = {1.0f / 4, -1.0f / 6, -1.0f / 6, 1.0f / 24, 1.0f / 24, 0, + 0, -1.0f / 6, 1.0f / 6, 1.0f / 12, -1.0f / 12, 0, + 0, -1.0f / 6, -1.0f / 6, 1.0f / 6, 1.0f / 6, 1}; + + T temp_filter[6 * 3]; + matrixMul_gpu_serial(temp_filter, G, filter); + matrixMul_gpu_serial(transformed_filter, temp_filter, Gt); +} + +template +__device__ __forceinline__ void InputTransform4x4(T* transformedInput, + const T* input) { + // transform applied to input tile (of size 4x4) + const T Bt[6 * 6] = {4, 0, -5, 0, 1, 0, 0, -4, -4, 1, 1, 0, + 0, 4, -4, -1, 1, 0, 0, -2, -1, 2, 1, 0, + 0, 2, -1, -2, 1, 0, 0, 4, 0, -5, 0, 1}; + + const T B[6 * 6] = {4, 0, 0, 0, 0, 0, 0, -4, 4, -2, 2, 4, + -5, -4, -4, -1, -1, 0, 0, 1, -1, 2, -2, -5, + 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1}; + + T tempIp1[6 * 6]; + matrixMul_gpu_serial(tempIp1, Bt, input); + matrixMul_gpu_serial(transformedInput, tempIp1, B); +} + +template +__device__ __forceinline__ void OutputTransform4x4( + T* output, const T* transformedOutput) { + // transform applied to result + const T At[4 * 6] = {1, 1, 1, 1, 1, 0, 0, 1, -1, 2, -2, 0, + 0, 1, 1, 4, 4, 0, 0, 1, -1, 8, -8, 1}; + + const T A[6 * 4] = {1, 0, 0, 0, 1, 1, 1, 1, 1, -1, 1, -1, + 1, 2, 4, 8, 1, -2, 4, -8, 0, 0, 0, 1}; + + T tempOp[4 * 6]; + matrixMul_gpu_serial(tempOp, At, transformedOutput); + matrixMul_gpu_serial(output, tempOp, A); +} + +#define FILTER_IDX_NCHW(k, c, h, w) ((k)*C * S * R + (c)*S * R + (h)*R + w) +template +__global__ void filterTransform_kernel(int K, int C, int elements, + T* transformed_filter, const T* filter) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + if (tid >= elements) return; + + constexpr int S = 3; + constexpr int R = 3; + + int c = tid % C; + int k = tid / C; + + T filter_tile[3][3]; + T transformed_tile[6][6]; + + // read input from memory + for (int s = 0; s < S; s++) + for (int r = 0; r < R; r++) { + filter_tile[s][r] = filter[FILTER_IDX_NCHW(k, c, s, r)]; + } + + // transform it + FilterTransform4x4(&(transformed_tile[0][0]), &(filter_tile[0][0])); + + // write to output (output is in HWCK layout) + for (int i = 0; i < 6; i++) + for (int j = 0; j < 6; j++) { + transformed_filter[i * 6 * C * K + j * C * K + c * K + k] = + transformed_tile[i][j]; + } +} + + +// index in intermediate/temp tensor +// W, H == 6 here! 
(6x6 transformed blocks) +// N also includes part of dimension (2x2) +#define GemmN (N * 4) +#define INDEX_NCHW(n, c, h, w) ((n)*C * 8 * 8 + (c)*8 * 8 + (h)*8 + w) +#define TEMP_INDEX_HWNC(h, w, n, c) \ + ((h)*6 * GemmN * C + (w)*GemmN * C + (n)*C + c) + +// 'C' threads per block +// 'N' blocks +// every thread transforms an entire board/plane (8x8 elements) +// - producing 4 x 6x6 elements +template +__global__ void InputTransform_kernel(int N, int C, const T* input, T* output) { + int c = threadIdx.x; + int n = blockIdx.x; + + T board[8][8]; + + const bool fp16 = std::is_same::value; + +// read the board (a row at a time for fp16) +#pragma unroll + for (int y = 0; y < 8; y++) { + *((uint4*)(&board[y][0])) = *((uint4*)(&input[INDEX_NCHW(n, c, y, 0)])); + if (!fp16) + *((uint4*)(&board[y][4])) = *((uint4*)(&input[INDEX_NCHW(n, c, y, 4)])); + } + + // top-left + { + T inEl[6][6] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +#pragma unroll + for (int i = 0; i < 5; i++) +#pragma unroll + for (int j = 0; j < 5; j++) inEl[i + 1][j + 1] = board[i][j]; + + InputTransform4x4(&inEl[0][0], &inEl[0][0]); + +#pragma unroll + for (int y = 0; y < 6; y++) +#pragma unroll + for (int x = 0; x < 6; x++) + output[TEMP_INDEX_HWNC(y, x, n * 4 + 0, c)] = inEl[y][x]; + } + + // top-right + { + T inEl[6][6] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +#pragma unroll + for (int i = 0; i < 5; i++) +#pragma unroll + for (int j = 0; j < 5; j++) inEl[i + 1][j] = board[i][j + 3]; + + InputTransform4x4(&inEl[0][0], &inEl[0][0]); + +#pragma unroll + for (int y = 0; y < 6; y++) +#pragma unroll + for (int x = 0; x < 6; x++) + output[TEMP_INDEX_HWNC(y, x, n * 4 + 1, c)] = inEl[y][x]; + } + + // bottom-left + { + T inEl[6][6] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +#pragma unroll + for (int i = 0; i < 5; i++) +#pragma unroll + for (int j = 0; j < 5; j++) inEl[i][j + 1] = board[i + 3][j]; + + InputTransform4x4(&inEl[0][0], &inEl[0][0]); + +#pragma unroll + for (int y = 0; y < 6; y++) +#pragma unroll + for (int x = 0; x < 6; x++) + output[TEMP_INDEX_HWNC(y, x, n * 4 + 2, c)] = inEl[y][x]; + } + + // bottom-right + { + T inEl[6][6] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +#pragma unroll + for (int i = 0; i < 5; i++) +#pragma unroll + for (int j = 0; j < 5; j++) inEl[i][j] = board[i + 3][j + 3]; + + InputTransform4x4(&inEl[0][0], &inEl[0][0]); + +#pragma unroll + for (int y = 0; y < 6; y++) +#pragma unroll + for (int x = 0; x < 6; x++) + output[TEMP_INDEX_HWNC(y, x, n * 4 + 3, c)] = inEl[y][x]; + } +} + +#define readw1(row, col) (w1[(row)*se_K + (col)]) +#define readw2(row, col) (w2[(row)*2 * C + (col)]) + +// input is in transformed space (HWNC layout) +// output is NCHW +// 'C' threads per block +// 'N' blocks +// every thread generates an entire board/plane (8x8 elements) +template +__global__ void OutputTransform_kernel(int N, int C, int se_K, T* output, + const T* input, const T* skip, + const T* bias, const T* w1, const T* b1, + const T* w2, const T* b2) { + const bool fp16 = std::is_same::value; + + int k = threadIdx.x; + int n = blockIdx.x; + + T board[8][8]; + T b = bias[k]; + +#pragma unroll + for (int hStart = 0; hStart < 8; hStart += 4) +#pragma unroll + for (int wStart = 0; wStart < 8; wStart += 4) { + // i) read to per 
thread registers (for doing output transform) + int shln = n * 4 + (hStart / 4) * 2 + (wStart / 4); + T outElTransformed[6][6]; +#pragma unroll + for (int y = 0; y < 6; y++) +#pragma unroll + for (int x = 0; x < 6; x++) + outElTransformed[y][x] = input[TEMP_INDEX_HWNC(y, x, shln, k)]; + + // ii) transform it + T outEl[4][4]; + OutputTransform4x4(&outEl[0][0], &outElTransformed[0][0]); + +#pragma unroll + for (int y = 0; y < 4; y++) +#pragma unroll + for (int x = 0; x < 4; x++) board[hStart + y][wStart + x] = outEl[y][x]; + } + + // Add bias, and compute the average for SE. + float S = 0; + float B = 0; + +#pragma unroll + for (int y = 0; y < 8; y++) +#pragma unroll + for (int x = 0; x < 8; x++) { + if (use_bias) board[y][x] += b; + if (use_se) S += (float)board[y][x]; + } + + if (use_se) { + __shared__ float shared_data[1024]; + float avg = S / 64; + shared_data[k] = avg; + __syncthreads(); + + // First fully-connected layer for SE + if (k < se_K) { + S = 0; + for (int i = 0; i < C; i++) { + S += shared_data[i] * float(readw1(i, k)); + } + S += (float)b1[k]; + if (S < 0) S = 0; // relu + shared_data[k] = S; + } + __syncthreads(); + + // Second fully-connected layer for SE + S = 0; + for (int i = 0; i < se_K; i++) { + float val = shared_data[i]; + S += val * float(readw2(i, k)); + B += val * float(readw2(i, k + C)); + } + S += (float)b2[k]; + B += (float)b2[k + C]; + + // Sigmoid (only on the scale part). + S = 1.0f / (1.0f + exp(-S)); + } + + // Scale/bias, add skip connection, perform relu, and write to output. + for (int h = 0; h < 8; h++) { + if (use_se) +#pragma unroll + for (int w = 0; w < 8; w++) board[h][w] = (T)(float(board[h][w]) * S + B); + + // residual add + if (use_skip) { + T skipInp[8]; + *((uint4*)(&skipInp[0])) = *((uint4*)(&skip[INDEX_NCHW(n, k, h, 0)])); + if (!fp16) + *((uint4*)(&skipInp[4])) = *((uint4*)(&skip[INDEX_NCHW(n, k, h, 4)])); +#pragma unroll + for (int w = 0; w < 8; w++) board[h][w] += skipInp[w]; + } + + // relu + if (relu) { +#pragma unroll + for (int w = 0; w < 8; w++) + if (board[h][w] < (T)0) board[h][w] = 0; + } + + // Write to output (use 128 bit writes to store one row a time) + *((uint4*)(&output[INDEX_NCHW(n, k, h, 0)])) = *((uint4*)&board[h][0]); + if (!fp16) + *((uint4*)(&output[INDEX_NCHW(n, k, h, 4)])) = *((uint4*)&board[h][4]); + } +} + +template +void FilterTransform(int N, int C, T* transformedFilter, const T* filter) { + // Each thread processes entire filter block (input 3x3 elements -> output 6x6 + // elements) + const int kBlockSize = 64; + const int kBlocks = DivUp(N * C, kBlockSize); + + filterTransform_kernel<<>>(N, C, N * C, + transformedFilter, filter); + + ReportCUDAErrors(cudaGetLastError()); +} + +template +void InputTransform(int N, int C, T* transformed_input, const T* input) { + // Each thread processes entire chess board (input 8x8 elements -> outputs + // 2x2, 6x6 elements) + InputTransform_kernel<<>>(N, C, input, transformed_input); + ReportCUDAErrors(cudaGetLastError()); +} + +template +void OutputTransform(int N, int C, int se_K, T* output, const T* input, + const T* skip, const T* bias, const T* w1, const T* b1, + const T* w2, const T* b2) { + // Each thread processes entire chess board + OutputTransform_kernel + <<>>(N, C, se_K, output, input, skip, bias, w1, b1, w2, b2); + ReportCUDAErrors(cudaGetLastError()); +} + +} // namespace cudnn_backend +} // namespace lczero From 7f7802e9795a70db3cf26f6db4ab7dc87ad67a5f Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Sun, 26 Apr 2020 18:21:43 +0530 Subject: [PATCH 124/151] 
cuda winograd fixes (#1238) * cuda winograd fixes - don't typecast directly to half datatype in CPU side code as older CUDA runtime doesn't support that. - don't use gemmEx version on GPUs older than Maxwell generation (not supported). - modify the check to enable custom_winograd setting. It should be faster in most cases - except presently on RTX GPUs when using fp16. --- src/neural/cuda/layers.cc | 47 +++++++++++++++++++----------- src/neural/cuda/layers.h | 9 ++++-- src/neural/cuda/network_cudnn.cc | 50 +++++++++++++++++++++++--------- 3 files changed, 74 insertions(+), 32 deletions(-) diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc index 44c7bfbc27..9dacb69060 100644 --- a/src/neural/cuda/layers.cc +++ b/src/neural/cuda/layers.cc @@ -709,14 +709,15 @@ PolicyMapLayer::~PolicyMapLayer() { template FusedWinogradConvSELayer::FusedWinogradConvSELayer( BaseLayer* ip, int C, int H, int W, int Cin, bool relu, bool bias, - bool skip_add, bool se, int se_k) + bool skip_add, bool se, int se_k, bool use_gemm_ex) : BaseLayer(C, H, W, ip), c_input_(Cin), use_relu_(relu), use_bias_(bias), skip_add_(skip_add), has_se_(se), - se_k_(se_k) { + se_k_(se_k), + use_gemm_ex_(use_gemm_ex) { // Allocate memory for weights (filter tensor) and biases. const size_t weight_size = sizeof(DataType) * c_input_ * C * 3 * 3; ReportCUDAErrors(cudaMalloc(&weights_, weight_size)); @@ -811,12 +812,16 @@ void FusedWinogradConvSELayer::LoadSEWeights(float* w1, float* b1, copyTypeConverted((DataType*)b2_, (float*)scratch, num_biases2); } - -void cublasRowMjaorMatrixMul(const half* A, const half* B, half* Out, int M, - int N, int K, int batchSize, cublasHandle_t cublas, - int algo = -1) { - half halfOne = (half)1.0f; - half halfZero = (half)0.0f; +template <> +void FusedWinogradConvSELayer::cublasRowMajorMatrixMul( + const half* A, const half* B, half* Out, int M, int N, int K, int batchSize, + cublasHandle_t cublas) { + // Need to initialize 1.0 and 0.0 as hexadecimal for fp16 because typecasting + // float to half type doesn't work before CUDA 10.0 + __half_raw one_h{0x3C00}; + __half_raw zero_h{0}; + half halfOne = one_h; + half halfZero = zero_h; // dimensions of matrix A = M x K // dimensions of matrix B = K x N @@ -827,18 +832,26 @@ void cublasRowMjaorMatrixMul(const half* A, const half* B, half* Out, int M, ReportCUBLASErrors(cublasGemmStridedBatchedEx( cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &halfOne, B, CUDA_R_16F, N, N * K, A, CUDA_R_16F, K, K * M, &halfZero, Out, CUDA_R_16F, N, N * M, - batchSize, CUDA_R_16F, cublasGemmAlgo_t(algo))); + batchSize, CUDA_R_16F, CUBLAS_GEMM_DEFAULT)); } -void cublasRowMjaorMatrixMul(const float* A, const float* B, float* Out, int M, - int N, int K, int batchSize, cublasHandle_t cublas, - int algo = -1) { +template <> +void FusedWinogradConvSELayer::cublasRowMajorMatrixMul( + const float* A, const float* B, float* Out, int M, int N, int K, + int batchSize, cublasHandle_t cublas) { + float floatOne = 1.0f; float floatZero = 0.0f; - ReportCUBLASErrors(cublasGemmStridedBatchedEx( - cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &floatOne, B, CUDA_R_32F, N, - N * K, A, CUDA_R_32F, K, K * M, &floatZero, Out, CUDA_R_32F, N, N * M, - batchSize, CUDA_R_32F, cublasGemmAlgo_t(algo))); + if (use_gemm_ex_) + ReportCUBLASErrors(cublasGemmStridedBatchedEx( + cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &floatOne, B, CUDA_R_32F, N, + N * K, A, CUDA_R_32F, K, K * M, &floatZero, Out, CUDA_R_32F, N, N * M, + batchSize, CUDA_R_32F, CUBLAS_GEMM_DEFAULT)); + else + // Much slower on RTX 
2060.. why? Maybe a cublas bug :-/ + ReportCUBLASErrors(cublasSgemmStridedBatched( + cublas, CUBLAS_OP_N, CUBLAS_OP_N, N, M, K, &floatOne, B, N, N * K, A, K, + K * M, &floatZero, Out, N, N * M, batchSize)); } template @@ -855,7 +868,7 @@ void FusedWinogradConvSELayer::Eval( InputTransform(N, C, transformed_input, input); - cublasRowMjaorMatrixMul(transformed_input, transformed_weights_, transformed_output, N*4, C, c_input_, 36, cublas); + cublasRowMajorMatrixMul(transformed_input, transformed_weights_, transformed_output, N*4, C, c_input_, 36, cublas); if (has_se_ && use_relu_ && use_bias_ && skip_add_) OutputTransform( diff --git a/src/neural/cuda/layers.h b/src/neural/cuda/layers.h index 239fd3e621..202240f2d4 100644 --- a/src/neural/cuda/layers.h +++ b/src/neural/cuda/layers.h @@ -214,8 +214,8 @@ class FusedWinogradConvSELayer : public BaseLayer { public: FusedWinogradConvSELayer(BaseLayer* ip, int C, int H, int W, - int Cin, bool relu, bool bias, bool skipAdd, - bool se, int se_k); + int Cin, bool relu, bool bias, bool skipAdd, bool se, + int se_k, bool use_gemm_ex); ~FusedWinogradConvSELayer(); void LoadWeights(float* pfilter, float* pBias, void* scratch); @@ -232,6 +232,7 @@ class FusedWinogradConvSELayer : public BaseLayer { const bool skip_add_; const bool has_se_; const int se_k_; + const bool use_gemm_ex_; DataType* biases_ = nullptr; DataType* weights_ = nullptr; @@ -242,6 +243,10 @@ class FusedWinogradConvSELayer : public BaseLayer { DataType* w2_; DataType* b1_; DataType* b2_; + + void cublasRowMajorMatrixMul(const DataType* A, const DataType* B, + DataType* Out, int M, int N, int K, + int batchSize, cublasHandle_t cublas); }; } // namespace cudnn_backend diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 875a8c67b5..bfaf22bf8a 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -241,6 +241,7 @@ class CudnnNetwork : public Network { // Default layout is nchw. nhwc_ = false; + bool hasTensorCores = false; if (std::is_same::value) { // Check if the GPU support FP16. @@ -251,13 +252,15 @@ class CudnnNetwork : public Network { // (SM 5.3 and 6.2). SM 6.1 GPUs also have FP16, but slower than FP32. // nhwc_ remains false. } else if (deviceProp.major >= 7) { - // NHWC layout is faster with Tensor Cores. + // NHWC layout is faster with Tensor Cores when using cudnn's implicit + // gemm algorithm. // Supported on Volta and Turing (and hopefully future GPUs too). // Some GPUs (GTX 16xx) are SM 7.5 but don't have tensor cores // enabling TENSOR_OP_MATH or nhwc_ layout for them works but is // very very slow (likely because the system emulates it). if (!strstr(deviceProp.name, "GTX 16")) { + hasTensorCores = true; nhwc_ = true; } } else { @@ -272,12 +275,37 @@ class CudnnNetwork : public Network { // support it). ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH)); - // Check if we want to enable our custom winograd ? constexpr bool fp16 = std::is_same::value; + const int kNumInputPlanes = kInputPlanes; + const int kNumFilters = weights.input.biases.size(); + numBlocks_ = weights.residual.size(); + + // Use our custom winograd for residual tower convolutions for most cases: + // + // 1. Should be always faster than cudnn's winograd that we use for fp32, + // and for fp16 on GPUs without tensor cores + // + // 2. Should also be faster than cudnn's implicit GEMM on GPUs with tensor + // cores too, but only for networks with 256 or higher no. of filters. + // + // 3. 
Currently a bug in cublas makes it slower on RTX GPUs with fp16 so + // it's disabled. TODO: Enable it once the bug has been fixed and it's + // tested to be faster. Putting check for cuda 11 for now. - // Use our custom winograd impl for fp32. - // TODO: Enable this for fp16 too once it's tested to be faster. - use_custom_winograd_ = !fp16; + if (fp16) { + int cuda_version; + cudaRuntimeGetVersion(&cuda_version); + if (!hasTensorCores) + use_custom_winograd_ = true; + else if (kNumFilters >= 256 && + !(deviceProp.major == 7 && deviceProp.minor == 5 && + cuda_version < 11000)) + use_custom_winograd_ = true; + else + use_custom_winograd_ = false; + } else { + use_custom_winograd_ = true; + } // Override if set in backend-opts. if (!options.IsDefault("custom_winograd")) @@ -286,14 +314,10 @@ class CudnnNetwork : public Network { // Winograd needs nchw tensor layout. if (use_custom_winograd_) nhwc_ = false; - const int kNumInputPlanes = kInputPlanes; - const int kNumFilters = weights.input.biases.size(); - - numBlocks_ = weights.residual.size(); - - has_se_ = false; + const bool use_gemm_ex = deviceProp.major >= 5; // 0. Check for SE. + has_se_ = false; if (weights.residual[0].has_se) { has_se_ = true; } @@ -370,7 +394,7 @@ class CudnnNetwork : public Network { if (use_custom_winograd_) { auto conv1 = std::make_unique>( getLastLayer(), kNumFilters, 8, 8, kNumFilters, true, true, false, - false, 0); + false, 0, use_gemm_ex); conv1->LoadWeights(&weights.residual[block].conv1.weights[0], &weights.residual[block].conv1.biases[0], scratch_mem_); @@ -380,7 +404,7 @@ class CudnnNetwork : public Network { int se_k = weights.residual[block].se.b1.size(); auto conv2 = std::make_unique>( getLastLayer(), kNumFilters, 8, 8, kNumFilters, true, true, true, - has_se, se_k); + has_se, se_k, use_gemm_ex); conv2->LoadWeights(&weights.residual[block].conv2.weights[0], &weights.residual[block].conv2.biases[0], scratch_mem_); From 89e35296c58e01abd66bbf09f7d6401d3b6b45bc Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Mon, 27 Apr 2020 21:23:25 -0700 Subject: [PATCH 125/151] Allow most parts of fen to be optional. (#1234) Default to white to move, no castling, no en passant, 0 rule50ply, 1 total move. Also convert other string to std::string and removing using. --- src/chess/board.cc | 40 +++++++++++++++++++++++++--------------- src/chess/board.h | 2 +- src/chess/board_test.cc | 24 ++++++++++++++++++++++++ src/chess/pgn.h | 6 ------ 4 files changed, 50 insertions(+), 22 deletions(-) diff --git a/src/chess/board.cc b/src/chess/board.cc index 08dde6f8be..5b6fa47477 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -42,8 +42,6 @@ namespace lczero { -using std::string; - const char* ChessBoard::kStartposFen = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"; @@ -969,22 +967,34 @@ MoveList ChessBoard::GenerateLegalMoves() const { return result; } -void ChessBoard::SetFromFen(const std::string& fen, int* rule50_ply, - int* moves) { +void ChessBoard::SetFromFen(std::string fen, int* rule50_ply, int* moves) { Clear(); int row = 7; int col = 0; - std::istringstream fen_str(fen); - string board; - string who_to_move; - string castlings; - string en_passant; - int rule50_halfmoves; - int total_moves; - fen_str >> board >> who_to_move >> castlings >> en_passant >> - rule50_halfmoves >> total_moves; + // Remove any trailing whitespaces to detect eof after the last field. 
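  // (The find_if/erase call below is the usual C++ right-trim idiom: reverse
  //  iterators locate the last non-space character, .base() converts that back
  //  to a forward iterator one past it, and erase() drops everything after it,
  //  e.g. " k/1R//K w " becomes " k/1R//K w". Leading spaces are left alone;
  //  the istringstream extraction below skips them anyway.)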
+ fen.erase(std::find_if(fen.rbegin(), fen.rend(), + [](char c) { return !std::isspace(c); }) + .base(), + fen.end()); + std::istringstream fen_str(fen); + std::string board; + fen_str >> board; + std::string who_to_move = "w"; + if (!fen_str.eof()) fen_str >> who_to_move; + // Assume no castling rights. Other engines, e.g., Stockfish, assume kings and + // rooks on their initial rows can each castle with the outer-most rook. Our + // implementation currently supports 960 castling where white and black rooks + // have matching columns, so it's unclear which rights to assume. + std::string castlings = "-"; + if (!fen_str.eof()) fen_str >> castlings; + std::string en_passant = "-"; + if (!fen_str.eof()) fen_str >> en_passant; + int rule50_halfmoves = 0; + if (!fen_str.eof()) fen_str >> rule50_halfmoves; + int total_moves = 1; + if (!fen_str.eof()) fen_str >> total_moves; if (!fen_str) throw Exception("Bad fen string: " + fen); for (char c : board) { @@ -1127,8 +1137,8 @@ bool ChessBoard::HasMatingMaterial() const { return light_bishop && dark_bishop; } -string ChessBoard::DebugString() const { - string result; +std::string ChessBoard::DebugString() const { + std::string result; for (int i = 7; i >= 0; --i) { for (int j = 0; j < 8; ++j) { if (!our_pieces_.get(i, j) && !their_pieces_.get(i, j)) { diff --git a/src/chess/board.h b/src/chess/board.h index 939f533581..4cd3111b40 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -70,7 +70,7 @@ class ChessBoard { // If @rule50_ply and @moves are not nullptr, they are filled with number // of moves without capture and number of full moves since the beginning of // the game. - void SetFromFen(const std::string& fen, int* rule50_ply = nullptr, + void SetFromFen(std::string fen, int* rule50_ply = nullptr, int* moves = nullptr); // Nullifies the whole structure. void Clear(); diff --git a/src/chess/board_test.cc b/src/chess/board_test.cc index 65b46f44b8..fb927dd0ee 100644 --- a/src/chess/board_test.cc +++ b/src/chess/board_test.cc @@ -62,6 +62,30 @@ TEST(ChessBoard, PseudolegalMovesStartingPos) { EXPECT_EQ(moves.size(), 20); } +TEST(ChessBoard, PartialFen) { + ChessBoard board; + int rule50ply; + int gameply; + board.SetFromFen("k/1R//K", &rule50ply, &gameply); + auto moves = board.GeneratePseudolegalMoves(); + + EXPECT_EQ(moves.size(), 19); + EXPECT_EQ(rule50ply, 0); + EXPECT_EQ(gameply, 1); +} + +TEST(ChessBoard, PartialFenWithSpaces) { + ChessBoard board; + int rule50ply; + int gameply; + board.SetFromFen(" k/1R//K w ", &rule50ply, &gameply); + auto moves = board.GeneratePseudolegalMoves(); + + EXPECT_EQ(moves.size(), 19); + EXPECT_EQ(rule50ply, 0); + EXPECT_EQ(gameply, 1); +} + namespace { int Perft(const ChessBoard& board, int max_depth, bool dump = false, int depth = 0) { diff --git a/src/chess/pgn.h b/src/chess/pgn.h index 7fae7867d4..370af5584a 100644 --- a/src/chess/pgn.h +++ b/src/chess/pgn.h @@ -65,12 +65,6 @@ class PgnReader { if (uc_line.find("[FEN \"", 0) == 0) { auto start_trimmed = line.substr(6); cur_startpos_ = start_trimmed.substr(0, start_trimmed.find('"')); - // Some 'opening books' omit the last 2 fields, so there is only 3 - // space delimiters. 
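        // (These lines go away because ChessBoard::SetFromFen, changed above
        //  in this same patch, now substitutes defaults for any missing
        //  trailing FEN fields, so padding the fen with " 0 1" here is
        //  redundant.)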
- if (std::count(cur_startpos_.begin(), cur_startpos_.end(), ' ') == - 3) { - cur_startpos_ += " 0 1"; - } cur_board_.SetFromFen(cur_startpos_); } continue; From c697680a5032d33bbe80817a1006b7e318b5f659 Mon Sep 17 00:00:00 2001 From: Tilps Date: Tue, 28 Apr 2020 21:12:20 +1000 Subject: [PATCH 126/151] Fix UpdateNps to actually smooth the nps and correctly handle time_since_movestart_ms == 0 (#1243) --- src/mcts/stoppers/smooth.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mcts/stoppers/smooth.cc b/src/mcts/stoppers/smooth.cc index dec6e7e9d5..e9a0732447 100644 --- a/src/mcts/stoppers/smooth.cc +++ b/src/mcts/stoppers/smooth.cc @@ -148,12 +148,13 @@ class SmoothTimeManager : public TimeManager { float UpdateNps(int64_t time_since_movestart_ms, int64_t nodes_since_movestart) { Mutex::Lock lock(mutex_); - if (nps_is_reliable_ && time_since_movestart_ms <= last_time_) { + if (time_since_movestart_ms <= 0) return nps_; + if (nps_is_reliable_ && time_since_movestart_ms >= last_time_) { const float nps = 1000.0f * nodes_since_movestart / time_since_movestart_ms; nps_ = ExponentialDecay(nps_, nps, params_.nps_halfupdate_seconds(), (time_since_movestart_ms - last_time_) / 1000.0f); - } else if (time_since_movestart_ms > 0) { + } else { nps_ = 1000.0f * nodes_since_movestart / time_since_movestart_ms; } last_time_ = time_since_movestart_ms; From 34d7d9ff02de44c5453f6a3622d4561f27f01f79 Mon Sep 17 00:00:00 2001 From: Tilps Date: Tue, 28 Apr 2020 21:12:32 +1000 Subject: [PATCH 127/151] Update changelog for 0.25.0 final release. (#1244) --- changelog.txt | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/changelog.txt b/changelog.txt index 733c78375e..696947d2a7 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,6 +1,18 @@ -v0.25.0-rc2 (2020-04-23) +v0.25.0 (2020-04-28) ~~~~~~~ +* Relax strictness for complete standard fens in uci and opening books. Fen + must still be standard, but default values will be substituted for sections + that are missing. +* Restore some backwards compatibility in cudnn backends that was lost with + the addition of the new convolution implementation. It is also on by default + for more scenarios, although still off for fp16 on RTX gpus. +* Small logic fix for nps smoothing in the new optional experimental time + manager. + +v0.25.0-rc2 (2020-04-23) +~~~~~~~~~~~ + * Increased upper limit for maximum collision events. * Allow negative values for some of the extended moves left head parameters. * Fix a critical bug in training data generation for input type 3. @@ -25,7 +37,7 @@ cudnn-fp16 until an issue with cublas performance on some gpus is resolved. v0.25.0-rc1 (2020-04-09) -~~~~~~~ +~~~~~~~~~~~ * Now requires a c++17 supporting compilation environment to build. * Support for Moves Left Head based networks. Includes options to adjust search From 8930378db5382f6f34e62fff7314afe5d1f99754 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Tue, 28 Apr 2020 11:01:54 -0700 Subject: [PATCH 128/151] Always report at least 1 depth. 
(#1247) --- src/chess/uciloop.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/chess/uciloop.cc b/src/chess/uciloop.cc index ee7c5e56df..42e4e4f393 100644 --- a/src/chess/uciloop.cc +++ b/src/chess/uciloop.cc @@ -27,6 +27,7 @@ #include "uciloop.h" +#include #include #include #include @@ -246,7 +247,8 @@ void UciLoop::SendInfo(const std::vector& infos) { if (info.game_id != -1) res += " gameid " + std::to_string(info.game_id); if (info.is_black) res += " side " + std::string(*info.is_black ? "black" : "white"); - if (info.depth >= 0) res += " depth " + std::to_string(info.depth); + if (info.depth >= 0) + res += " depth " + std::to_string(std::max(info.depth, 1)); if (info.seldepth >= 0) res += " seldepth " + std::to_string(info.seldepth); if (info.time >= 0) res += " time " + std::to_string(info.time); if (info.nodes >= 0) res += " nodes " + std::to_string(info.nodes); From 938615a413f217be6fa8062652649bf4b901a502 Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Wed, 29 Apr 2020 09:43:55 +0530 Subject: [PATCH 129/151] Fix un-intended regression for GTX GPUs (#1246) --- src/neural/cuda/network_cudnn.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index bfaf22bf8a..8dfe3279d4 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -271,9 +271,8 @@ class CudnnNetwork : public Network { if (!options.IsDefault("nhwc")) nhwc_ = options.Get("nhwc"); } - // Always try to set tensor math (won't have any effect on GPUs that don't - // support it). - ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH)); + if (hasTensorCores) + ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH)); constexpr bool fp16 = std::is_same::value; const int kNumInputPlanes = kInputPlanes; From ad4b5f2c1db68c8120ad77c33a748bef724b053f Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Thu, 30 Apr 2020 13:21:06 +0530 Subject: [PATCH 130/151] memory optimization for cudnn custom_winograd (#1250) * memory optimization for cudnn custom_winograd - don't save untransformed weights - print warning message when low memory is detected. * address review comments * fix warning message * fix total weight size calculation 2 layers per residual block! --- src/neural/cuda/layers.cc | 9 ++++---- src/neural/cuda/layers.h | 1 - src/neural/cuda/network_cudnn.cc | 38 ++++++++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc index 9dacb69060..dbe27c9122 100644 --- a/src/neural/cuda/layers.cc +++ b/src/neural/cuda/layers.cc @@ -720,7 +720,6 @@ FusedWinogradConvSELayer::FusedWinogradConvSELayer( use_gemm_ex_(use_gemm_ex) { // Allocate memory for weights (filter tensor) and biases. const size_t weight_size = sizeof(DataType) * c_input_ * C * 3 * 3; - ReportCUDAErrors(cudaMalloc(&weights_, weight_size)); if (use_bias_) { const size_t blas_size = sizeof(DataType) * C; @@ -755,12 +754,15 @@ void FusedWinogradConvSELayer::LoadWeights(float* pfilter, const size_t weight_size = sizeof(float) * c_input_ * C * 3 * 3; const size_t blas_size = sizeof(float) * C; + // Store untransformed weights in scratch. 
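  // (For reference: scratch doubles as a staging buffer here. The cudaMemcpy
  //  below copies the raw float filter into the start of scratch, `weights`
  //  points further into the same buffer and receives the type-converted copy,
  //  and FilterTransform then reads `weights` into transformed_weights_. The
  //  staged copies are only needed while weights are being loaded.)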
+ const DataType* weights = (DataType*)scratch + weight_size + blas_size; + // first copy from CPU memory to scratch space in GPU memory // and then do the type conversion using a kernel assert(scratch); ReportCUDAErrors( cudaMemcpy(scratch, pfilter, weight_size, cudaMemcpyHostToDevice)); - copyTypeConverted((DataType*)weights_, (float*)scratch, C * c_input_ * 3 * 3); + copyTypeConverted((DataType*)weights, (float*)scratch, C * c_input_ * 3 * 3); if (pBias) { ReportCUDAErrors( @@ -769,7 +771,7 @@ void FusedWinogradConvSELayer::LoadWeights(float* pfilter, } // run winograd transform kernel for the filter - FilterTransform(C, c_input_, transformed_weights_, weights_); + FilterTransform(C, c_input_, transformed_weights_, weights); } // TODO: Do this on the GPU to improve network load time! @@ -888,7 +890,6 @@ void FusedWinogradConvSELayer::Eval( template FusedWinogradConvSELayer::~FusedWinogradConvSELayer() { - ReportCUDAErrors(cudaFree(weights_)); ReportCUDAErrors(cudaFree(transformed_weights_)); if (use_bias_) ReportCUDAErrors(cudaFree(biases_)); if (has_se_) { diff --git a/src/neural/cuda/layers.h b/src/neural/cuda/layers.h index 202240f2d4..1b897744dd 100644 --- a/src/neural/cuda/layers.h +++ b/src/neural/cuda/layers.h @@ -235,7 +235,6 @@ class FusedWinogradConvSELayer : public BaseLayer { const bool use_gemm_ex_; DataType* biases_ = nullptr; - DataType* weights_ = nullptr; DataType* transformed_weights_ = nullptr; // After winograd transform. // Weights and Biases for (optional) SE. diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 8dfe3279d4..8077091406 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -310,6 +310,32 @@ class CudnnNetwork : public Network { if (!options.IsDefault("custom_winograd")) use_custom_winograd_ = options.Get("custom_winograd"); + // Warn if the memory required for storing transformed weights is + // going to exceed 60% of total video memory, force custom_winograd off + // if it's going to exceed 80% of memory. + size_t residual_single_layer_weight_size = + 3 * 3 * kNumFilters * kNumFilters * sizeof(DataType); + size_t residual_weight_size = + residual_single_layer_weight_size * numBlocks_ * 2; + size_t transformed_residual_weight_size = residual_weight_size * 4; + if (residual_weight_size > 0.6 * deviceProp.totalGlobalMem) { + CERR << "Low video memory detected. You may run into OOM errors. Please " + "consider using a smaller network."; + // No hope of using custom winograd - even the fallback path might not run. + use_custom_winograd_ = false; + } else if (use_custom_winograd_) { + if (transformed_residual_weight_size > 0.8 * deviceProp.totalGlobalMem) { + CERR << "WARNING: Low GPU video memory detected. Turning off " + "custom_winograd."; + use_custom_winograd_ = false; + } else if (transformed_residual_weight_size > + 0.6 * deviceProp.totalGlobalMem) { + CERR << "WARNING: Low GPU video memory. You may run into OOM errors. " + "Please consider using a smaller network, or run with " + "--backend-opts=custom_winograd=false"; + } + } + // Winograd needs nchw tensor layout. if (use_custom_winograd_) nhwc_ = false; @@ -361,8 +387,16 @@ class CudnnNetwork : public Network { cudnn_, xDesc, wDesc, convDesc, xDesc, conv_algo, &scratch_size_)); // Have some minumum as we also use this for transforming weights. 
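    // (Rough numbers, assuming a hypothetical 256-filter fp32 net: one
    //  residual conv layer is 3*3*256*256*4 bytes, about 2.25 MiB, so the
    //  "at least 3x of single layer's weight size" floor below stays under
    //  7 MiB and the 128 MiB default is what normally applies.)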
- const int maxWeightSize = 128 * 1024 * 1024; - if (scratch_size_ < maxWeightSize) scratch_size_ = maxWeightSize; + int max_weight_size = 128 * 1024 * 1024; + + // parts from scratch allocation are suballocated to hold various weights + // and biases when transforming winograd weights (one layer at a time), 128 + // MB is way more than that what we need but make sure it's at least 3x of + // single layer's weight size to be safe. + if (max_weight_size < 3 * residual_single_layer_weight_size) + max_weight_size = 3 * residual_single_layer_weight_size; + + if (scratch_size_ < max_weight_size) scratch_size_ = max_weight_size; if (use_custom_winograd_) { // Need additional space for transformed input/outputs which are 36/16 From 97bd3e703671f1268346702c0e97bca7b14a97a8 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 4 May 2020 01:36:57 +0300 Subject: [PATCH 131/151] keep pdb files only for release builds (#1256) --- scripts/appveyor_win_build.cmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/appveyor_win_build.cmd b/scripts/appveyor_win_build.cmd index aa75c0c762..d74eb4e6c2 100644 --- a/scripts/appveyor_win_build.cmd +++ b/scripts/appveyor_win_build.cmd @@ -1,6 +1,7 @@ SET PGO=false IF %APPVEYOR_REPO_TAG%==true IF %DX%==false SET PGO=true -IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /p:DebugInformationFormat=ProgramDatabase /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +IF %PGO%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +IF EXIST build\lc0.pdb del build\lc0.pdb IF %PGO%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" IF ERRORLEVEL 1 EXIT cd build From 41b041ee92a5b7cfde77a9c8d170676cf35829da Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Wed, 6 May 2020 00:03:13 +0300 Subject: [PATCH 132/151] doc update (#1267) --- CONTRIBUTING.md | 12 +----------- README.md | 2 ++ 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 639478d273..87786f73e9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ These are the guidelines and standards followed by this codebase. -The language is C++, specifically C++14. As such, manual `new` and `delete` memory mangement is strongly discouraged; use the standard library tools for managing memory (such as `unique_ptr`, `shared_ptr` etc.). When compiler support is more widespread, the project may upgrade to the C++17 standard in the future. +The language is C++, specifically C++17. As such, manual `new` and `delete` memory mangement is strongly discouraged; use the standard library tools for managing memory (such as `unique_ptr`, `shared_ptr` etc.). This codebase uses semantic versioning. A release is the final commit for that version number, and all subsequent commits are development for the next version. `master` is the default branch, and the active development branch (as such, all Pull Requests go here); it always targets a minor (or major) version which succeeds the current relase. `release` is always equivalent to the latest tag. @@ -61,16 +61,6 @@ Pull Requests are squahsed when merged. 
This means all commits in the branch wil This also means it's not possible to reuse one branch for multiple PRs; new PRs must either use entirely new branches, or else you could use `git reset --hard` on the current branch. -### Command line/UCI options - -The options code handles both UCI options and command line options at the same time; in fact they are one and the same. Each option has a "flag name" and a "description". The flag name is used as the command line `--flag-name`. The description serves a dual purpose: it is the text printed by `./lc0 --help`, but it also serves as the *name* of the UCI option as well. Therefore the description should: - - * Not end with a period (per the UCI specification) - * Be clear and succinct, to serve as both a help message and standalone UCI option name - * Be short (to fit as a UCI option in chess GUIs) - * Be different from the flag name (since it's a help message) - - ### Allowed features Lc0 is still in early stages of development, and has not yet reached the point where we are ready to add small tweaks to add few points of a rating. Large code changes still happen, and having lots of small optimizations adds overhead to larger changes, slowing development. diff --git a/README.md b/README.md index b9a16a5e98..3b09e5c950 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,8 @@ Backend support includes (in theory) any CBLAS-compatible library for CPU usage, Finally, lc0 requires a compiler supporting C++17. Minimal versions seem to be g++ v8.0, clang v5.0 (with C++17 stdlib) or Visual Studio 2017. +*Note* that cuda checks the compiler version and stops even with newer compilers, and to work around this we have added the `nvcc_ccbin` build option. This is more of an issue with new Linux versions, where we recommend to install `g++-7` and add `-Dnvcc_ccbin=g++-7` to the `build.sh` command. + Given those basics, the OS and backend specific instructions are below. ### Linux From 9158a67a917d630caf7fbd22baeaebd31686db24 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Wed, 6 May 2020 01:07:29 -0700 Subject: [PATCH 133/151] Include verbose stats for the node. (#1268) Use printing lambdas for parts of the verbose output to share between the newly outputted node and its children. --- src/mcts/search.cc | 137 +++++++++++++++++++++++++-------------------- src/mcts/search.h | 2 +- 2 files changed, 76 insertions(+), 63 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index c9a6f82f37..08553dfbf7 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -284,77 +284,90 @@ std::vector Search::GetVerboseStats(Node* node) const { b.GetQ(fpu, draw_score, logit_q) + b.GetU(U_coeff)); }); - std::vector infos; - for (const auto& edge : edges) { - std::ostringstream oss; - oss << std::fixed; - - oss << std::left << std::setw(5) - << edge.GetMove(is_black_to_move).as_string(); - - float Q = edge.GetQ(fpu, draw_score, logit_q); - float M_effect = do_moves_left_adjustment - ? (std::clamp(m_slope * edge.GetM(0.0f), -m_cap, m_cap) * - std::copysign(1.0f, -Q) * (a + b * std::abs(Q) + c * Q * Q)) - : 0.0f; - - // TODO: should this be displaying transformed index? - oss << " (" << std::setw(4) << edge.GetMove().as_nn_index(0) << ")"; - - oss << " N: " << std::right << std::setw(7) << edge.GetN() << " (+" - << std::setw(2) << edge.GetNInFlight() << ") "; - - oss << "(P: " << std::setw(5) << std::setprecision(2) << edge.GetP() * 100 - << "%) "; - - // Default value here assumes user knows to ignore this field when N is 0. 
- oss << "(WL: " << std::setw(8) << std::setprecision(5) << edge.GetWL(0.0f) - << ") "; - - // Default value here assumes user knows to ignore this field when N is 0. - oss << "(D: " << std::setw(6) << std::setprecision(3) << edge.GetD(0.0f) - << ") "; - - // Default value here assumes user knows to ignore this field when N is 0. - oss << "(M: " << std::setw(4) << std::setprecision(1) << edge.GetM(0.0f) - << ") "; - - oss << "(Q: " << std::setw(8) << std::setprecision(5) - << edge.GetQ(fpu, draw_score, /* logit_q= */ false) << ") "; - - oss << "(U: " << std::setw(6) << std::setprecision(5) << edge.GetU(U_coeff) - << ") "; - - oss << "(S: " << std::setw(8) << std::setprecision(5) - << Q + edge.GetU(U_coeff) + M_effect << ") "; - - oss << "(V: "; + auto print = [](auto* oss, auto pre, auto v, auto post, auto w, int p = 0) { + *oss << pre << std::setw(w) << std::setprecision(p) << v << post; + }; + auto print_head = [&](auto* oss, auto label, auto i, auto n, auto f, auto p) { + *oss << std::fixed; + print(oss, "", label, " ", 5); + print(oss, "(", i, ") ", 4); + *oss << std::right; + print(oss, "N: ", n, " ", 7); + print(oss, "(+", f, ") ", 2); + print(oss, "(P: ", p * 100, "%) ", 5, 2); + }; + auto print_stats = [&](auto* oss, const auto* n) { + const auto sign = n == node ? -1 : 1; + if (n) { + print(oss, "(WL: ", sign * n->GetWL(), ") ", 8, 5); + print(oss, "(D: ", n->GetD(), ") ", 5, 3); + print(oss, "(M: ", n->GetM(), ") ", 4, 1); + } else { + *oss << "(WL: -.-----) (D: -.---) (M: -.-) "; + } + print(oss, "(Q: ", n ? sign * n->GetQ(sign * draw_score) : fpu, ") ", 8, 5); + }; + auto print_tail = [&](auto* oss, const auto* n) { + const auto sign = n == node ? -1 : 1; std::optional v; - if (edge.IsTerminal()) { - v = edge.node()->GetQ(draw_score); + if (n && n->IsTerminal()) { + v = n->GetQ(sign * draw_score); } else { - NNCacheLock nneval = GetCachedNNEval(edge.node()); + NNCacheLock nneval = GetCachedNNEval(n); if (nneval) v = -nneval->q; } if (v) { - oss << std::setw(7) << std::setprecision(4) << *v; + print(oss, "(V: ", sign * *v, ") ", 7, 4); } else { - oss << " -.----"; + *oss << "(V: -.----) "; } - oss << ") "; - const auto [edge_lower, edge_upper] = edge.GetBounds(); - oss << (edge_lower == edge_upper - ? "(T) " - : edge_lower == GameResult::DRAW && - edge_upper == GameResult::WHITE_WON - ? "(W) " - : edge_lower == GameResult::BLACK_WON && - edge_upper == GameResult::DRAW - ? "(L) " - : ""); + if (n) { + auto [lo, up] = n->GetBounds(); + if (sign == -1) { + lo = -lo; + up = -up; + std::swap(lo, up); + } + *oss << (lo == up + ? "(T) " + : lo == GameResult::DRAW && up == GameResult::WHITE_WON + ? "(W) " + : lo == GameResult::BLACK_WON && up == GameResult::DRAW + ? "(L) " + : ""); + } + }; + + std::vector infos; + for (const auto& edge : edges) { + float Q = edge.GetQ(fpu, draw_score, logit_q); + float M_effect = + do_moves_left_adjustment + ? (std::clamp(m_slope * edge.GetM(0.0f), -m_cap, m_cap) * + std::copysign(1.0f, -Q) * (a + b * std::abs(Q) + c * Q * Q)) + : 0.0f; + + std::ostringstream oss; + oss << std::left; + // TODO: should this be displaying transformed index? 
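    // (For orientation: print_head, print_stats, the U/S prints and print_tail
    //  together emit one stats line per edge, roughly
    //    move (nn_idx) N: visits (+in_flight) (P: ..%) (WL: ..) (D: ..) (M: ..)
    //    (Q: ..) (U: ..) (S: ..) (V: ..) [T/W/L bound marker]
    //  and the same helpers are reused afterwards for the single "node"
    //  summary line.)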
+ print_head(&oss, edge.GetMove(is_black_to_move).as_string(), + edge.GetMove().as_nn_index(0), edge.GetN(), edge.GetNInFlight(), + edge.GetP()); + print_stats(&oss, edge.node()); + print(&oss, "(U: ", edge.GetU(U_coeff), ") ", 6, 5); + print(&oss, "(S: ", Q + edge.GetU(U_coeff) + M_effect, ") ", 8, 5); + print_tail(&oss, edge.node()); infos.emplace_back(oss.str()); } + + // Include stats about the node in similar format to its children above. + std::ostringstream oss; + print_head(&oss, "node ", node->GetNumEdges(), node->GetN(), + node->GetNInFlight(), node->GetVisitedPolicy()); + print_stats(&oss, node); + print_tail(&oss, node); + infos.emplace_back(oss.str()); return infos; } @@ -384,7 +397,7 @@ void Search::SendMovesStats() const REQUIRES(counters_mutex_) { } } -NNCacheLock Search::GetCachedNNEval(Node* node) const { +NNCacheLock Search::GetCachedNNEval(const Node* node) const { if (!node) return {}; std::vector moves; diff --git a/src/mcts/search.h b/src/mcts/search.h index b64ee1b14b..ff5b703c8e 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -135,7 +135,7 @@ class Search { std::vector GetVerboseStats(Node* node) const; // Returns NN eval for a given node from cache, if that node is cached. - NNCacheLock GetCachedNNEval(Node* node) const; + NNCacheLock GetCachedNNEval(const Node* node) const; // Returns the draw score at the root of the search. At odd depth pass true to // the value of @is_odd_depth to change the sign of the draw score. From b6b3011378de00bd4327a9509d471925fa375743 Mon Sep 17 00:00:00 2001 From: Hace Date: Fri, 8 May 2020 22:28:08 +0200 Subject: [PATCH 134/151] add alphazero time manager (#1201) --- meson.build | 1 + src/mcts/stoppers/alphazero.cc | 95 ++++++++++++++++++++++++++++++++++ src/mcts/stoppers/alphazero.h | 37 +++++++++++++ src/mcts/stoppers/factory.cc | 15 ++++-- 4 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 src/mcts/stoppers/alphazero.cc create mode 100644 src/mcts/stoppers/alphazero.h diff --git a/meson.build b/meson.build index 65591353a8..9d4343203c 100644 --- a/meson.build +++ b/meson.build @@ -131,6 +131,7 @@ files += [ 'src/mcts/stoppers/common.cc', 'src/mcts/stoppers/factory.cc', 'src/mcts/stoppers/legacy.cc', + 'src/mcts/stoppers/alphazero.cc', 'src/mcts/stoppers/smooth.cc', 'src/mcts/stoppers/stoppers.cc', 'src/mcts/stoppers/timemgr.cc', diff --git a/src/mcts/stoppers/alphazero.cc b/src/mcts/stoppers/alphazero.cc new file mode 100644 index 0000000000..5dfccdba01 --- /dev/null +++ b/src/mcts/stoppers/alphazero.cc @@ -0,0 +1,95 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . 
+ + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "mcts/stoppers/stoppers.h" + +namespace lczero { + +namespace { + +class AlphazeroStopper : public TimeLimitStopper { + public: + AlphazeroStopper(int64_t deadline_ms, int64_t* time_piggy_bank) + : TimeLimitStopper(deadline_ms), time_piggy_bank_(time_piggy_bank) {} + void OnSearchDone(const IterationStats& stats) override { + *time_piggy_bank_ += GetTimeLimitMs() - stats.time_since_movestart; + } + + private: + int64_t* const time_piggy_bank_; +}; + +class AlphazeroTimeManager : public TimeManager { + public: + AlphazeroTimeManager(int64_t move_overhead, const OptionsDict& params) + : move_overhead_(move_overhead), + alphazerotimevalue_(params.GetOrDefault("alphazero-time-value", 20.0f)), + spend_saved_time_(params.GetOrDefault("immediate-use", 1.0f)) {} + std::unique_ptr GetStopper(const GoParams& params, + const NodeTree& tree) override; + + private: + const int64_t move_overhead_; + const float alphazerotimevalue_; + const float spend_saved_time_; + // No need to be atomic as only one thread will update it. + int64_t time_spared_ms_ = 0; +}; + +std::unique_ptr AlphazeroTimeManager::GetStopper( + const GoParams& params, const NodeTree& tree) { + const Position& position = tree.HeadPosition(); + const bool is_black = position.IsBlackToMove(); + const std::optional& time = (is_black ? params.btime : params.wtime); + // If no time limit is given, don't stop on this condition. + if (params.infinite || params.ponder || !time) return nullptr; + + const std::optional& inc = is_black ? params.binc : params.winc; + const int increment = inc ? std::max(int64_t(0), *inc) : 0; + + auto total_moves_time = *time + increment - move_overhead_; + + // use the increment in the first upcoming move + float this_move_time = increment + (total_moves_time / alphazerotimevalue_); + + LOGFILE << "Budgeted time for the move: " << this_move_time << "ms" + << "Remaining time " << *time + << "ms(-" << move_overhead_ << "ms overhead)"; + + // Make sure we don't exceed current time limit with what we calculated. + auto deadline = + std::min(static_cast(this_move_time), *time + increment - move_overhead_); + return std::make_unique(deadline, &time_spared_ms_); +} + +} // namespace + +std::unique_ptr MakeAlphazeroTimeManager(int64_t move_overhead, + const OptionsDict& params) { + return std::make_unique(move_overhead, params); +} +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/alphazero.h b/src/mcts/stoppers/alphazero.h new file mode 100644 index 0000000000..7baa0735c7 --- /dev/null +++ b/src/mcts/stoppers/alphazero.h @@ -0,0 +1,37 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2020 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#pragma once + +#include "utils/optionsdict.h" + +namespace lczero { + +std::unique_ptr MakeAlphazeroTimeManager(int64_t move_overhead, + const OptionsDict& params); + +} // namespace lczero \ No newline at end of file diff --git a/src/mcts/stoppers/factory.cc b/src/mcts/stoppers/factory.cc index 5c79c0326f..b88600698f 100644 --- a/src/mcts/stoppers/factory.cc +++ b/src/mcts/stoppers/factory.cc @@ -31,6 +31,7 @@ #include "factory.h" #include "mcts/stoppers/legacy.h" +#include "mcts/stoppers/alphazero.h" #include "mcts/stoppers/smooth.h" #include "mcts/stoppers/stoppers.h" #include "utils/exception.h" @@ -43,9 +44,11 @@ const OptionId kMoveOverheadId{ "Amount of time, in milliseconds, that the engine subtracts from it's " "total available time (to compensate for slow connection, interprocess " "communication, etc)."}; -const OptionId kTimeManagerId{"time-manager", "TimeManager", - "Name and config of atime manager."}; - +const OptionId kTimeManagerId{ + "time-manager", "TimeManager", + "Name and config of a time manager. " + "Possible names are 'legacy' (default), 'smooth-experimental' and 'alphazero'." 
+ "See https://lc0.org/timemgr for configuration details."}; } // namespace void PopulateTimeManagementOptions(RunType for_what, OptionsParser* options) { @@ -61,6 +64,7 @@ std::unique_ptr MakeTimeManager(const OptionsDict& options) { OptionsDict tm_options; tm_options.AddSubdictFromString(options.Get(kTimeManagerId)); + const auto managers = tm_options.ListSubdicts(); std::unique_ptr time_manager; @@ -68,13 +72,18 @@ std::unique_ptr MakeTimeManager(const OptionsDict& options) { throw Exception("Exactly one time manager should be specified, " + std::to_string(managers.size()) + " specified instead."); } + if (managers[0] == "legacy") { time_manager = MakeLegacyTimeManager(move_overhead, tm_options.GetSubdict("legacy")); + } else if (managers[0] == "alphazero") { + time_manager = MakeAlphazeroTimeManager(move_overhead, + tm_options.GetSubdict("alphazero")); } else if (managers[0] == "smooth-experimental") { time_manager = MakeSmoothTimeManager( move_overhead, tm_options.GetSubdict("smooth-experimental")); } + if (!time_manager) { throw Exception("Unknown time manager: [" + managers[0] + "]"); } From 9b0cdd5a70e03ef4096c73e5a0a0ec87b6107e10 Mon Sep 17 00:00:00 2001 From: Kip Hamiltons <48076495+KipHamiltons@users.noreply.github.com> Date: Sat, 9 May 2020 06:43:08 +1000 Subject: [PATCH 135/151] Updated FLAGS.md with logfile flag (#1275) --- FLAGS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FLAGS.md b/FLAGS.md index dcc2a82d09..5b0234cda5 100644 --- a/FLAGS.md +++ b/FLAGS.md @@ -52,7 +52,7 @@ List of command line flags: | --fpu-reduction=NUM | First Play Urgency reduction | Default: `0.2` | | --cache-history-length=NUM | The length of history to include in the cache | Default: `7` | | --extra-virtual-loss=NUM | Extra virtual loss | Default: `0` | -| -l,
--debuglog=FILENAME | Do debug logging into a file | Default is off (empty string) | +| -l,
--logfile=FILENAME | Do debug logging into a file | Default is off (empty string) | ## Configuration Files @@ -65,7 +65,7 @@ List of command line flags: # The -- is optional. The following flags will work as well: weights=10445.txt.gz syzygy-paths=syzygy -debuglog=lc0.log +logfile=lc0.log ``` You can tell `lc0` to ignore the default configuration file by passing `--config=` on the command line. Command line arguments will override any arguments that also exist in the configuration file. From ca8b5cc73ee6857b627b97b626167ec78f0dfd72 Mon Sep 17 00:00:00 2001 From: Kip Hamiltons <48076495+KipHamiltons@users.noreply.github.com> Date: Sat, 9 May 2020 06:43:28 +1000 Subject: [PATCH 136/151] Fixed a typo in CONTRIBUTING.md (#1274) --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 87786f73e9..90c7d4aa63 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ The internal code dependency structure looks like this: ### Git history -Pull Requests are squahsed when merged. This means all commits in the branch will be squashed into one commit applied onto master, so branches and their PRs should stick to *one* topic only. If you think changes deserve separate commits, make separate PRs for each commit. +Pull Requests are squashed when merged. This means all commits in the branch will be squashed into one commit applied onto master, so branches and their PRs should stick to *one* topic only. If you think changes deserve separate commits, make separate PRs for each commit. This also means it's not possible to reuse one branch for multiple PRs; new PRs must either use entirely new branches, or else you could use `git reset --hard` on the current branch. From a53a44eebd9278e53552305e7c8ae3b77dc98640 Mon Sep 17 00:00:00 2001 From: nguyenpham Date: Sat, 9 May 2020 22:49:22 +1000 Subject: [PATCH 137/151] Update Readme about using git (#1265) --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3b09e5c950..040ef01689 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Lc0 is a UCI-compliant chess engine designed to play chess via neural network, s Lc0 can be acquired either via a git clone or an archive download from GitHub. Be aware that there is a required submodule which isn't included in source archives. -For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.24`), which is equivalent to using the latest version tag. +For essentially all purposes, including selfplay game generation and match play, we highly recommend using the latest `release/version` branch (for example `release/0.25`), which is equivalent to using the latest version tag. Versioning follows the Semantic Versioning guidelines, with major, minor and patch sections. The training server enforces game quality using the versions output by the client and engine. 
@@ -17,9 +17,17 @@ Versioning follows the Semantic Versioning guidelines, with major, minor and pat Download using git: ``` -git clone -b release/0.24 --recurse-submodules https://github.com/LeelaChessZero/lc0.git +git clone -b release/0.25 --recurse-submodules https://github.com/LeelaChessZero/lc0.git ``` +If you have cloned already an old version, fetch, view and checkout a new branch: +``` +git fetch --all +git branch --all +git checkout -t remotes/origin/release/0.25 +``` + + If you prefer to download an archive, you need to also download and place the submodule: * Download the [.zip](https://api.github.com/repos/LeelaChessZero/lc0/zipball/release/0.24) file ([.tar.gz](https://api.github.com/repos/LeelaChessZero/lc0/tarball/release/0.24) archive is also available) * Extract From c40ea230babde319d4f331175584a66e6152110d Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 9 May 2020 19:23:16 +0200 Subject: [PATCH 138/151] Make `wl_` double. (#1280) --- src/mcts/node.cc | 32 +++++++++++++------------- src/mcts/node.h | 58 +++++++++++++++++++----------------------------- 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index ea97edd368..6159e5daba 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -167,14 +167,11 @@ std::string Edge::DebugString() const { return oss.str(); } -///////////////////////////////////////////////////////////////////////// -// EdgeList -///////////////////////////////////////////////////////////////////////// - -EdgeList::EdgeList(MoveList moves) - : edges_(std::make_unique(moves.size())), size_(moves.size()) { - auto* edge = edges_.get(); - for (const auto move : moves) edge++->SetMove(move); +std::unique_ptr Edge::FromMovelist(const MoveList& moves) { + std::unique_ptr edges = std::make_unique(moves.size()); + auto* edge = edges.get(); + for (const auto move : moves) edge++->move_ = move; + return edges; } ///////////////////////////////////////////////////////////////////////// @@ -184,7 +181,8 @@ EdgeList::EdgeList(MoveList moves) Node* Node::CreateSingleChildNode(Move move) { assert(!edges_); assert(!child_); - edges_ = EdgeList({move}); + edges_ = Edge::FromMovelist({move}); + num_edges_ = 1; child_ = std::make_unique(this, 0); return child_.get(); } @@ -192,17 +190,18 @@ Node* Node::CreateSingleChildNode(Move move) { void Node::CreateEdges(const MoveList& moves) { assert(!edges_); assert(!child_); - edges_ = EdgeList(moves); + edges_ = Edge::FromMovelist(moves); + num_edges_ = moves.size(); } -Node::ConstIterator Node::Edges() const { return {edges_, &child_}; } -Node::Iterator Node::Edges() { return {edges_, &child_}; } +Node::ConstIterator Node::Edges() const { return {*this, &child_}; } +Node::Iterator Node::Edges() { return {*this, &child_}; } float Node::GetVisitedPolicy() const { return visited_policy_; } Edge* Node::GetEdgeToNode(const Node* node) const { assert(node->parent_ == this); - assert(node->index_ < edges_.size()); + assert(node->index_ < num_edges_); return &edges_[node->index_]; } @@ -214,7 +213,7 @@ std::string Node::DebugString() const { << " Parent:" << parent_ << " Index:" << index_ << " Child:" << child_.get() << " Sibling:" << sibling_.get() << " WL:" << wl_ << " N:" << n_ << " N_:" << n_in_flight_ - << " Edges:" << edges_.size() + << " Edges:" << num_edges_ << " Bounds:" << static_cast(lower_bound_) - 2 << "," << static_cast(upper_bound_) - 2; return oss.str(); @@ -326,7 +325,10 @@ void Node::ReleaseChildrenExceptOne(Node* node_to_save) { // Make saved node the only 
child. (kills previous siblings). gNodeGc.AddToGcQueue(std::move(child_)); child_ = std::move(saved_node); - if (!child_) edges_ = EdgeList(); // Clear edges list. + if (!child_) { + num_edges_ = 0; + edges_.reset(); // Clear edges list. + } } V5TrainingData Node::GetV5TrainingData( diff --git a/src/mcts/node.h b/src/mcts/node.h index 217a5ceec2..8458925597 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -80,6 +80,9 @@ namespace lczero { class Node; class Edge { public: + // Creates array of edges from the list of moves. + static std::unique_ptr FromMovelist(const MoveList& moves); + // Returns move from the point of view of the player making it (if as_opponent // is false) or as opponent (if as_opponent is true). Move GetMove(bool as_opponent = false) const; @@ -93,8 +96,6 @@ class Edge { std::string DebugString() const; private: - void SetMove(Move move) { move_ = move; } - // Move corresponding to this node. From the point of view of a player, // i.e. black's e7e5 is stored as e2e4. // Root node contains move a1a1. @@ -103,23 +104,6 @@ class Edge { // Probability that this move will be made, from the policy head of the neural // network; compressed to a 16 bit format (5 bits exp, 11 bits significand). uint16_t p_ = 0; - - friend class EdgeList; -}; - -// Array of Edges. -class EdgeList { - public: - EdgeList() {} - EdgeList(MoveList moves); - Edge* get() const { return edges_.get(); } - Edge& operator[](size_t idx) const { return edges_[idx]; } - operator bool() const { return static_cast(edges_); } - uint16_t size() const { return size_; } - - private: - std::unique_ptr edges_; - uint16_t size_ = 0; }; class EdgeAndNode; @@ -152,7 +136,7 @@ class Node { Node* GetParent() const { return parent_; } // Returns whether a node has children. - bool HasChildren() const { return edges_; } + bool HasChildren() const { return static_cast(edges_); } // Returns sum of policy priors which have had at least one playout. float GetVisitedPolicy() const; @@ -173,7 +157,7 @@ class Node { bool IsTbTerminal() const { return terminal_type_ == Terminal::Tablebase; } typedef std::pair Bounds; Bounds GetBounds() const { return {lower_bound_, upper_bound_}; } - uint16_t GetNumEdges() const { return edges_.size(); } + uint8_t GetNumEdges() const { return num_edges_; } // Makes the node terminal and sets it's score. void MakeTerminal(GameResult result, float plies_left = 0.0f, @@ -266,11 +250,17 @@ class Node { // padding when new fields are added, we arrange the fields by size, largest // to smallest. - // TODO: shrink the padding on this somehow? It takes 16 bytes even though - // only 10 are real! Maybe even merge it into this class?? - EdgeList edges_; - // 8 byte fields. + // Average value (from value head of neural network) of all visited nodes in + // subtree. For terminal nodes, eval is stored. This is from the perspective + // of the player who "just" moved to reach this position, rather than from the + // perspective of the player-to-move for the position. + // WL stands for "W minus L". Is equal to Q if draw score is 0. + double wl_ = 0.0f; + + // 8 byte fields on 64-bit platforms, 4 byte on 32-bit. + // Array of edges. + std::unique_ptr edges_; // Pointer to a parent node. nullptr for the root. Node* parent_ = nullptr; // Pointer to a first child. nullptr for a leaf node. @@ -282,12 +272,6 @@ class Node { Node* best_child_cached_ = nullptr; // 4 byte fields. - // Average value (from value head of neural network) of all visited nodes in - // subtree. For terminal nodes, eval is stored. 
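The switch of this accumulator from float to double is presumably about precision: a single-precision running total eventually stops absorbing small updates. A toy demonstration of that generic float behaviour (not lc0's actual update rule):

```
// Toy illustration: a float accumulator stalls once increments drop below
// its precision, while a double keeps absorbing them.
#include <cstdio>

int main() {
  float f = 0.0f;
  double d = 0.0;
  for (int i = 0; i < 20000000; ++i) {
    f += 1.0f;  // stalls at 16777216 (2^24): 16777216.0f + 1.0f == 16777216.0f
    d += 1.0;
  }
  std::printf("float: %.1f  double: %.1f\n", f, d);  // 16777216.0 vs 20000000.0
  return 0;
}
```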
This is from the perspective - // of the player who "just" moved to reach this position, rather than from the - // perspective of the player-to-move for the position. - // WL stands for "W minus L". Is equal to Q if draw score is 0. - float wl_ = 0.0f; // Averaged draw probability. Works similarly to WL, except that D is not // flipped depending on the side to move. float d_ = 0.0f; @@ -309,6 +293,10 @@ class Node { // Index of this node is parent's edge list. uint16_t index_; + // 1 byte fields. + // Number of edges in @edges_. + uint8_t num_edges_ = 0; + // Bit fields using parts of uint8_t fields initialized in the constructor. // Whether or not this node end game (with a winning of either sides or draw). Terminal terminal_type_ : 2; @@ -446,10 +434,10 @@ class Edge_Iterator : public EdgeAndNode { Edge_Iterator() {} // Creates "begin()" iterator. Also happens to be a range constructor. - Edge_Iterator(const EdgeList& edges, Ptr node_ptr) - : EdgeAndNode(edges.size() ? edges.get() : nullptr, nullptr), - node_ptr_(node_ptr), - total_count_(edges.size()) { + Edge_Iterator(const Node& parent_node, Ptr child_ptr) + : EdgeAndNode(parent_node.edges_.get(), nullptr), + node_ptr_(child_ptr), + total_count_(parent_node.num_edges_) { if (edge_) Actualize(); } From 0622049bbcb2f348a75083c957041eada36b5cbd Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Sat, 9 May 2020 19:33:04 +0200 Subject: [PATCH 139/151] Move move filter population to a constructor. (#1281) --- src/chess/bitboard.h | 2 +- src/mcts/search.cc | 94 ++++++++++++++++++++++---------------------- src/mcts/search.h | 12 ++---- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/src/chess/bitboard.h b/src/chess/bitboard.h index 63e77559b1..d66ea49a89 100644 --- a/src/chess/bitboard.h +++ b/src/chess/bitboard.h @@ -259,7 +259,7 @@ class Move { uint16_t as_nn_index(int transform) const; explicit operator bool() const { return data_ != 0; } - bool operator==(const Move& other) { return data_ == other.data_; } + bool operator==(const Move& other) const { return data_ == other.data_; } void Mirror() { data_ ^= 0b111000111000; } diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 08553dfbf7..1741c04569 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -47,6 +47,28 @@ namespace lczero { namespace { // Maximum delay between outputting "uci info" when nothing interesting happens. const int kUciInfoMinimumFrequencyMs = 5000; + +MoveList MakeRootMoveFilter(const MoveList& searchmoves, + SyzygyTablebase* syzygy_tb, + const PositionHistory& history, bool fast_play, + std::atomic* tb_hits) { + // Search moves overrides tablebase. 
+ if (!searchmoves.empty()) return searchmoves; + const auto& board = history.Last().GetBoard(); + MoveList root_moves; + if (!syzygy_tb || !board.castlings().no_legal_castle() || + (board.ours() | board.theirs()).count() > syzygy_tb->max_cardinality()) { + return root_moves; + } + if (syzygy_tb->root_probe( + history.Last(), fast_play || history.DidRepeatSinceLastZeroingMove(), + &root_moves) || + syzygy_tb->root_probe_wdl(history.Last(), &root_moves)) { + tb_hits->fetch_add(1, std::memory_order_acq_rel); + } + return root_moves; +} + } // namespace Search::Search(const NodeTree& tree, Network* network, @@ -63,11 +85,14 @@ Search::Search(const NodeTree& tree, Network* network, syzygy_tb_(syzygy_tb), played_history_(tree.GetPositionHistory()), network_(network), + params_(options), searchmoves_(searchmoves), start_time_(start_time), initial_visits_(root_node_->GetN()), - uci_responder_(std::move(uci_responder)), - params_(options) { + root_move_filter_( + MakeRootMoveFilter(searchmoves_, syzygy_tb_, played_history_, + params_.GetSyzygyFastPlay(), &tb_hits_)), + uci_responder_(std::move(uci_responder)) { if (params_.GetMaxConcurrentSearchers() != 0) { pending_searchers_.store(params_.GetMaxConcurrentSearchers(), std::memory_order_release); @@ -476,25 +501,6 @@ std::int64_t Search::GetTotalPlayouts() const { return total_playouts_; } -bool Search::PopulateRootMoveLimit(MoveList* root_moves) const { - // Search moves overrides tablebase. - if (!searchmoves_.empty()) { - *root_moves = searchmoves_; - return false; - } - auto board = played_history_.Last().GetBoard(); - if (!syzygy_tb_ || !board.castlings().no_legal_castle() || - (board.ours() | board.theirs()).count() > syzygy_tb_->max_cardinality()) { - return false; - } - return syzygy_tb_->root_probe( - played_history_.Last(), - params_.GetSyzygyFastPlay() || - played_history_.DidRepeatSinceLastZeroingMove(), - root_moves) || - syzygy_tb_->root_probe_wdl(played_history_.Last(), root_moves); -} - void Search::ResetBestMove() { SharedMutex::Lock nodes_lock(nodes_mutex_); Mutex::Lock lock(counters_mutex_); @@ -522,8 +528,9 @@ void Search::EnsureBestMoveKnown() REQUIRES(nodes_mutex_) if (moves >= decay_delay_moves + decay_moves) { temperature = 0.0; } else if (moves >= decay_delay_moves) { - temperature *= static_cast - (decay_delay_moves + decay_moves - moves) / decay_moves; + temperature *= + static_cast(decay_delay_moves + decay_moves - moves) / + decay_moves; } // don't allow temperature to decay below endgame temperature if (temperature < params_.GetTemperatureEndgame()) { @@ -543,10 +550,6 @@ void Search::EnsureBestMoveKnown() REQUIRES(nodes_mutex_) std::vector Search::GetBestChildrenNoTemperature(Node* parent, int count, int depth) const { - MoveList root_limit; - if (parent == root_node_) { - PopulateRootMoveLimit(&root_limit); - } const bool is_odd_depth = (depth % 2) == 1; const float draw_score = GetDrawScore(is_odd_depth); // Best child is selected using the following criteria: @@ -557,9 +560,9 @@ std::vector Search::GetBestChildrenNoTemperature(Node* parent, // * If that number is larger than 0, the one with larger eval wins. 
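For intuition, the ordering sketched in the comment above (most playouts first, larger eval as the tie-break) can be written as a simple comparator. The snippet below is a self-contained illustration with a made-up `Candidate` struct; the real comparator in search.cc also accounts for terminal bounds and priors and is not reproduced here:

```
// Illustrative only: rank candidate edges by visit count, then by eval.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Candidate {
  const char* move;
  int n;    // visit count
  float q;  // eval from the parent's perspective
};

int main() {
  std::vector<Candidate> edges = {{"e2e4", 800, 0.12f},
                                  {"d2d4", 800, 0.15f},
                                  {"g1f3", 650, 0.20f}};
  std::sort(edges.begin(), edges.end(),
            [](const Candidate& a, const Candidate& b) {
              if (a.n != b.n) return a.n > b.n;  // more playouts first
              return a.q > b.q;  // equal playouts: larger eval wins
            });
  std::printf("best: %s\n", edges.front().move);  // prints "d2d4"
  return 0;
}
```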
std::vector edges; for (auto edge : parent->Edges()) { - if (parent == root_node_ && !root_limit.empty() && - std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) == - root_limit.end()) { + if (parent == root_node_ && !root_move_filter_.empty() && + std::find(root_move_filter_.begin(), root_move_filter_.end(), + edge.GetMove()) == root_move_filter_.end()) { continue; } edges.push_back(edge); @@ -648,8 +651,6 @@ EdgeAndNode Search::GetBestChildNoTemperature(Node* parent, int depth) const { EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { // Root is at even depth. const float draw_score = GetDrawScore(/* is_odd_depth= */ false); - MoveList root_limit; - PopulateRootMoveLimit(&root_limit); std::vector cumulative_sums; float sum = 0.0; @@ -660,8 +661,9 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { GetFpu(params_, root_node_, /* is_root= */ true, draw_score); for (auto edge : root_node_->Edges()) { - if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), - edge.GetMove()) == root_limit.end()) { + if (!root_move_filter_.empty() && + std::find(root_move_filter_.begin(), root_move_filter_.end(), + edge.GetMove()) == root_move_filter_.end()) { continue; } if (edge.GetN() + offset > max_n) { @@ -677,8 +679,9 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { const float min_eval = max_eval - params_.GetTemperatureWinpctCutoff() / 50.0f; for (auto edge : root_node_->Edges()) { - if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), - edge.GetMove()) == root_limit.end()) { + if (!root_move_filter_.empty() && + std::find(root_move_filter_.begin(), root_move_filter_.end(), + edge.GetMove()) == root_move_filter_.end()) { continue; } if (edge.GetQ(fpu, draw_score, /* logit_q= */ false) < min_eval) continue; @@ -695,8 +698,9 @@ EdgeAndNode Search::GetBestRootChildWithTemperature(float temperature) const { cumulative_sums.begin(); for (auto edge : root_node_->Edges()) { - if (!root_limit.empty() && std::find(root_limit.begin(), root_limit.end(), - edge.GetMove()) == root_limit.end()) { + if (!root_move_filter_.empty() && + std::find(root_move_filter_.begin(), root_move_filter_.end(), + edge.GetMove()) == root_move_filter_.end()) { continue; } if (edge.GetQ(fpu, draw_score, /* logit_q= */ false) < min_eval) continue; @@ -902,13 +906,6 @@ void SearchWorker::InitializeIteration( computation_ = std::make_unique(std::move(computation), search_->cache_); minibatch_.clear(); - - if (!root_move_filter_populated_) { - root_move_filter_populated_ = true; - if (search_->PopulateRootMoveLimit(&root_move_filter_)) { - search_->tb_hits_.fetch_add(1, std::memory_order_acq_rel); - } - } } // 2. Gather minibatch. @@ -1021,6 +1018,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( bool is_root_node = true; const float even_draw_score = search_->GetDrawScore(false); const float odd_draw_score = search_->GetDrawScore(true); + const auto& root_move_filter = search_->root_move_filter_; uint16_t depth = 0; bool node_already_updated = true; @@ -1096,9 +1094,9 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( continue; } // If root move filter exists, make sure move is in the list. 
- if (!root_move_filter_.empty() && - std::find(root_move_filter_.begin(), root_move_filter_.end(), - child.GetMove()) == root_move_filter_.end()) { + if (!root_move_filter.empty() && + std::find(root_move_filter.begin(), root_move_filter.end(), + child.GetMove()) == root_move_filter.end()) { continue; } } diff --git a/src/mcts/search.h b/src/mcts/search.h index ff5b703c8e..ccb40282d0 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -121,10 +121,6 @@ class Search { // uci `stop` command; void WatchdogThread(); - // Populates the given list with allowed root moves. - // Returns true if the population came from tablebase. - bool PopulateRootMoveLimit(MoveList* root_moves) const; - // Fills IterationStats with global (rather than per-thread) portion of search // statistics. Currently all stats there (in IterationStats) are global // though. @@ -173,9 +169,13 @@ class Search { const PositionHistory& played_history_; Network* const network_; + const SearchParams params_; const MoveList searchmoves_; const std::chrono::steady_clock::time_point start_time_; const int64_t initial_visits_; + // tb_hits_ must be initialized before root_move_filter_. + std::atomic tb_hits_{0}; + const MoveList root_move_filter_; mutable SharedMutex nodes_mutex_; EdgeAndNode current_best_edge_ GUARDED_BY(nodes_mutex_); @@ -188,7 +188,6 @@ class Search { // Cumulative depth of all paths taken in PickNodetoExtend. uint64_t cum_depth_ GUARDED_BY(nodes_mutex_) = 0; std::optional nps_start_time_; - std::atomic tb_hits_{0}; std::atomic pending_searchers_{0}; @@ -196,7 +195,6 @@ class Search { GUARDED_BY(nodes_mutex_); std::unique_ptr uci_responder_; - const SearchParams params_; friend class SearchWorker; }; @@ -320,8 +318,6 @@ class SearchWorker { std::unique_ptr computation_; // History is reset and extended by PickNodeToExtend(). PositionHistory history_; - MoveList root_move_filter_; - bool root_move_filter_populated_ = false; int number_out_of_order_ = 0; const SearchParams& params_; std::unique_ptr precached_node_; From 9858ed0d0915d44a684eff1b04ee8b2f1ff5b896 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Sat, 9 May 2020 18:57:08 -0700 Subject: [PATCH 140/151] Filter out illegal searchmoves to avoid crashing. (#1282) --- src/engine.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/engine.cc b/src/engine.cc index 3ad1320e4e..3d2da72c01 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -66,9 +66,15 @@ const OptionId kStrictUciTiming{"strict-uci-timing", "StrictTiming", MoveList StringsToMovelist(const std::vector& moves, const ChessBoard& board) { MoveList result; - result.reserve(moves.size()); - for (const auto& move : moves) { - result.emplace_back(board.GetModernMove({move, board.flipped()})); + if (moves.size()) { + result.reserve(moves.size()); + const auto legal_moves = board.GenerateLegalMoves(); + const auto end = legal_moves.end(); + for (const auto& move : moves) { + const auto m = board.GetModernMove({move, board.flipped()}); + if (std::find(legal_moves.begin(), end, m) != end) result.emplace_back(m); + } + if (result.empty()) throw Exception("No legal searchmoves."); } return result; } From ad28dce758ff5a3ee665ad4759f59873676ba2b0 Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 10 May 2020 21:58:25 +1000 Subject: [PATCH 141/151] Clear policy for terminal loss. 
(#1285) --- src/mcts/node.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 6159e5daba..e4328909c9 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -232,6 +232,9 @@ void Node::MakeTerminal(GameResult result, float plies_left, Terminal type) { } else if (result == GameResult::BLACK_WON) { wl_ = -1.0f; d_ = 0.0f; + // Terminal losses have no uncertainty and no reason for their U value to be + // comparable to another non-loss choice. Force this by clearing the policy. + if (GetParent() != nullptr) GetOwnEdge()->SetP(0.0f); } } From 109bcce328b8aeb354d9835b2663d7235e2dfacb Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 10 May 2020 23:51:07 +1000 Subject: [PATCH 142/151] Allow smart pruning to terminate search if win is known. (#1284) * Allow smart pruning to terminate search if win is known. * Minor tweak, better safe than sorry. --- src/mcts/search.cc | 2 ++ src/mcts/stoppers/stoppers.cc | 4 ++++ src/mcts/stoppers/timemgr.h | 1 + 3 files changed, 7 insertions(+) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 1741c04569..0f39d64cf2 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -752,8 +752,10 @@ void Search::PopulateCommonIterationStats(IterationStats* stats) { stats->batches_since_movestart = total_batches_; stats->average_depth = cum_depth_ / (total_playouts_ ? total_playouts_ : 1); stats->edge_n.clear(); + stats->win_found = false; for (const auto& edge : root_node_->Edges()) { stats->edge_n.push_back(edge.GetN()); + if (edge.IsTerminal() && edge.GetWL(0.0f) > 0.0f) stats->win_found = true; } } diff --git a/src/mcts/stoppers/stoppers.cc b/src/mcts/stoppers/stoppers.cc index 36ae8a9b72..0a65a3a143 100644 --- a/src/mcts/stoppers/stoppers.cc +++ b/src/mcts/stoppers/stoppers.cc @@ -190,6 +190,10 @@ bool SmartPruningStopper::ShouldStop(const IterationStats& stats, LOGFILE << "Only one possible move. Moving immediately."; return true; } + if (stats.win_found) { + LOGFILE << "Terminal win found, stopping search."; + return true; + } if (stats.nodes_since_movestart > 0 && !first_eval_time_) { first_eval_time_ = stats.time_since_movestart; return false; diff --git a/src/mcts/stoppers/timemgr.h b/src/mcts/stoppers/timemgr.h index 51d15351d0..67cc21df18 100644 --- a/src/mcts/stoppers/timemgr.h +++ b/src/mcts/stoppers/timemgr.h @@ -48,6 +48,7 @@ struct IterationStats { int64_t batches_since_movestart = 0; int average_depth = 0; std::vector edge_n; + bool win_found = false; }; // Hints from stoppers back to the search engine. Currently include: From 1cdc1abaf45cc755deefbf49f0184bc2b71e21be Mon Sep 17 00:00:00 2001 From: Tilps Date: Sun, 10 May 2020 23:52:01 +1000 Subject: [PATCH 143/151] Fix bug where pv might not update for best move change. (#1286) * Fix bug where pv might not update. * Fix... --- src/mcts/search.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 0f39d64cf2..96c234a248 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -1526,6 +1526,8 @@ void SearchWorker::DoBackupUpdateSingleNode( // Nothing left to do without ancestors to update. if (!p) break; + bool old_update_parent_bounds = update_parent_bounds; + // Try setting parent bounds except the root or those already terminal. update_parent_bounds = update_parent_bounds && p != search_->root_node_ && !p->IsTerminal() && MaybeSetBounds(p, m); @@ -1537,8 +1539,16 @@ void SearchWorker::DoBackupUpdateSingleNode( // Update the stats. // Best move. 
+ // If update_parent_bounds was set, we just adjusted bounds on the + // previous loop or there was no previous loop, so if n is a terminal, it + // just became that way and could be a candidate for changing the current + // best edge. Otherwise a visit can only change best edge if its to an edge + // that isn't already the best and the new n is equal or greater to the old + // n. if (p == search_->root_node_ && - search_->current_best_edge_.GetN() <= n->GetN()) { + (old_update_parent_bounds && n->IsTerminal() || + n != search_->current_best_edge_.node() && + search_->current_best_edge_.GetN() <= n->GetN())) { search_->current_best_edge_ = search_->GetBestChildNoTemperature(search_->root_node_, 0); } From 177d4ace609ce0f60ee655f01bbca029a403a90a Mon Sep 17 00:00:00 2001 From: Ankan Banerjee Date: Mon, 11 May 2020 11:57:13 +0530 Subject: [PATCH 144/151] fix weight memory check to be more conservative (#1260) --- src/neural/cuda/layers.cc | 26 +++++++-------- src/neural/cuda/network_cudnn.cc | 55 +++++++++++++++++--------------- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc index dbe27c9122..b6060346af 100644 --- a/src/neural/cuda/layers.cc +++ b/src/neural/cuda/layers.cc @@ -206,7 +206,7 @@ void ConvLayer::LoadWeights(float* pfilter, float* pBias, ReportCUDAErrors( cudaMemcpy(biases, pBias, blas_size, cudaMemcpyHostToDevice)); } else { - ReportCUDAErrors(cudaMemset(biases, blas_size, 0)); + ReportCUDAErrors(cudaMemset(biases, 0, blas_size)); } } @@ -375,24 +375,24 @@ void SELayer::LoadWeights(float* w1, float* b1, float* w2, float* b2, // Weight for the first FC layer. ReportCUDAErrors( cudaMemcpy(scratch, w1, weight_size1, cudaMemcpyHostToDevice)); - copyTypeConverted((half*)w1_, (float*)scratch, num_weights1); + copyTypeConverted((half*)w1_, (float*)scratch, (int)num_weights1); if (kUseFusedSELayer && nhwc_) { // transposed copy for fused SE kernel cpuTranspose(temp.data(), w1, numFc1Out_, C); ReportCUDAErrors( cudaMemcpy(scratch, temp.data(), weight_size1, cudaMemcpyHostToDevice)); - copyTypeConverted((half*)w1_t_, (float*)scratch, num_weights1); + copyTypeConverted((half*)w1_t_, (float*)scratch, (int)num_weights1); } // Weight for the second FC layer. ReportCUDAErrors( cudaMemcpy(scratch, w2, weight_size2, cudaMemcpyHostToDevice)); - copyTypeConverted((half*)w2_, (float*)scratch, num_weights2); + copyTypeConverted((half*)w2_, (float*)scratch, (int)num_weights2); if (kUseFusedSELayer && nhwc_) { cpuTranspose(temp.data(), w2, 2 * C, numFc1Out_); ReportCUDAErrors( cudaMemcpy(scratch, temp.data(), weight_size2, cudaMemcpyHostToDevice)); - copyTypeConverted((half*)w2_t_, (float*)scratch, num_weights2); + copyTypeConverted((half*)w2_t_, (float*)scratch, (int)num_weights2); } // Bias for the first FC layer. 
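One easy-to-miss fix near the top of this patch is the cudaMemset argument order: the runtime API is cudaMemset(ptr, value, count), so the old call cudaMemset(biases, blas_size, 0) requested a fill of zero bytes and left the buffer untouched. A minimal standalone sketch of the corrected usage, assuming a toy 64-float bias buffer and omitting error checking:

```
// Sketch of cudaMemset(devPtr, value, count): the corrected argument order
// actually zeroes the buffer; the old order zeroed nothing.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  const size_t blas_size = 64 * sizeof(float);  // hypothetical bias size
  float* biases = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&biases), blas_size);
  cudaMemset(biases, 0, blas_size);  // fill blas_size bytes with 0
  float host[64];
  cudaMemcpy(host, biases, blas_size, cudaMemcpyDeviceToHost);
  std::printf("first bias after memset: %f\n", host[0]);  // prints 0.000000
  cudaFree(biases);
  return 0;
}
```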
@@ -521,17 +521,17 @@ void FCLayer::LoadWeights(float* cpuWeight, float* cpuBias, cudaMemcpy(scratch, cpuWeight, weight_size, cudaMemcpyHostToDevice)); if (nhwc_) { - fp32NCHWtofp16NHWC((half*)weights_, (float*)scratch, num_biases, - input_->GetC(), num_biases, input_->GetC(), + fp32NCHWtofp16NHWC((half*)weights_, (float*)scratch, (int)num_biases, + input_->GetC(), (int)num_biases, input_->GetC(), input_->GetH(), input_->GetW()); } else { - copyTypeConverted((half*)weights_, (float*)scratch, num_weights); + copyTypeConverted((half*)weights_, (float*)scratch, (int)num_weights); } if (cpuBias) { ReportCUDAErrors( cudaMemcpy(scratch, cpuBias, blas_size, cudaMemcpyHostToDevice)); - copyTypeConverted((half*)biases_, (float*)scratch, num_biases); + copyTypeConverted((half*)biases_, (float*)scratch, (int)num_biases); } } @@ -795,23 +795,23 @@ void FusedWinogradConvSELayer::LoadSEWeights(float* w1, float* b1, CpuTranspose(temp_transposed.data(), w1, se_k_, C); ReportCUDAErrors(cudaMemcpy(scratch, temp_transposed.data(), num_weights1*sizeof(float), cudaMemcpyHostToDevice)); - copyTypeConverted((DataType*)w1_, (float*)scratch, num_weights1); + copyTypeConverted((DataType*)w1_, (float*)scratch, (int)num_weights1); CpuTranspose(temp_transposed.data(), w2, 2 * C, se_k_); ReportCUDAErrors(cudaMemcpy(scratch, temp_transposed.data(), num_weights2 * sizeof(float), cudaMemcpyHostToDevice)); - copyTypeConverted((DataType*)w2_, (float*)scratch, num_weights2); + copyTypeConverted((DataType*)w2_, (float*)scratch, (int)num_weights2); ReportCUDAErrors(cudaMemcpy(scratch, b1, num_biases1 * sizeof(float), cudaMemcpyHostToDevice)); - copyTypeConverted((DataType*)b1_, (float*)scratch, num_biases1); + copyTypeConverted((DataType*)b1_, (float*)scratch, (int)num_biases1); ReportCUDAErrors(cudaMemcpy(scratch, b2, num_biases2 * sizeof(float), cudaMemcpyHostToDevice)); - copyTypeConverted((DataType*)b2_, (float*)scratch, num_biases2); + copyTypeConverted((DataType*)b2_, (float*)scratch, (int)num_biases2); } template <> diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 8077091406..c577d42199 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -276,8 +276,8 @@ class CudnnNetwork : public Network { constexpr bool fp16 = std::is_same::value; const int kNumInputPlanes = kInputPlanes; - const int kNumFilters = weights.input.biases.size(); - numBlocks_ = weights.residual.size(); + const int kNumFilters = (int)weights.input.biases.size(); + numBlocks_ = (int)weights.residual.size(); // Use our custom winograd for residual tower convolutions for most cases: // @@ -306,34 +306,39 @@ class CudnnNetwork : public Network { use_custom_winograd_ = true; } - // Override if set in backend-opts. - if (!options.IsDefault("custom_winograd")) - use_custom_winograd_ = options.Get("custom_winograd"); - // Warn if the memory required for storing transformed weights is - // going to exceed 60% of total video memory, force custom_winograd off - // if it's going to exceed 80% of memory. + // going to exceed 40% of total video memory, force custom_winograd off + // if it's going to exceed 50% of memory. size_t residual_single_layer_weight_size = 3 * 3 * kNumFilters * kNumFilters * sizeof(DataType); size_t residual_weight_size = residual_single_layer_weight_size * numBlocks_ * 2; size_t transformed_residual_weight_size = residual_weight_size * 4; + if (residual_weight_size > 0.6 * deviceProp.totalGlobalMem) { CERR << "Low video memory detected. You may run into OOM errors. 
Please " "consider using a smaller network."; - // No hope of using custom winograd - even the fallback path might not run. + } + + const bool custom_winograd_override = !options.IsDefault("custom_winograd"); + + if (!custom_winograd_override && use_custom_winograd_ && + transformed_residual_weight_size > 0.5 * deviceProp.totalGlobalMem) { + CERR << "WARNING: Low GPU video memory. Turning off custom_winograd " + "path. You may still run into OOM errors. " + "Please consider using a smaller network."; use_custom_winograd_ = false; - } else if (use_custom_winograd_) { - if (transformed_residual_weight_size > 0.8 * deviceProp.totalGlobalMem) { - CERR << "WARNING: Low GPU video memory detected. Turning off " - "custom_winograd."; - use_custom_winograd_ = false; - } else if (transformed_residual_weight_size > - 0.6 * deviceProp.totalGlobalMem) { - CERR << "WARNING: Low GPU video memory. You may run into OOM errors. " - "Please consider using a smaller network, or run with " - "--backend-opts=custom_winograd=false"; - } + } + + // Override if set in backend-opts. + if (custom_winograd_override) + use_custom_winograd_ = options.Get("custom_winograd"); + + if (use_custom_winograd_ && + transformed_residual_weight_size > 0.4 * deviceProp.totalGlobalMem) { + CERR << "WARNING: Low GPU video memory. You may still run into OOM " + "errors. Try with backend-opts=custom_winograd=false, or " + "using a smaller network."; } // Winograd needs nchw tensor layout. @@ -387,7 +392,7 @@ class CudnnNetwork : public Network { cudnn_, xDesc, wDesc, convDesc, xDesc, conv_algo, &scratch_size_)); // Have some minumum as we also use this for transforming weights. - int max_weight_size = 128 * 1024 * 1024; + size_t max_weight_size = 128 * 1024 * 1024; // parts from scratch allocation are suballocated to hold various weights // and biases when transforming winograd weights (one layer at a time), 128 @@ -434,7 +439,7 @@ class CudnnNetwork : public Network { network_.emplace_back(std::move(conv1)); bool has_se = weights.residual[block].has_se; - int se_k = weights.residual[block].se.b1.size(); + int se_k = (int)weights.residual[block].se.b1.size(); auto conv2 = std::make_unique>( getLastLayer(), kNumFilters, 8, 8, kNumFilters, true, true, true, has_se, se_k, use_gemm_ex); @@ -468,7 +473,7 @@ class CudnnNetwork : public Network { network_.emplace_back(std::move(conv2)); if (weights.residual[block].has_se) { - int numFCOut = weights.residual[block].se.b1.size(); + int numFCOut = (int)weights.residual[block].se.b1.size(); auto se = std::make_unique>(getLastLayer(), numFCOut, false); se->LoadWeights(&weights.residual[block].se.w1[0], @@ -919,7 +924,7 @@ class CudnnNetwork : public Network { "version " << major << "." << minor << "." << pl; } - version = cudnnGetVersion(); + version = (int)cudnnGetVersion(); major = version / 1000; minor = (version - major * 1000) / 100; pl = version - major * 1000 - minor * 100; @@ -948,7 +953,7 @@ class CudnnNetwork : public Network { CERR << "GPU compute capability: " << deviceProp.major << "." 
<< deviceProp.minor; - int version = cudnnGetVersion(); + int version = (int)cudnnGetVersion(); if (version < 7301 && (deviceProp.major > 7 || (deviceProp.major == 7 && deviceProp.minor >= 5))) { CERR << "WARNING: CUDNN version 7.3.1 or newer is better for this GPU."; From 2e51d43f35d8818304f52e4f2b5959c837b40486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Forst=C3=A9n?= Date: Mon, 11 May 2020 15:31:36 +0300 Subject: [PATCH 145/151] Allow --show-hidden to be specified after --help (#738) --- src/mcts/stoppers/common.cc | 4 +++- src/utils/optionsparser.cc | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/mcts/stoppers/common.cc b/src/mcts/stoppers/common.cc index 74cb02c99a..e7c3934eea 100644 --- a/src/mcts/stoppers/common.cc +++ b/src/mcts/stoppers/common.cc @@ -81,6 +81,8 @@ void PopulateCommonStopperOptions(RunType for_what, OptionsParser* options) { if (for_what == RunType::kUci) { options->Add(kRamLimitMbId, 0, 100000000) = 0; + options->HideOption(kMinimumKLDGainPerNodeId); + options->HideOption(kKLDGainAverageIntervalId); options->HideOption(kNodesAsPlayoutsId); } } @@ -173,4 +175,4 @@ std::unique_ptr MakeCommonTimeManager( move_overhead); } -} // namespace lczero \ No newline at end of file +} // namespace lczero diff --git a/src/utils/optionsparser.cc b/src/utils/optionsparser.cc index bdaa9a60a4..8748f6e30f 100644 --- a/src/utils/optionsparser.cc +++ b/src/utils/optionsparser.cc @@ -119,12 +119,9 @@ bool OptionsParser::ProcessAllFlags() { } bool OptionsParser::ProcessFlags(const std::vector& args) { + auto show_help = false; for (auto iter = args.begin(), end = args.end(); iter != end; ++iter) { std::string param = *iter; - if (param == "-h" || param == "--help") { - ShowHelp(); - return false; - } if (param == "--help-md") { ShowHelpMd(); return false; @@ -133,6 +130,11 @@ bool OptionsParser::ProcessFlags(const std::vector& args) { ShowHidden(); continue; } + if (param == "-h" || param == "--help") { + // Set a flag so that --show-hidden after --help works. + show_help = true; + continue; + } if (param.substr(0, 2) == "--") { std::string context; @@ -190,6 +192,10 @@ bool OptionsParser::ProcessFlags(const std::vector& args) { CERR << "For help run:\n " << CommandLine::BinaryName() << " --help"; return false; } + if (show_help) { + ShowHelp(); + return false; + } return true; } @@ -260,6 +266,8 @@ void OptionsParser::ShowHelp() const { std::cout << "\nAllowed command line flags for current mode:\n"; std::cout << FormatFlag('h', "help", "Show help and exit."); + std::cout << FormatFlag('\0', "show-hidden", + "Show hidden options. 
Use with --help."); for (const auto& option : options_) { if (!option->hidden_) std::cout << option->GetHelp(defaults_); } From 2400dabfd0cc7f3c057254ff12f29d25dc7c1668 Mon Sep 17 00:00:00 2001 From: borg323 <39573933+borg323@users.noreply.github.com> Date: Mon, 11 May 2020 21:28:21 +0300 Subject: [PATCH 146/151] fix ispc download link (#1290) --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 31867a70fc..1e685351eb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -31,7 +31,7 @@ install: - cmd: IF %NAME%==cpu-openblas IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://sjeng.org/ftp/OpenBLAS-0.3.3-win-oldthread.zip - cmd: IF %NAME%==cpu-openblas IF NOT EXIST C:\cache\OpenBLAS 7z x OpenBLAS-0.3.3-win-oldthread.zip -oC:\cache\OpenBLAS - cmd: IF %OPENCL%==true nuget install opencl-nug -Version 0.777.77 -OutputDirectory C:\cache -- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows appveyor DownloadFile https://sourceforge.net/projects/ispcmirror/files/v1.9.2/ispc-v1.9.2-windows.zip +- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows appveyor DownloadFile https://github.com/ispc/ispc/releases/download/v1.9.2/ispc-v1.9.2-windows.zip - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows 7z x ispc-v1.9.2-windows.zip -oC:\cache - cmd: IF %BLAS%==true set PATH=C:\cache\ispc-v1.9.2-windows;%PATH% - cmd: set "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0" @@ -64,7 +64,7 @@ install: - cmd: IF %GTEST%==true IF NOT EXIST KQvKQ.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK{P,N,R,B,Q}.rtb{w,z} - cmd: cd C:\projects\lc0 cache: - - C:\cache -> appveyor.yml + - C:\cache - 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0' - C:\projects\lc0\subprojects\packagecache - C:\ndk\android-ndk-r19c\toolchains\llvm\prebuilt\windows-x86_64 From f33c0a4eacb20df01aea2f276ee252c5a3449ab4 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Mon, 11 May 2020 13:09:33 -0700 Subject: [PATCH 147/151] Use explicit int for printing verbose and debug num edges. 
(#1292) --- src/mcts/node.cc | 2 +- src/mcts/search.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mcts/node.cc b/src/mcts/node.cc index e4328909c9..8c17e75643 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -213,7 +213,7 @@ std::string Node::DebugString() const { << " Parent:" << parent_ << " Index:" << index_ << " Child:" << child_.get() << " Sibling:" << sibling_.get() << " WL:" << wl_ << " N:" << n_ << " N_:" << n_in_flight_ - << " Edges:" << num_edges_ + << " Edges:" << static_cast(num_edges_) << " Bounds:" << static_cast(lower_bound_) - 2 << "," << static_cast(upper_bound_) - 2; return oss.str(); diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 96c234a248..b03838960f 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -312,7 +312,7 @@ std::vector Search::GetVerboseStats(Node* node) const { auto print = [](auto* oss, auto pre, auto v, auto post, auto w, int p = 0) { *oss << pre << std::setw(w) << std::setprecision(p) << v << post; }; - auto print_head = [&](auto* oss, auto label, auto i, auto n, auto f, auto p) { + auto print_head = [&](auto* oss, auto label, int i, auto n, auto f, auto p) { *oss << std::fixed; print(oss, "", label, " ", 5); print(oss, "(", i, ") ", 4); From 0b394a24c9196136f7e40e555e6f115a26ca7565 Mon Sep 17 00:00:00 2001 From: Anson Hu <40702929+farmersrice@users.noreply.github.com> Date: Wed, 13 May 2020 07:55:57 -0700 Subject: [PATCH 148/151] Small policy calculation optimization (#1278) --- src/mcts/search.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index b03838960f..f73bc78263 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -35,6 +35,7 @@ #include #include #include +#include #include "mcts/node.h" #include "neural/cache.h" @@ -1448,7 +1449,11 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process, edge.GetMove().as_nn_index( node_to_process->probability_transform))); } + // Intermediate array to store values when processing policy. + // There are never more than 256 valid legal moves in any legal position. + std::array intermediate; float total = 0.0; + int counter = 0; for (auto edge : node->Edges()) { float p = computation_->GetPVal( idx_in_computation, @@ -1457,16 +1462,14 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process, // Note that we want to calculate (exp(p-max_p))^(1/T) = exp((p-max_p)/T). p = FastExp((p - max_p) / params_.GetPolicySoftmaxTemp()); - // Note that p now lies in [0, 1], so it is safe to store it in compressed - // format. Normalization happens later. - edge.edge()->SetP(p); - // Edge::SetP does some rounding, so only add to the total after rounding. - total += edge.edge()->GetP(); + intermediate[counter++] = p; + total += p; } + counter = 0; // Normalize P values to add up to 1.0. - if (total > 0.0f) { - const float scale = 1.0f / total; - for (auto edge : node->Edges()) edge.edge()->SetP(edge.GetP() * scale); + const float scale = total > 0.0f ? 1.0f / total : 1.0f; + for (auto edge : node->Edges()) { + edge.edge()->SetP(intermediate[counter++] * scale); } // Add Dirichlet noise if enabled and at root. if (params_.GetNoiseEpsilon() && node == search_->root_node_) { From c313d7cc1189e71f9e5af4c756a0340e12e27731 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Wed, 13 May 2020 10:25:02 -0700 Subject: [PATCH 149/151] Show P: 100.0% instead of P: 100.00% to avoid shifting verbose output. 
(#1297) --- src/mcts/search.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index f73bc78263..24c79fb32c 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -320,7 +320,7 @@ std::vector Search::GetVerboseStats(Node* node) const { *oss << std::right; print(oss, "N: ", n, " ", 7); print(oss, "(+", f, ") ", 2); - print(oss, "(P: ", p * 100, "%) ", 5, 2); + print(oss, "(P: ", p * 100, "%) ", 5, p >= 1.0f ? 1 : 2); }; auto print_stats = [&](auto* oss, const auto* n) { const auto sign = n == node ? -1 : 1; From 0d1bb0b18e8b77f38fa516c4b76c3c9b1e38800b Mon Sep 17 00:00:00 2001 From: Alexander Lyashuk Date: Wed, 13 May 2020 20:07:11 +0200 Subject: [PATCH 150/151] Fix compiler warnings. (#1298) --- src/mcts/search.cc | 8 ++++---- src/mcts/stoppers/smooth.cc | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index 24c79fb32c..d85d01b4c9 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -28,6 +28,7 @@ #include "mcts/search.h" #include +#include #include #include #include @@ -35,7 +36,6 @@ #include #include #include -#include #include "mcts/node.h" #include "neural/cache.h" @@ -1549,9 +1549,9 @@ void SearchWorker::DoBackupUpdateSingleNode( // that isn't already the best and the new n is equal or greater to the old // n. if (p == search_->root_node_ && - (old_update_parent_bounds && n->IsTerminal() || - n != search_->current_best_edge_.node() && - search_->current_best_edge_.GetN() <= n->GetN())) { + ((old_update_parent_bounds && n->IsTerminal()) || + (n != search_->current_best_edge_.node() && + search_->current_best_edge_.GetN() <= n->GetN()))) { search_->current_best_edge_ = search_->GetBestChildNoTemperature(search_->root_node_, 0); } diff --git a/src/mcts/stoppers/smooth.cc b/src/mcts/stoppers/smooth.cc index e9a0732447..8405f01e83 100644 --- a/src/mcts/stoppers/smooth.cc +++ b/src/mcts/stoppers/smooth.cc @@ -314,10 +314,6 @@ class SmoothTimeManager : public TimeManager { int64_t last_move_final_nodes_ GUARDED_BY(mutex_) = 0; // Time of the last report, since the beginning of the move. int64_t last_time_ GUARDED_BY(mutex_) = 0; - - // According to the recent calculations, how much time should be spent in - // average per move. - float last_expected_movetime_ms_ GUARDED_BY(mutex_) = 0.0f; }; SmoothStopper::SmoothStopper(int64_t deadline_ms, SmoothTimeManager* manager) From 90157a82c2c28e0b61cc59459b78517f7fd38fb5 Mon Sep 17 00:00:00 2001 From: Ed Lee Date: Thu, 14 May 2020 00:24:15 -0700 Subject: [PATCH 151/151] Switch verbose precision when value would round up to 100. (#1301) --- src/mcts/search.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcts/search.cc b/src/mcts/search.cc index d85d01b4c9..c766b728fc 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -320,7 +320,7 @@ std::vector Search::GetVerboseStats(Node* node) const { *oss << std::right; print(oss, "N: ", n, " ", 7); print(oss, "(+", f, ") ", 2); - print(oss, "(P: ", p * 100, "%) ", 5, p >= 1.0f ? 1 : 2); + print(oss, "(P: ", p * 100, "%) ", 5, p >= 0.99995f ? 1 : 2); }; auto print_stats = [&](auto* oss, const auto* n) { const auto sign = n == node ? -1 : 1;