Skip to content

Commit

Permalink
- ふかうら王、最大GPUの数を設定できるように。
Browse files (browse the repository at this point in the history)
  - コンパイルオプションでMAX_GPU=32のように指定できるようにした。
  • Loading branch information
yaneurao committed May 9, 2024
1 parent e62b37d commit 692ae2f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 67 deletions.
2 changes: 1 addition & 1 deletion source/engine/dlshogi-engine/UctSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ namespace dlshogi
for (int i = 0; i < child_num; i++) {
if (uct_child[i].IsWin()) {
// 負けが確定しているノードは選択しない
-				if (child_win_count == i && uct_child[i].move_count > max_count) {
+				if (child_win_count == NodeCountType(i) && uct_child[i].move_count > max_count) {
// すべて負けの場合は、探索回数が最大の手を選択する
select_index = i;
max_count = uct_child[i].move_count;
Expand Down
75 changes: 17 additions & 58 deletions source/engine/dlshogi-engine/YaneuraOu_dlshogi_bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,42 +98,17 @@ void USI::extra_option(USI::OptionsMap& o)

// RTX 3090で10bなら4、15bなら2で最適。
o["UCT_Threads1"] << USI::Option(2, 0, 256);
-	o["UCT_Threads2"] << USI::Option(0, 0, 256);
-	o["UCT_Threads3"] << USI::Option(0, 0, 256);
-	o["UCT_Threads4"] << USI::Option(0, 0, 256);
-	o["UCT_Threads5"] << USI::Option(0, 0, 256);
-	o["UCT_Threads6"] << USI::Option(0, 0, 256);
-	o["UCT_Threads7"] << USI::Option(0, 0, 256);
-	o["UCT_Threads8"] << USI::Option(0, 0, 256);
-	o["UCT_Threads9"] << USI::Option(0, 0, 256);
-	o["UCT_Threads10"] << USI::Option(0, 0, 256);
-	o["UCT_Threads11"] << USI::Option(0, 0, 256);
-	o["UCT_Threads12"] << USI::Option(0, 0, 256);
-	o["UCT_Threads13"] << USI::Option(0, 0, 256);
-	o["UCT_Threads14"] << USI::Option(0, 0, 256);
-	o["UCT_Threads15"] << USI::Option(0, 0, 256);
-	o["UCT_Threads16"] << USI::Option(0, 0, 256);
+	for (int i = 2; i <= max_gpu ; ++i)
+		o["UCT_Threads" + std::to_string(i)] << USI::Option(0, 0, 256);

#if defined(COREML)
// Core MLでは、ONNXではなく独自形式のモデルが必要。
o["DNN_Model1"] << USI::Option(R"(model.mlmodel)");
#else
o["DNN_Model1"] << USI::Option(R"(model.onnx)");
#endif
-	o["DNN_Model2"] << USI::Option("");
-	o["DNN_Model3"] << USI::Option("");
-	o["DNN_Model4"] << USI::Option("");
-	o["DNN_Model5"] << USI::Option("");
-	o["DNN_Model6"] << USI::Option("");
-	o["DNN_Model7"] << USI::Option("");
-	o["DNN_Model8"] << USI::Option("");
-	o["DNN_Model9"] << USI::Option("");
-	o["DNN_Model10"] << USI::Option("");
-	o["DNN_Model11"] << USI::Option("");
-	o["DNN_Model12"] << USI::Option("");
-	o["DNN_Model13"] << USI::Option("");
-	o["DNN_Model14"] << USI::Option("");
-	o["DNN_Model15"] << USI::Option("");
-	o["DNN_Model16"] << USI::Option("");
+	for (int i = 2; i <= max_gpu ; ++i)
+		o["DNN_Model" + std::to_string(i)] << USI::Option("");

#if defined(TENSOR_RT) || defined(ORT_TRT)
// 通常時の推奨128 , 検討の時は推奨256。
Expand All @@ -145,21 +120,8 @@ void USI::extra_option(USI::OptionsMap& o)
// M1チップで8程度でスループットが飽和する。
o["DNN_Batch_Size1"] << USI::Option(8, 1, 1024);
#endif
-	o["DNN_Batch_Size2"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size3"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size4"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size5"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size6"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size7"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size8"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size9"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size10"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size11"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size12"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size13"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size14"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size15"] << USI::Option(0, 0, 1024);
-	o["DNN_Batch_Size16"] << USI::Option(0, 0, 1024);
+	for (int i = 2; i <= max_gpu ; ++i)
+		o["DNN_Batch_Size" + std::to_string(i)] << USI::Option(0, 0, 1024);

//(*this)["Const_Playout"] = USIOption(0, 0, int_max);
// → Playout数固定。これはNodesLimitでできるので不要。
Expand Down Expand Up @@ -212,25 +174,22 @@ void Search::clear()

// スレッド数と各GPUのbatchsizeをsearcherに設定する。

-	const int new_thread[max_gpu] = {
-		(int)Options["UCT_Threads1" ], (int)Options["UCT_Threads2" ], (int)Options["UCT_Threads3" ], (int)Options["UCT_Threads4" ],
-		(int)Options["UCT_Threads5" ], (int)Options["UCT_Threads6" ], (int)Options["UCT_Threads7" ], (int)Options["UCT_Threads8" ],
-		(int)Options["UCT_Threads9" ], (int)Options["UCT_Threads10"], (int)Options["UCT_Threads11"], (int)Options["UCT_Threads12"],
-		(int)Options["UCT_Threads13"], (int)Options["UCT_Threads14"], (int)Options["UCT_Threads15"], (int)Options["UCT_Threads16"]
-	};
-	const int new_policy_value_batch_maxsize[max_gpu] = {
-		(int)Options["DNN_Batch_Size1" ], (int)Options["DNN_Batch_Size2" ], (int)Options["DNN_Batch_Size3" ], (int)Options["DNN_Batch_Size4" ],
-		(int)Options["DNN_Batch_Size5" ], (int)Options["DNN_Batch_Size6" ], (int)Options["DNN_Batch_Size7" ], (int)Options["DNN_Batch_Size8" ],
-		(int)Options["DNN_Batch_Size9" ], (int)Options["DNN_Batch_Size10"], (int)Options["DNN_Batch_Size11"], (int)Options["DNN_Batch_Size12"],
-		(int)Options["DNN_Batch_Size13"], (int)Options["DNN_Batch_Size14"], (int)Options["DNN_Batch_Size15"], (int)Options["DNN_Batch_Size16"]
-	};
+	std::vector<int> new_thread;
+	std::vector<int> new_policy_value_batch_maxsize;
+
+	for (int i = 1; i <= max_gpu; ++i)
+	{
+		// GPU_unlimited() なら、すべてUCT_Threads1, DNN_Batch_Size1を参照する。
+		new_thread.emplace_back((int)Options["UCT_Threads" + std::to_string(i)]);
+		new_policy_value_batch_maxsize.emplace_back((int)Options["DNN_Batch_Size" + std::to_string(i)]);
+	}

// 対応デバイス数を取得する
int device_count = NN::get_device_count();

std::vector<int> thread_nums;
std::vector<int> policy_value_batch_maxsizes;
-	for (int i = 0; i < max_gpu; ++i)
+	for (int i = 0; i < max_gpu ; ++i)
{
// 対応デバイス数以上のデバイスIDのスレッド数は 0 として扱う(デバイスの無効化)
thread_nums.push_back(i < device_count ? new_thread[i] : 0);
Expand Down
11 changes: 4 additions & 7 deletions source/eval/deep/nn_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,12 +622,9 @@ namespace Eval::dlshogi

Result init_model_paths()
{
-		const std::string model_paths[max_gpu] = {
-			Options["DNN_Model1"], Options["DNN_Model2"], Options["DNN_Model3"], Options["DNN_Model4"],
-			Options["DNN_Model5"], Options["DNN_Model6"], Options["DNN_Model7"], Options["DNN_Model8"],
-			Options["DNN_Model9"], Options["DNN_Model10"], Options["DNN_Model11"], Options["DNN_Model12"],
-			Options["DNN_Model13"], Options["DNN_Model14"], Options["DNN_Model15"], Options["DNN_Model16"]
-		};
+		std::vector<string> model_paths;
+		for (int i = 1; i <= max_gpu ; ++i)
+			model_paths.emplace_back(Options["DNN_Model" + std::to_string(i)]);

string eval_dir = Options["EvalDir"];

Expand All @@ -640,7 +637,7 @@ namespace Eval::dlshogi

// モデルファイル存在チェック
bool is_err = false;
-		for (int i = 0; i < max_gpu; ++i) {
+		for (int i = 0; i < max_gpu ; ++i) {
if (model_paths[i] != "")
{
string path = Path::Combine(eval_dir, model_paths[i].c_str());
Expand Down
5 changes: 4 additions & 1 deletion source/eval/deep/nn_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ namespace Eval::dlshogi
// === GPU関連の設定 ===

// GPUの最大数(これ以上のGPUは扱えない)
-	constexpr int max_gpu = 16;
+	#if !defined(MAX_GPU)
+	constexpr int MAX_GPU = 16;
+	#endif
+	constexpr int max_gpu = MAX_GPU;

// === 入出力の特徴量の定義 ===

Expand Down

0 comments on commit 692ae2f

Please sign in to comment.