diff --git a/AUTHORS b/AUTHORS index fd6026ad1..20855d96b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -14,6 +14,7 @@ Fanael Linithien (Fanael) Fauzi Akram Dabat (FauziAkram) FieryDragonLord Gabe (MrBrain295) +Gahtan Nahdi (gahtan-syarif) Giacomo Lorenzetti (G-Lorenz) Gian-Carlo Pascutto (gcp) Henri Wiechers (hwiechers) diff --git a/server/fishtest/api.py b/server/fishtest/api.py index 22755e5fa..6a403e512 100644 --- a/server/fishtest/api.py +++ b/server/fishtest/api.py @@ -34,7 +34,7 @@ according to the route/URL mapping defined in `__init__.py`. """ -WORKER_VERSION = 241 +WORKER_VERSION = 242 @exception_view_config(HTTPException) diff --git a/server/fishtest/rundb.py b/server/fishtest/rundb.py index 838bf7fdb..731d1facf 100644 --- a/server/fishtest/rundb.py +++ b/server/fishtest/rundb.py @@ -1158,23 +1158,6 @@ def priority(run): # lower is better if not have_binary: continue - # To avoid time losses in the case of large concurrency and short TC, - # probably due to cutechess-cli as discussed in issue #822, - # assign linux workers to LTC or multi-threaded jobs - # and windows workers only to LTC jobs - if max_threads >= 29: - if "windows" in worker_info["uname"].lower(): - tc_too_short = get_tc_ratio(run["args"]["tc"], base="55+0.5") < 1.0 - else: - tc_too_short = ( - get_tc_ratio( - run["args"]["tc"], run["args"]["threads"], "35+0.3" - ) - < 1.0 - ) - if tc_too_short: - continue - # Limit the number of cores. # Currently this is only done for spsa. if "spsa" in run["args"]: diff --git a/worker/games.py b/worker/games.py index 64463a3c5..8d56e3cff 100644 --- a/worker/games.py +++ b/worker/games.py @@ -67,7 +67,7 @@ def is_64bit(): HTTP_TIMEOUT = 30.0 -CUTECHESS_KILL_TIMEOUT = 15.0 +FASTCHESS_KILL_TIMEOUT = 15.0 UPDATE_RETRY_TIME = 15.0 RAWCONTENT_HOST = "https://raw.githubusercontent.com" @@ -504,24 +504,6 @@ def unzip(blob, save_dir): return file_list -def convert_book_move_counters(book_file): - # converts files with complete FENs, leaving others (incl. converted ones) unchanged - epds = [] - with open(book_file, "r") as file: - for fen in file: - fields = fen.split() - if len(fields) == 6 and fields[4].isdigit() and fields[5].isdigit(): - fields[4] = f"hmvc {fields[4]};" - fields[5] = f"fmvn {fields[5]};" - epds.append(" ".join(fields)) - else: - return - - with open(book_file, "w") as file: - for epd in epds: - file.write(epd + "\n") - - def clang_props(): """Parse the output of clang++ -E - -march=native -### and extract the available clang properties""" with subprocess.Popen( @@ -958,7 +940,7 @@ def results_to_score(results): assert abs(s5 - s3) < epsilon -def parse_cutechess_output( +def parse_fastchess_output( p, current_state, remote, result, spsa_tuning, games_to_play, batch_size, tc_limit ): hash_pattern = re.compile(r"(Base|New)-[a-f0-9]+") @@ -1002,13 +984,13 @@ def shorten_hash(match): # Parse line like this: # Warning: New-SHA doesn't have option ThreatBySafePawn if "Warning:" in line and "doesn't have option" in line: - message = r'Cutechess-cli says: "{}"'.format(line) + message = r'fast-chess says: "{}"'.format(line) raise RunException(message) # Parse line like this: # Warning: Invalid value for option P: -354 if "Warning:" in line and "Invalid value" in line: - message = r'Cutechess-cli says: "{}"'.format(line) + message = r'fast-chess says: "{}"'.format(line) raise RunException(message) # Parse line like this: @@ -1032,7 +1014,7 @@ def shorten_hash(match): validate_pentanomial( wld, rounds - ) # check if cutechess-cli result is compatible with + ) # check if fast-chess result is compatible with # our own bookkeeping pentanomial = [ @@ -1123,7 +1105,7 @@ def shorten_hash(match): return True -def launch_cutechess( +def launch_fastchess( cmd, current_state, remote, result, spsa_tuning, games_to_play, batch_size, tc_limit ): if spsa_tuning: @@ -1154,7 +1136,7 @@ def launch_cutechess( w_params = [] b_params = [] - # Run cutechess-cli binary. + # Run fast-chess binary. # Stochastic rounding and probability for float N.p: (N, 1-p); (N+1, p) idx = cmd.index("_spsa_") cmd = ( @@ -1179,7 +1161,7 @@ def launch_cutechess( + cmd[idx + 1 :] ) - # print(cmd) + # print(cmd) try: with subprocess.Popen( cmd, @@ -1201,7 +1183,7 @@ def launch_cutechess( close_fds=not IS_WINDOWS, ) as p: try: - task_alive = parse_cutechess_output( + task_alive = parse_fastchess_output( p, current_state, remote, @@ -1212,15 +1194,15 @@ def launch_cutechess( tc_limit, ) finally: - # We nicely ask cutechess-cli to stop. + # We nicely ask fast-chess to stop. try: send_sigint(p) except Exception as e: print("\nException in send_sigint:\n", e, sep="", file=sys.stderr) # now wait... - print("\nWaiting for cutechess-cli to finish ... ", end="", flush=True) + print("\nWaiting for fast-chess to finish ... ", end="", flush=True) try: - p.wait(timeout=CUTECHESS_KILL_TIMEOUT) + p.wait(timeout=FASTCHESS_KILL_TIMEOUT) except subprocess.TimeoutExpired: print("timeout", flush=True) kill_process(p) @@ -1228,12 +1210,12 @@ def launch_cutechess( print("done", flush=True) except (OSError, subprocess.SubprocessError) as e: print( - "Exception starting cutechess:\n", + "Exception starting fast-chess:\n", e, sep="", file=sys.stderr, ) - raise WorkerException("Unable to start cutechess. Error: {}".format(str(e))) + raise WorkerException("Unable to start fast-chess. Error: {}".format(str(e))) return task_alive @@ -1249,7 +1231,7 @@ def run_games( clear_binaries, global_cache, ): - # This is the main cutechess-cli driver. + # This is the main fast-chess driver. # It is ok, and even expected, for this function to # raise exceptions, implicitly or explicitly, if a # task cannot be completed. @@ -1317,7 +1299,7 @@ def run_games( start_game_index = opening_offset + input_total_games run_seed = int(hashlib.sha1(run["_id"].encode("utf-8")).hexdigest(), 16) % (2**30) - # Format options according to cutechess syntax. + # Format options according to fastchess syntax. def parse_options(s): results = [] chunks = s.split("=") @@ -1404,11 +1386,6 @@ def parse_options(s): blob = download_from_github(zipball) unzip(blob, testing_dir) - # convert .epd containing FENs into .epd containing EPDs with move counters - # only needed as long as cutechess-cli is the game manager - if book.endswith(".epd"): - convert_book_move_counters(testing_dir / book) - # Clean up the old networks (keeping the num_bkps most recent) num_bkps = 10 for old_net in sorted( @@ -1424,7 +1401,7 @@ def parse_options(s): file=sys.stderr, ) - # Add EvalFile* with full path to cutechess options, and download the networks if missing. + # Add EvalFile* with full path to fast-chess options, and download the networks if missing. for option, net in required_nets(base_engine).items(): base_options.append("option.{}={}".format(option, net)) establish_validated_net(remote, testing_dir, net, global_cache) @@ -1554,15 +1531,17 @@ def make_player(arg): if any(substring in book.upper() for substring in ["FRC", "960"]): variant = "fischerandom" - # Run cutechess binary. - cutechess = "cutechess-cli" + EXE_SUFFIX + # Run fastchess binary. + fastchess = "fast-chess" + EXE_SUFFIX cmd = ( [ - os.path.join(testing_dir, cutechess), + os.path.join(testing_dir, fastchess), "-recover", "-repeat", "-games", - str(int(games_to_play)), + "2", + "-rounds", + str(int(games_to_play) // 2), "-tournament", "gauntlet", ] @@ -1618,7 +1597,7 @@ def make_player(arg): + book_cmd ) - task_alive = launch_cutechess( + task_alive = launch_fastchess( cmd, current_state, remote, diff --git a/worker/sri.txt b/worker/sri.txt index ee2cad5b6..59386f6ad 100644 --- a/worker/sri.txt +++ b/worker/sri.txt @@ -1 +1 @@ -{"__version": 241, "updater.py": "Mg+pWOgGA0gSo2TuXuuLCWLzwGwH91rsW1W3ixg3jYauHQpRMtNdGnCfuD1GqOhV", "worker.py": "BMuQUpxZAKF0aP6ByTZY1r06MfPoIbdG2xraTrDQQRKgvhzJo6CKmeX2P8vX/QDm", "games.py": "9dFaa914vpqT7q4LLx2LlDdYwK6QFVX3h7+XRt18ATX0lt737rvFeBIiqakkttNC"} +{"__version": 242, "updater.py": "Mg+pWOgGA0gSo2TuXuuLCWLzwGwH91rsW1W3ixg3jYauHQpRMtNdGnCfuD1GqOhV", "worker.py": "jztkW6Di1yD8hx6nABYoT+CbB4V7aMBjgSBO4YajKF9y3yhVZ0hsdI5sGs/1KBFT", "games.py": "7RjSD5X3UP0DIgQecSpaRKTVFmf/4POUAHpNskuPVWNkdscE7+2PTQGfqHklIwho"} diff --git a/worker/tests/test_worker.py b/worker/tests/test_worker.py index 91cc84674..b1114d370 100644 --- a/worker/tests/test_worker.py +++ b/worker/tests/test_worker.py @@ -70,8 +70,12 @@ def test_sri(self): def test_toolchain_verification(self): self.assertTrue(worker.verify_toolchain()) - def test_setup_cutechess(self): - self.assertTrue(worker.setup_cutechess(Path.cwd())) + def test_setup_fastchess(self): + self.assertTrue( + worker.setup_fastchess( + Path.cwd(), list(worker.detect_compilers().keys())[0], 1, "" + ) + ) if __name__ == "__main__": diff --git a/worker/worker.py b/worker/worker.py index bfc40c95d..af7977f11 100644 --- a/worker/worker.py +++ b/worker/worker.py @@ -16,6 +16,7 @@ import stat import subprocess import sys +import tempfile import threading import time import traceback @@ -41,9 +42,12 @@ RunException, WorkerException, backup_log, + cache_read, + cache_write, download_from_github, format_return_code, log, + requests_get, run_games, send_api_post_request, str_signal, @@ -68,7 +72,7 @@ MIN_CLANG_MAJOR = 8 MIN_CLANG_MINOR = 0 -WORKER_VERSION = 241 +WORKER_VERSION = 242 FILE_LIST = ["updater.py", "worker.py", "games.py"] HTTP_TIMEOUT = 30.0 INITIAL_RETRY_TIME = 15.0 @@ -103,8 +107,8 @@ worker.py : worker() worker.py : fetch_and_handle_task() [in loop] games.py : run_games() -games.py : launch_cutechess() [in loop for spsa] -games.py : parse_cutechess_output() +games.py : launch_fastchess() [in loop for spsa] +games.py : parse_fastchess_output() Apis used by the worker ======================= @@ -120,8 +124,8 @@ /api/request_task POST /api/nn/ GET /git/trees/master GET - /git/trees/master/blobs/ GET /git/trees/master/blobs/ GET + /repos/Disservin/fast-chess/zipball/ GET /repos//zipball/ GET Main loop /api/update_task POST @@ -392,40 +396,17 @@ def get_credentials(config, options, args): return username, password -def download_cutechess(cutechess, save_dir): - if len(EXE_SUFFIX) > 0: - zipball = "cutechess-cli-win.zip" - elif IS_MACOS: - zipball = "cutechess-cli-macos-64bit.zip" - else: - zipball = "cutechess-cli-linux-{}.zip".format(platform.architecture()[0]) - try: - blob = download_from_github(zipball) - unzip(blob, save_dir) - - os.chmod(cutechess, os.stat(cutechess).st_mode | stat.S_IEXEC) - except Exception as e: - print( - "Exception downloading or extracting {}:\n".format(zipball), - e, - sep="", - file=sys.stderr, - ) - else: - print("Finished downloading {}".format(cutechess)) - +def verify_required_fastchess(fastchess_path, fastchess_sha): + # Verify that fastchess is working and has the required minimum version. -def verify_required_cutechess(cutechess_path): - # Verify that cutechess is working and has the required minimum version. - - if not cutechess_path.exists(): + if not fastchess_path.exists(): return False - print("Obtaining version info for {} ...".format(cutechess_path)) + print("Obtaining version info for {} ...".format(fastchess_path)) try: with subprocess.Popen( - [cutechess_path, "--version"], + [fastchess_path, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, @@ -433,98 +414,115 @@ def verify_required_cutechess(cutechess_path): close_fds=not IS_WINDOWS, ) as p: errors = p.stderr.read() - pattern = re.compile(r"cutechess-cli ([0-9]+)\.([0-9]+)\.([0-9]+)") - major, minor, patch = 0, 0, 0 + pattern = re.compile( + "fast-chess alpha [0-9]*.[0-9]*.[0-9]* [0-9]*-([0-9a-f-]*) \(compiled with cutechess output\)" + ) + short_sha = "" for line in iter(p.stdout.readline, ""): m = pattern.search(line) if m: print("Found", line.strip()) - major = int(m.group(1)) - minor = int(m.group(2)) - patch = int(m.group(3)) + short_sha = m.group(1) except (OSError, subprocess.SubprocessError) as e: - print("Unable to run cutechess-cli. Error: {}".format(str(e))) + print("Unable to run fast-chess. Error: {}".format(str(e))) return False if p.returncode != 0: print( - "Unable to run cutechess-cli. Return code: {}. Error: {}".format( + "Unable to run fast-chess. Return code: {}. Error: {}".format( format_return_code(p.returncode), errors ) ) return False - if major + minor + patch == 0: - print("Unable to find the version of cutechess-cli.") - return False + if len(short_sha) < 7: + print( + "Unable to find a suitable sha of length 7 or more in the fast-chess version." + ) - if (major, minor) < (1, 2): - print("Requires cutechess 1.2 or higher, found version doesn't match") + if not fastchess_sha.startswith(short_sha): + print( + "fast-chess sha {} required but the version shows {}".format( + fastchess_sha, short_sha + ) + ) return False return True -def setup_cutechess(worker_dir): +def setup_fastchess(worker_dir, compiler, concurrency, global_cache): # Create the testing directory if missing. testing_dir = worker_dir / "testing" testing_dir.mkdir(exist_ok=True) - curr_dir = Path.cwd() + fastchess_sha = "f65d3c375de5be0c2eb55c559f82ceaddadb87dc" + username = "Disservin" + fastchess = "fast-chess" + EXE_SUFFIX + if verify_required_fastchess(testing_dir / fastchess, fastchess_sha): + return True + + # build it ourselves try: - os.chdir(testing_dir) - except Exception as e: - print("Unable to enter {}. Error: {}".format(testing_dir, str(e))) - return False + item_url = ( + "https://api.github.com/repos/" + + username + + "/fast-chess/zipball/" + + fastchess_sha + ) - cutechess = "cutechess-cli" + EXE_SUFFIX - cutechess_path = testing_dir / cutechess + print("Building fast chess from sources at {}".format(item_url)) - # Download cutechess-cli if missing or overwrite if there are issues. - if not verify_required_cutechess(cutechess_path): - download_cutechess(cutechess, testing_dir) - else: - os.chdir(curr_dir) - return True + should_cache = False + blob = cache_read(global_cache, fastchess_sha + ".zip") - ret = True + if blob is None: + print("Downloading {}".format(item_url)) + blob = requests_get(item_url).content + should_cache = True + else: + print("Using {} from global cache".format(fastchess_sha + ".zip")) + + tmp_dir = Path(tempfile.mkdtemp(dir=testing_dir)) + file_list = unzip(blob, tmp_dir) + prefix = os.path.commonprefix([n.filename for n in file_list]) + + if should_cache: + cache_write(global_cache, fastchess_sha + ".zip", blob) - if not verify_required_cutechess(cutechess_path): + cd = os.getcwd() + os.chdir(tmp_dir / prefix) + + cmd = f"make -j{concurrency} USE_CUTE=true CXX={compiler} GIT_SHA={fastchess_sha[0:8]} GIT_DATE=01010101" + print(cmd) + with subprocess.Popen( + cmd, + shell=True, + env=os.environ, + stderr=subprocess.PIPE, + universal_newlines=True, + bufsize=1, + close_fds=not IS_WINDOWS, + ) as p: + errors = p.stderr.readlines() + + if p.returncode: + raise WorkerException("Executing {} failed. Error: {}".format(cmd, errors)) + + shutil.copy("fast-chess" + EXE_SUFFIX, testing_dir) + os.chdir(cd) + shutil.rmtree(tmp_dir) + + except Exception as e: print( - "The downloaded cutechess-cli is not working. Trying to restore a backup copy ..." - ) - bkp_cutechess_clis = sorted( - worker_dir.glob("_testing_*/" + cutechess), - key=os.path.getctime, - reverse=True, + "Exception downloading, extracting or building fast-chess:\n", + e, + sep="", + file=sys.stderr, ) - if bkp_cutechess_clis: - bkp_cutechess_cli = bkp_cutechess_clis[0] - try: - shutil.copy(bkp_cutechess_cli, testing_dir) - except Exception as e: - print( - "Unable to copy {} to {}. Error: {}".format( - bkp_cutechess_cli, testing_dir, str(e) - ) - ) - if not verify_required_cutechess(cutechess_path): - print( - "The backup copy {} doesn't work either ...".format( - bkp_cutechess_cli - ) - ) - print("No suitable cutechess-cli found") - ret = False - else: - print("No backup copy found") - print("No suitable cutechess-cli found") - ret = False - - os.chdir(curr_dir) - return ret + return verify_required_fastchess(testing_dir / fastchess, fastchess_sha) def validate(config, schema): @@ -821,7 +819,7 @@ def my_error(e): # Limit concurrency so that at least STC tests can run with the evailable memory # The memory need per engine is 16 for the TT Hash, 10 for the process 138 for the net and 16 per thread - # 60 is the need for cutechess-cli + # 60 is the need for fast-chess # These numbers need to be up-to-date with the server values STC_memory = 2 * (16 + 10 + 138 + 16) max_concurrency = int((options.max_memory - 60) / STC_memory) @@ -1512,12 +1510,21 @@ def worker(): print("Exception verifying worker version:\n", e, sep="", file=sys.stderr) return 1 + # Assemble the config/options data as well as some other data in a + # "worker_info" dictionary. + # This data will be sent to the server when a new task is requested. + + compiler, major, minor, patchlevel = options.compiler + print("Using {} {}.{}.{}".format(compiler, major, minor, patchlevel)) + # Check for common tool chain issues if not verify_toolchain(): return 1 - # Make sure we have a working cutechess-cli - if not setup_cutechess(worker_dir): + # Make sure we have a working fast-chess + if not setup_fastchess( + worker_dir, compiler, options.concurrency, options.global_cache + ): return 1 # Check if we are running an unmodified worker @@ -1525,13 +1532,6 @@ def worker(): if unmodified is None: return 1 - # Assemble the config/options data as well as some other data in a - # "worker_info" dictionary. - # This data will be sent to the server when a new task is requested. - - compiler, major, minor, patchlevel = options.compiler - print("Using {} {}.{}.{}".format(compiler, major, minor, patchlevel)) - uname = platform.uname() worker_info = { "uname": uname[0] + " " + uname[2] + (" (colab)" if IS_COLAB else ""),