MCTS.py

from godot import exposed, export
from godot import *


@exposed
class MCTS(Node):

	"""
A minimal implementation of Monte Carlo tree search (MCTS) in Python 3
Luke Harold Miles, July 2019, Public Domain Dedication
See also https://en.wikipedia.org/wiki/Monte_Carlo_tree_search
https://gist.github.com/qpwo/c538c6f73727e254fdc7fab81024f6e1
"""
from abc import ABC, abstractmethod
from collections import defaultdict
import math


NUM_ACTIONS = 2

class monte_node(ABC):
	"""
	A representation of a single board state.
	MCTS works by constructing a tree of these Nodes.
	Could be e.g. a chess or checkers board state.
	"""

	def __init__(self):
		self.terminal=False
	
	def find_children(self):
		"All possible successors of this board state"
		if board.terminal  # If the game is finished then no moves can be made
			return set()
		# Otherwise, you can make a move in each of the empty spots
		return {
			board.make_move(i) for i, value in enumerate(board.tup) if value is None
			}


	def find_random_child(self):
		if self.terminal
		"Random successor of this board state (for more efficient simulation)"
		return None


	def is_terminal(self):
		"Returns True if the node has no children"
		
		return 


	def reward(self):
		"Assumes `self` is terminal node. 1=win, 0=loss, .5=tie, etc"
		return 0


	def __hash__(self):
		"Nodes must be hashable"
		return 123456789


	def __eq__(node1, node2):
		"Nodes must be comparable"
		return True

class MCTS:
	"Monte Carlo tree searcher. First rollout the tree then choose a move."

	def __init__(self, exploration_weight=1):
		self.Q = defaultdict(int)  # total reward of each node
		self.N = defaultdict(int)  # total visit count for each node
		self.children = dict()  # children of each node
		self.exploration_weight = exploration_weight

	def choose(self, node):
		"Choose the best successor of node. (Choose a move in the game)"
		if node.is_terminal():
			raise RuntimeError(f"choose called on terminal node {node}")

		if node not in self.children:
			return node.find_random_child()

		def score(n):
			if self.N[n] == 0:
				return float("-inf")  # avoid unseen moves
			return self.Q[n] / self.N[n]  # average reward

		return max(self.children[node], key=score)

	def do_rollout(self, node):
		"Make the tree one layer better. (Train for one iteration.)"
		path = self._select(node)
		leaf = path[-1]
		self._expand(leaf)
		reward = self._simulate(leaf)
		self._backpropagate(path, reward)

	def _select(self, node):
		"Find an unexplored descendent of `node`"
		path = []
		while True:
			path.append(node)
			if node not in self.children or not self.children[node]:
				# node is either unexplored or terminal
				return path
			unexplored = self.children[node] - self.children.keys()
			if unexplored:
				n = unexplored.pop()
				path.append(n)
				return path
			node = self._uct_select(node)  # descend a layer deeper

	def _expand(self, node):
		"Update the `children` dict with the children of `node`"
		if node in self.children:
			return  # already expanded
		self.children[node] = node.find_children()

	def _simulate(self, node):
		"Returns the reward for a random simulation (to completion) of `node`"
		invert_reward = True
		while True:
			if node.is_terminal():
				reward = node.reward()
				return 1 - reward if invert_reward else reward
			node = node.find_random_child()
			invert_reward = not invert_reward

	def _backpropagate(self, path, reward):
		"Send the reward back up to the ancestors of the leaf"
		for node in reversed(path):
			self.N[node] += 1
			self.Q[node] += reward
			reward = 1 - reward  # 1 for me is 0 for my enemy, and vice versa

	def _uct_select(self, node):
		"Select a child of node, balancing exploration & exploitation"

		# All children of node should already be expanded:
		assert all(n in self.children for n in self.children[node])

		log_N_vertex = math.log(self.N[node])

		def uct(n):
			"Upper confidence bound for trees"
			return self.Q[n] / self.N[n] + self.exploration_weight * math.sqrt(
				log_N_vertex / self.N[n]
			)

		return max(self.children[node], key=uct)