Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Represent MARITIME_TRADE as 10-int list
Browse files Browse the repository at this point in the history
zarns committed Nov 3, 2024
1 parent 76442c7 commit 7c6ad6e
Showing 5 changed files with 95 additions and 54 deletions.
31 changes: 15 additions & 16 deletions catanatron_core/catanatron/models/actions.py
Original file line number Diff line number Diff line change
@@ -298,25 +298,24 @@ def inner_maritime_trade_possibilities(hand_freqdeck, bank_freqdeck, port_resour
trade_offers = set()

# Get lowest rate per resource
rates: Dict[FastResource, int] = {WOOD: 4, BRICK: 4, SHEEP: 4, WHEAT: 4, ORE: 4}
rates = {res: 4 for res in RESOURCES}
if None in port_resources:
rates = {WOOD: 3, BRICK: 3, SHEEP: 3, WHEAT: 3, ORE: 3}
rates = {res: 3 for res in RESOURCES}
for resource in port_resources:
if resource != None:
if resource is not None:
rates[resource] = 2

# For resource in hand
for index, resource in enumerate(RESOURCES):
amount = hand_freqdeck[index]
if amount >= rates[resource]:
resource_out: List[Any] = [resource] * rates[resource]
resource_out += [None] * (4 - rates[resource])
for j_resource in RESOURCES:
if (
resource != j_resource
and freqdeck_count(bank_freqdeck, j_resource) > 0
):
trade_offer = tuple(resource_out + [j_resource])
trade_offers.add(trade_offer)
# For each resource in hand
for give_idx, amount in enumerate(hand_freqdeck):
rate = rates[RESOURCES[give_idx]]
if amount >= rate:
# Try trading for each other resource the bank has
for receive_idx, bank_amount in enumerate(bank_freqdeck):
if receive_idx != give_idx and bank_amount > 0:
# Create concatenated freqdeck [give_5 + receive_5]
freqdeck = [0] * 10
freqdeck[give_idx] = rate
freqdeck[5 + receive_idx] = 1
trade_offers.add(tuple(freqdeck))

return trade_offers
29 changes: 17 additions & 12 deletions catanatron_core/catanatron/state.py
Original file line number Diff line number Diff line change
@@ -589,19 +589,24 @@ def apply_action(state: State, action: Action):
state.current_prompt = ActionPrompt.PLAY_TURN
state.playable_actions = generate_playable_actions(state)
elif action.action_type == ActionType.MARITIME_TRADE:
trade_offer = action.value
offering = freqdeck_from_listdeck(
filter(lambda r: r is not None, trade_offer[:-1])
)
asking = freqdeck_from_listdeck(trade_offer[-1:])
if not player_resource_freqdeck_contains(state, action.color, offering):
# action.value is now a 10-length tuple of integers [give_5 + receive_5]
giving_freqdeck = list(action.value[:5]) # First 5 are resources given
receiving_freqdeck = list(action.value[5:]) # Last 5 are resources received

# Validate player has resources
if not player_resource_freqdeck_contains(state, action.color, giving_freqdeck):
raise ValueError("Trying to trade without money")
if not freqdeck_contains(state.resource_freqdeck, asking):
raise ValueError("Bank doenst have those cards")
player_freqdeck_subtract(state, action.color, offering)
state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, offering)
player_freqdeck_add(state, action.color, asking)
state.resource_freqdeck = freqdeck_subtract(state.resource_freqdeck, asking)
# Validate bank has resources
if not freqdeck_contains(state.resource_freqdeck, receiving_freqdeck):
raise ValueError("Bank doesn't have those cards")

# Execute trade
player_freqdeck_subtract(state, action.color, giving_freqdeck)
state.resource_freqdeck = freqdeck_add(state.resource_freqdeck, giving_freqdeck)
player_freqdeck_add(state, action.color, receiving_freqdeck)
state.resource_freqdeck = freqdeck_subtract(
state.resource_freqdeck, receiving_freqdeck
)

# state.current_player_index stays the same
state.current_prompt = ActionPrompt.PLAY_TURN
71 changes: 49 additions & 22 deletions catanatron_gym/catanatron_gym/envs/catanatron_env.py
Original file line number Diff line number Diff line change
@@ -20,14 +20,59 @@

BASE_TOPOLOGY = BASE_MAP_TEMPLATE.topology
TILE_COORDINATES = [x for x, y in BASE_TOPOLOGY.items() if y == LandTile]


def generate_trade_actions():
    """Generate all possible maritime trade actions in freqdeck format.

    Each action value is a 10-int tuple laid out as ``[give_5 + receive_5]``:
    the first five entries count the resources given and the last five count
    the resources received, both indexed by position in ``RESOURCES``.

    Returns:
        A list of ``(ActionType.MARITIME_TRADE, freqdeck)`` pairs. Entries are
        grouped by rate (4:1 bank trades, then 3:1 port trades, then 2:1 port
        trades); within a rate they are ordered by given resource and then by
        received resource.
    """
    trade_actions = []

    # The rate order (4, 3, 2) and the nested resource order are kept exactly
    # as before: the result is spliced into ACTIONS_ARRAY, so element order
    # must stay stable.
    for rate in (4, 3, 2):
        for give_idx, give_resource in enumerate(RESOURCES):
            for receive_idx, receive_resource in enumerate(RESOURCES):
                if give_resource == receive_resource:
                    # Trading a resource for itself is not a valid action.
                    continue
                # Create 10-length freqdeck [giving_5 + receiving_5]
                freqdeck = [0] * 10
                freqdeck[give_idx] = rate  # Give `rate` cards of one resource
                freqdeck[5 + receive_idx] = 1  # Receive 1 resource
                trade_actions.append((ActionType.MARITIME_TRADE, tuple(freqdeck)))

    return trade_actions


ACTIONS_ARRAY = [
(ActionType.ROLL, None),
# TODO: One for each tile (and abuse 1v1 setting).
# Move robber actions
*[(ActionType.MOVE_ROBBER, tile) for tile in TILE_COORDINATES],
(ActionType.DISCARD, None),
# Build actions
*[(ActionType.BUILD_ROAD, tuple(sorted(edge))) for edge in get_edges()],
*[(ActionType.BUILD_SETTLEMENT, node_id) for node_id in range(NUM_NODES)],
*[(ActionType.BUILD_CITY, node_id) for node_id in range(NUM_NODES)],
# Development card actions
(ActionType.BUY_DEVELOPMENT_CARD, None),
(ActionType.PLAY_KNIGHT_CARD, None),
*[
@@ -38,27 +83,8 @@
*[(ActionType.PLAY_YEAR_OF_PLENTY, (first_card,)) for first_card in RESOURCES],
(ActionType.PLAY_ROAD_BUILDING, None),
*[(ActionType.PLAY_MONOPOLY, r) for r in RESOURCES],
# 4:1 with bank
*[
(ActionType.MARITIME_TRADE, tuple(4 * [i] + [j]))
for i in RESOURCES
for j in RESOURCES
if i != j
],
# 3:1 with port
*[
(ActionType.MARITIME_TRADE, tuple(3 * [i] + [None, j])) # type: ignore
for i in RESOURCES
for j in RESOURCES
if i != j
],
# 2:1 with port
*[
(ActionType.MARITIME_TRADE, tuple(2 * [i] + [None, None, j])) # type: ignore
for i in RESOURCES
for j in RESOURCES
if i != j
],
# Maritime trade actions
*generate_trade_actions(),
(ActionType.END_TURN, None),
]
ACTION_SPACE_SIZE = len(ACTIONS_ARRAY)
@@ -70,6 +96,7 @@ def to_action_type_space(action):


def normalize_action(action):
"""Normalize action value to ensure it can be found in ACTIONS_ARRAY"""
normalized = action
if normalized.action_type == ActionType.ROLL:
return Action(action.color, action.action_type, None)
2 changes: 1 addition & 1 deletion tests/test_gym.py
Original file line number Diff line number Diff line change
@@ -126,7 +126,7 @@ def test_enemies():

# Virtually impossible for a Random bot to beat Value Function Player
assert env.game.winning_color() == Color.RED # type: ignore
assert reward - 1
assert reward == -1
env.close()


16 changes: 13 additions & 3 deletions tests/test_state.py
Original file line number Diff line number Diff line change
@@ -76,11 +76,21 @@ def test_trade_execution():
state = State(players)

player_deck_replenish(state, players[0].color, BRICK, 4)
trade_offer = tuple([BRICK] * 4 + [ORE])
action = Action(players[0].color, ActionType.MARITIME_TRADE, trade_offer)

# Create trade freqdeck
brick_idx = RESOURCES.index(BRICK)
ore_idx = RESOURCES.index(ORE)
give_freqdeck = [0] * 5
receive_freqdeck = [0] * 5
give_freqdeck[brick_idx] = 4 # Give 4 BRICK
receive_freqdeck[ore_idx] = 1 # Get 1 ORE
trade_freqdeck = tuple(give_freqdeck + receive_freqdeck)

action = Action(players[0].color, ActionType.MARITIME_TRADE, trade_freqdeck)
apply_action(state, action)

assert player_num_resource_cards(state, players[0].color) == 1
assert player_num_resource_cards(state, players[0].color, BRICK) == 0
assert player_num_resource_cards(state, players[0].color, ORE) == 1
assert sum(state.resource_freqdeck) == 19 * 5 + 4 - 1


0 comments on commit 7c6ad6e

Please sign in to comment.