-
Notifications
You must be signed in to change notification settings - Fork 12
/
dilemma.py
88 lines (76 loc) · 3.08 KB
/
dilemma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""Prisoners Dilemma
Two members of a criminal gang are arrested and imprisoned.
Each prisoner is in solitary confinement with no means of speaking to
or exchanging messages with the other.
The police admit they don't have enough evidence
to convict the pair on the principal charge.
They plan to sentence both to a year in prison on a lesser charge.
Simultaneously, the police offer each prisoner a Faustian bargain.
Each prisoner is given the opportunity either to betray (defect) the other,
by testifying that the other committed the crime,
or to cooperate with the other by remaining silent.
Here's how it goes:
If A and B each betray the other
each of them serves 2 years in prison
If A betrays B but B remains silent
A will be set free and B will serve 3 years in prison
If A and B both remain silent
both of them will only serve 1 year in prison
"""
"""
Next objective: with Q-learning, rediscover the reward obtained with tit-for-tat
"""
import itertools
import csv
import numpy as np
import math
from strategies import *
class Game(object):
    """Prisoner's Dilemma played between two strategy objects.

    Each prisoner object must provide:

    * ``strategy(state=...)`` -- returns the string ``"cooperate"`` or
      ``"defect"``; ``state`` is the list of payoffs that player has
      received so far.
    * ``punish(state=..., action=..., reward=..., new_state=...)`` -- called
      after every move with the payoff history before the move, the action
      taken, the payoff received and the payoff history after the move.

    Attributes:
        prisoner_a, prisoner_b: the two players.
        move_id: number of moves played so far.
        data: record of the game; ``data['id']`` holds the move numbers and
            ``data['A']`` / ``data['B']`` hold each player's payoffs.
        REWARD: payoff to each player when both cooperate.
        SUCKER: payoff to a player who cooperates while the other defects.
        TEMPTATION: payoff to a player who defects while the other cooperates.
        PENALTY: payoff to each player when both defect.
    """

    def __init__(self, prisoner_a, prisoner_b):
        """Initialize the game with the two prisoners."""
        self.prisoner_a = prisoner_a
        self.prisoner_b = prisoner_b
        self.move_id = 0
        self.data = {'id': [], 'A': [], 'B': []}
        self.REWARD = 4  # TODO: original author was unsure -- confirm 4 is intended
        self.SUCKER = 0
        self.TEMPTATION = 5
        self.PENALTY = 1

    def reset_players(self):
        """Hook called before each move of a non-iterated game (no-op for now)."""
        pass

    def play(self, moves=1, iterated=False):  # add parameters of the game
        """Play ``moves`` moves in a row.

        When ``iterated`` is false, ``reset_players`` is called before every
        move.
        """
        for _ in range(moves):
            if not iterated:
                self.reset_players()
            self.play_a_move()

    def _payoffs(self, action_a, action_b):
        """Return ``(payoff_a, payoff_b)`` for one pair of actions.

        Raises:
            AssertionError: if either action is neither ``"cooperate"`` nor
                ``"defect"``.
        """
        if action_a == "defect" and action_b == "defect":
            return self.PENALTY, self.PENALTY
        if action_a == "defect" and action_b == "cooperate":
            return self.TEMPTATION, self.SUCKER
        if action_a == "cooperate" and action_b == "defect":
            return self.SUCKER, self.TEMPTATION
        # Both sides are compared explicitly: ``(a and b) == "cooperate"``
        # only inspects ``b`` whenever ``a`` is truthy, which let invalid
        # actions for A slip through as mutual cooperation.
        if action_a == "cooperate" and action_b == "cooperate":
            return self.REWARD, self.REWARD
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O``; the exception type is the one ``assert`` produced.
        raise AssertionError(
            "Error, impossible move: %r / %r" % (action_a, action_b)
        )

    def play_a_move(self):
        """Execute one iteration of the game.

        Ask both prisoners for their action, compute the payoffs, record
        them in ``self.data`` and report the outcome to each prisoner via
        ``punish``.

        Raises:
            AssertionError: if a prisoner returns an unknown action; nothing
                is recorded in that case.
        """
        # The state here is the history of the player results
        action_a = self.prisoner_a.strategy(state=self.data['A'])
        action_b = self.prisoner_b.strategy(state=self.data['B'])

        # Game mechanic (validated before any bookkeeping is touched)
        return_a, return_b = self._payoffs(action_a, action_b)

        self.move_id += 1
        self.data['id'].append(self.move_id)
        self.data['A'].append(return_a)
        self.data['B'].append(return_b)

        # Feedback tuple: (history before, action, reward, history after).
        # ``state`` is a copy without the newest payoff; ``new_state`` is the
        # live history list itself.
        self.prisoner_a.punish(state=self.data['A'][:-1], action=action_a,
                               reward=return_a, new_state=self.data['A'])
        self.prisoner_b.punish(state=self.data['B'][:-1], action=action_b,
                               reward=return_b, new_state=self.data['B'])