-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathGridworld.py
146 lines (120 loc) · 4.97 KB
/
Gridworld.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
def randPair(s,e):
    """Return a pair of independently drawn random integers.

    Each coordinate comes from ``np.random.randint(s, e)``, i.e. the
    half-open range ``[s, e)``. The first element is drawn first.
    """
    first = np.random.randint(s, e)
    second = np.random.randint(s, e)
    return first, second
class BoardPiece:
    """A named item that occupies one cell of a grid board."""

    def __init__(self, name, code, pos):
        # name: name of the piece
        # code: an ASCII character used to display the piece on the board
        # pos:  2-tuple board coordinate, e.g. (1,4)
        self.name, self.code, self.pos = name, code, pos
class GridBoard:
    """Square board that stores named pieces and renders them.

    Pieces live in ``components`` (name -> piece). The iteration order of
    that dict decides which layer each piece gets in ``render_np``.
    """

    def __init__(self, size=4):
        self.size = size  # board dimensions, e.g. 4 x 4
        self.components = {}  # name : board piece

    def addPiece(self, name, code, pos=(0,0)):
        """Create a piece and store it under ``name`` (overwrites an existing entry)."""
        self.components[name] = BoardPiece(name, code, pos)

    def movePiece(self, name, pos):
        """Set the position of the piece stored under ``name``.

        Raises ``KeyError`` if no such piece exists.
        """
        self.components[name].pos = pos

    def delPiece(self, name):
        """Remove the piece stored under ``name``.

        Raises ``KeyError`` if no such piece exists.
        """
        # Use the argument as the key; the literal string 'name' would
        # delete the wrong entry (or fail) for every real piece.
        del self.components[name]

    def render(self):
        """Return a (size, size) array of strings for display.

        Empty cells hold a single space; occupied cells hold the piece's
        ``code``. If two pieces share a cell, the one iterated last wins.
        """
        displ_board = np.full((self.size, self.size), ' ', dtype='<U2')
        for piece in self.components.values():
            displ_board[piece.pos] = piece.code
        return displ_board

    def render_np(self):
        """Return a (num_pieces, size, size) uint8 array with one layer per piece.

        Layer ``i`` is all zeros except for a 1 at the position of the
        i-th piece in ``components``.
        """
        num_pieces = len(self.components)
        displ_board = np.zeros((num_pieces, self.size, self.size), dtype=np.uint8)
        for layer, piece in enumerate(self.components.values()):
            # Prepend the layer index to the piece's 2-D position.
            displ_board[(layer,) + piece.pos] = 1
        return displ_board
def addTuple(a,b):
    """Add two sequences element by element and return the result as a tuple.

    Pairing is done with ``zip``, so the result is as long as the shorter input.
    """
    return tuple(map(sum, zip(a, b)))
class Gridworld:
    """Grid game with four pieces: Player, Goal, Pit and Wall.

    ``mode`` selects the starting layout: 'static' uses fixed positions,
    'player' randomizes only the player, and any other value randomizes
    all four pieces.
    """

    def __init__(self, size=4, mode='static'):
        if size >= 4:
            board_size = size
        else:
            print("Minimum board size is 4. Initialized to size 4.")
            board_size = 4
        self.board = GridBoard(size=board_size)

        # Register the pieces; their real positions are assigned by the
        # layout routine selected below.
        placeholders = (('Player', 'P'), ('Goal', '+'), ('Pit', '-'), ('Wall', 'W'))
        for row, (piece_name, symbol) in enumerate(placeholders):
            self.board.addPiece(piece_name, symbol, (row, 0))

        if mode == 'static':
            setup = self.initGridStatic
        elif mode == 'player':
            setup = self.initGridPlayer
        else:
            setup = self.initGridRand
        setup()

    def initGridStatic(self):
        """Place every piece at its fixed, deterministic position."""
        fixed_layout = {
            'Player': (0,3),
            'Goal': (0,0),
            'Pit': (0,1),
            'Wall': (1,1),
        }
        for piece_name, pos in fixed_layout.items():
            self.board.components[piece_name].pos = pos

    def validateBoard(self):
        """Return True when no two pieces share a position, else False."""
        positions = [piece.pos for piece in self.board.components.values()]
        # A set drops duplicates, so equal lengths mean no overlap.
        return len(set(positions)) == len(positions)

    def initGridPlayer(self):
        """Keep goal, pit and wall fixed; put the player on a random free cell."""
        # Redraw until the player does not overlap another piece.
        while True:
            self.initGridStatic()
            self.board.components['Player'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                return

    def initGridRand(self):
        """Place player, goal, pit and wall at random, non-overlapping cells."""
        # Redraw all four positions until none of them collide.
        while True:
            for piece_name in ('Player', 'Goal', 'Pit', 'Wall'):
                self.board.components[piece_name].pos = randPair(0,self.board.size)
            if self.validateBoard():
                return

    def makeMove(self, action):
        """Move the player one cell for an action in {'u', 'd', 'l', 'r'}.

        The move is ignored when the action is unrecognized, when the
        target cell holds the wall, or when it lies outside the board.
        """
        if action == 'u':  # up
            step = (-1,0)
        elif action == 'd':  # down
            step = (1,0)
        elif action == 'l':  # left
            step = (0,-1)
        elif action == 'r':  # right
            step = (0,1)
        else:
            return

        pieces = self.board.components
        target = addTuple(pieces['Player'].pos, step)
        if target == pieces['Wall'].pos:
            return  # blocked: the player can't move onto the wall
        if max(target) > (self.board.size-1) or min(target) < 0:
            return  # outside the bounds of the board
        self.board.movePiece('Player', target)

    def getReward(self):
        """Return -10 if the player is on the pit, 10 if on the goal, otherwise -1."""
        pieces = self.board.components
        player_pos = pieces['Player'].pos
        if player_pos == pieces['Pit'].pos:
            return -10
        if player_pos == pieces['Goal'].pos:
            return 10
        return -1

    def dispGrid(self):
        """Return the board's character rendering."""
        return self.board.render()