import gym
import numpy as np
'''
--- Cart and Pole ---
Observations
------------------
Index -> Observation
0 -> Cart Position
1 -> Cart Velocity
2 -> Pole Angle
3 -> Pole Angular Velocity
Actions
------------------
Number -> Action
0 -> Apply negative force (push the cart to the left)
1 -> Apply positive force (push the cart to the right)
The episode terminates when:
    the pole angle exceeds +/-12 degrees,
    the cart position exceeds +/-2.4, or
    the episode reaches 200 steps.
choose_action
----------------
Parameters
-----------
obs -> the observations given by the environment
Variables
-----------
cart_pos -> the cart's position
cart_vel -> the cart's velocity
pole_angle -> the angle of the pole
pole_angular_vel -> the pole's angular velocity
right_velocity_limit -> the cart velocity limit we enforce
                        in the positive x direction
left_velocity_limit -> the cart velocity limit we enforce
                       in the negative x direction
Returns
----------
1 -> if pole_angle is greater than 0 or cart_vel is less
     than left_velocity_limit
0 -> if pole_angle is less than 0 or cart_vel is greater than
     right_velocity_limit (and as the fallback when neither
     condition holds)
'''
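# A quick, optional way to confirm these spaces in the classic gym API this
# script targets (illustrative sketch, not part of the script's normal flow):
#   env = gym.make('CartPole-v0')
#   print(env.observation_space)  # prints something like Box(4,) -> the four observations above
#   print(env.action_space)       # prints Discrete(2) -> actions 0 and 1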
def choose_action(obs):
    cart_pos = obs[0]
    cart_vel = obs[1]
    pole_angle = obs[2]
    pole_angular_vel = obs[3]
    right_velocity_limit = .8
    left_velocity_limit = -right_velocity_limit
    if pole_angle > 0 or cart_vel < left_velocity_limit:
        return 1
    elif pole_angle < 0 or cart_vel > right_velocity_limit:
        return 0
    # fallback so a valid action is always returned (e.g. pole_angle == 0
    # with cart_vel inside the velocity limits)
    return 0
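# Sketch of the heuristic on hypothetical observations (values invented for
# illustration; uncomment to sanity-check choose_action in isolation):
# assert choose_action([0.0, 0.0, 0.05, 0.0]) == 1   # pole leans right -> push right
# assert choose_action([0.0, 0.9, -0.05, 0.0]) == 0  # cart moving right too fast -> push left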
'''
main()
--------------------
Variables
----------
env -> the Cart and Pole environment
obs -> the observations of the env, which are reset after
       every episode
episodes -> the number of times the experiment is repeated
results -> holds the number of steps that each episode runs for.
           An average is taken from this; 195 or more over
           100 episodes is considered solving the problem.
steps -> counts the steps before the episode is terminated
action -> the chosen action returned from choose_action
done -> True if any of the terminating conditions happen
'''
def main():
    env = gym.make('CartPole-v0')  # creates the environment
    obs = env.reset()
    episodes = 10  # number of episodes
    results = []  # collects each episode's step count
    # the goal is an average of 195 over 100 episodes
    for e in range(episodes):  # episode loop
        obs = env.reset()
        steps = 0
        for t in range(201):  # step loop
            env.render()
            action = choose_action(obs)  # chooses an action based on the current state
            obs, reward, done, info = env.step(action)  # advances the environment with that action
            steps += 1
            # print(f'Observations: {obs}')
            if done:
                results.append(steps)
                print(f'Finished State: {obs}')
                print(f'Steps: {steps}')
                break
    env.close()
    print(f'Average steps over {episodes} episodes:')
    print(np.average(results))

if __name__ == '__main__':
    main()