-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
54 lines (46 loc) · 1.62 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gym
import simulation
import time
import torch
from neuralnet import ActorNet
from ppo import PPO
def main():
    """Build the 'RCSumo-v0' environment and run competitive-agent training.

    The environment is closed when training finishes or raises, so whatever
    resources it holds are released instead of leaking.
    """
    env = gym.make('RCSumo-v0')
    try:
        # Debugging aids -- uncomment to inspect the environment's spaces:
        # print(env.action_space.sample())       # a sample action
        # print(env.observation_space.sample())  # a sample observation
        # print(env.observation_space.shape)
        # print(env.action_space.shape)
        # print(env.observation_space.shape[0])
        # print(env.action_space.shape[0])
        competitiveAgents(env)
        # Alternative entry point: run random actions instead of training.
        # randomActions(env, env.observation_space, env.action_space)
    finally:
        # Always release the environment, even if training fails midway.
        env.close()
# Competitive Baselines features:
#   - Multi-process, multiple actions
#   - Multi-process, multiple observations
#   - 1 to N different models in training
#   - Model training against pretrained models
#   - Model training with a 1-to-1 advancement rate, no advancement,
#     or a limited advancement rate
#   - Episodic performance report
#   - Models can have different action spaces and observation spaces
def competitiveAgents(env):
    """Construct a ``PPO`` object for ``env`` and call its ``learn(10000)``.

    NOTE(review): what the 10000 budget counts (timesteps, episodes, ...) is
    decided by the project's ``ppo.PPO`` class -- confirm there.
    """
    PPO(env).learn(10000)
def randomActions(env, states, actions, episodes=10):
    """Play episodes with randomly sampled actions for both agents.

    For every episode the environment is reset, then both agents act with
    actions drawn from ``env.action_space`` until either agent reports
    ``done``. The accumulated reward of each agent is printed per episode.

    Args:
        env: Two-agent environment. ``env.reset()`` must return a pair of
            states and ``env.step(a1, a2)`` must return two 4-item results
            ``(next_state, reward, done, info)``, one per agent.
        states: Unused; kept for backward compatibility with callers.
        actions: Unused; actions are sampled from ``env.action_space``.
        episodes: Number of episodes to play (defaults to 10, the value
            that used to be hard-coded).
    """
    for episode in range(1, episodes + 1):
        # The initial states are not needed: actions are purely random.
        env.reset()
        score1 = 0
        score2 = 0
        while True:
            action1 = env.action_space.sample()
            action2 = env.action_space.sample()
            # Only reward and done flag are used; next state and info are ignored.
            (_, reward1, done1, _), (_, reward2, done2, _) = env.step(action1, action2)
            score1 += reward1
            score2 += reward2
            # The episode ends as soon as either agent is done.
            if done1 or done2:
                break
        print(f'Episode: {episode}, Score1: {score1}, Score2: {score2}')
# Run the training entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()