-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnets.py
66 lines (56 loc) · 1.81 KB
/
nets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import torch
from torch import nn
import torch.nn.functional as F
class MountaincarActor(nn.Module):
    """Feed-forward actor network mapping a state to a tanh-bounded action.

    The network is three ReLU-activated hidden layers followed by a linear
    head squashed with ``tanh``, so every output component lies in
    ``[-1, 1]``.

    The default sizes (2 inputs, hidden widths 128/256/128, 1 output)
    reproduce the previously hard-coded architecture, so ``MountaincarActor()``
    builds exactly the same layers as before.

    Args:
        state_dim: Size of the last dimension of the ``state`` tensor.
        action_dim: Size of the last dimension of the returned action.
        hidden_dims: Widths of the three hidden layers, in order.
    """

    def __init__(
        self,
        state_dim: int = 2,
        action_dim: int = 1,
        hidden_dims: tuple = (128, 256, 128),
    ) -> None:
        super().__init__()
        h1, h2, h3 = hidden_dims
        # Attribute names and creation order are kept unchanged so that
        # existing state_dicts still load and seeded initialisation consumes
        # the RNG in the same order.
        self.fc1 = nn.Linear(state_dim, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.out = nn.Linear(h3, action_dim)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        """Compute the action for ``state``.

        Args:
            state: Tensor whose last dimension equals ``state_dim``.

        Returns:
            Tensor with the same leading dimensions as ``state`` and a last
            dimension of ``action_dim``; values are in ``[-1, 1]``.
        """
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # tanh bounds the output; any rescaling to an environment's action
        # range has to happen in the caller.
        return torch.tanh(self.out(x))
class MountaincarCritic(nn.Module):
    """Feed-forward critic network scoring a (state, action) pair.

    The state is first passed through one ReLU layer; the action is then
    concatenated onto that feature vector before two further ReLU layers and
    a linear, unbounded single-output head.

    The default sizes (2 state inputs, 1 action input, hidden widths
    400/256/128) reproduce the previously hard-coded architecture, including
    the 401-wide input of the second layer (400 state features + 1 action).

    Args:
        state_dim: Size of the last dimension of the ``state`` tensor.
        action_dim: Size of the last dimension of the ``action`` tensor.
        hidden_dims: Widths of the three hidden layers, in order.
    """

    def __init__(
        self,
        state_dim: int = 2,
        action_dim: int = 1,
        hidden_dims: tuple = (400, 256, 128),
    ) -> None:
        super().__init__()
        h1, h2, h3 = hidden_dims
        # Attribute names and creation order are kept unchanged so that
        # existing state_dicts still load.
        self.fc1 = nn.Linear(state_dim, h1)
        # The action joins after the first layer, hence the wider input.
        self.fc2 = nn.Linear(h1 + action_dim, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.out = nn.Linear(h3, 1)

    def forward(self, state: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        """Compute the value estimate for ``state`` and ``action``.

        Args:
            state: Tensor whose last dimension equals ``state_dim``.
            action: Tensor whose last dimension equals ``action_dim`` and
                whose leading dimensions match those of ``state``.

        Returns:
            Tensor with the shared leading dimensions and a last dimension
            of 1. No output activation is applied.
        """
        x = F.relu(self.fc1(state))
        # Concatenate along the feature (last) axis. For 2-D batches this is
        # the same as dim=1, but it also works for unbatched 1-D inputs.
        x = F.relu(self.fc2(torch.cat((x, action), dim=-1)))
        x = F.relu(self.fc3(x))
        return self.out(x)
class PendulumActor(nn.Module):
    """Feed-forward actor network mapping a state to a tanh-bounded action.

    The network is three ReLU-activated hidden layers followed by a linear
    head squashed with ``tanh``, so every output component lies in
    ``[-1, 1]``.

    The default sizes (3 inputs, hidden widths 256/400/128, 1 output)
    reproduce the previously hard-coded architecture, so ``PendulumActor()``
    builds exactly the same layers as before.

    Args:
        state_dim: Size of the last dimension of the ``state`` tensor.
        action_dim: Size of the last dimension of the returned action.
        hidden_dims: Widths of the three hidden layers, in order.
    """

    def __init__(
        self,
        state_dim: int = 3,
        action_dim: int = 1,
        hidden_dims: tuple = (256, 400, 128),
    ) -> None:
        super().__init__()
        h1, h2, h3 = hidden_dims
        # Attribute names and creation order are kept unchanged so that
        # existing state_dicts still load and seeded initialisation consumes
        # the RNG in the same order.
        self.fc1 = nn.Linear(state_dim, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.out = nn.Linear(h3, action_dim)

    def forward(self, state: torch.Tensor) -> torch.Tensor:
        """Compute the action for ``state``.

        Args:
            state: Tensor whose last dimension equals ``state_dim``.

        Returns:
            Tensor with the same leading dimensions as ``state`` and a last
            dimension of ``action_dim``; values are in ``[-1, 1]``.
        """
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # NOTE(review): output is limited to [-1, 1]; if the target
        # environment uses a wider action range the caller must rescale —
        # confirm against the training loop.
        return torch.tanh(self.out(x))
class PendulumCritic(nn.Module):
    """Feed-forward critic network scoring a (state, action) pair.

    The state is first passed through one ReLU layer; the action is then
    concatenated onto that feature vector before two further ReLU layers and
    a linear, unbounded single-output head.

    The default sizes (3 state inputs, 1 action input, hidden widths
    400/300/128) reproduce the previously hard-coded architecture, including
    the 401-wide input of the second layer (400 state features + 1 action).

    Args:
        state_dim: Size of the last dimension of the ``state`` tensor.
        action_dim: Size of the last dimension of the ``action`` tensor.
        hidden_dims: Widths of the three hidden layers, in order.
    """

    def __init__(
        self,
        state_dim: int = 3,
        action_dim: int = 1,
        hidden_dims: tuple = (400, 300, 128),
    ) -> None:
        super().__init__()
        h1, h2, h3 = hidden_dims
        # Attribute names and creation order are kept unchanged so that
        # existing state_dicts still load.
        self.fc1 = nn.Linear(state_dim, h1)
        # The action joins after the first layer, hence the wider input.
        self.fc2 = nn.Linear(h1 + action_dim, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.out = nn.Linear(h3, 1)

    def forward(self, state: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        """Compute the value estimate for ``state`` and ``action``.

        Args:
            state: Tensor whose last dimension equals ``state_dim``.
            action: Tensor whose last dimension equals ``action_dim`` and
                whose leading dimensions match those of ``state``.

        Returns:
            Tensor with the shared leading dimensions and a last dimension
            of 1. No output activation is applied.
        """
        state_features = F.relu(self.fc1(state))
        # Concatenate along the feature (last) axis. For 2-D batches this is
        # the same as dim=1, but it also works for unbatched 1-D inputs.
        joint = torch.cat((state_features, action), dim=-1)
        x = F.relu(self.fc2(joint))
        x = F.relu(self.fc3(x))
        return self.out(x)