-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathLaunchCartPole.cpp
180 lines (147 loc) · 4.89 KB
/
LaunchCartPole.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
/*
* Copyright (C) 2019 Istituto Italiano di Tecnologia (IIT)
* All rights reserved.
*
* This software may be modified and distributed under the terms of the
* GNU Lesser General Public License v2.1 or any later version.
*/
#include "gympp/Common.h"
#include "gympp/Environment.h"
#include "gympp/GymFactory.h"
#include "gympp/Log.h"
#include "gympp/PluginDatabase.h"
#include "gympp/Space.h"
#include "clara.hpp"
#include <ignition/common/SignalHandler.hh>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <thread>
#include <utility>
#include <vector>
using namespace gympp;
using namespace clara;
/// Command line options collected by the argument parser in main().
struct Config
{
    /// Set when the help option was passed on the command line.
    bool help{false};
    /// Set by -g / --gui: render the environment.
    bool gui{false};
    /// Set by -s / --seed; stays empty when no seed was requested.
    std::optional<size_t> seed{};
};
/**
 * Entry point: drives the "CartPole" environment with randomly sampled actions.
 *
 * Parses the command line (help, -g/--gui, -s/--seed), creates the environment
 * through GymFactory, then steps it until 100 epochs have completed, printing
 * the sampled action and the observation of every step to stdout. An epoch
 * ends when the environment reports `done`; the environment is then reset.
 *
 * @param argc Number of command line arguments.
 * @param argv Command line arguments.
 * @return EXIT_SUCCESS when all epochs completed or the help was printed,
 *         EXIT_FAILURE on a command line error or any environment failure.
 */
int main(int argc, char* argv[])
{
    // ==================
    // PARSE COMMAND LINE
    // ==================

    Config config;

    // Create the command line parser
    auto cli = Help(config.help) | Opt(config.gui)["-g"]["--gui"]("render the environment")
               | Opt([&](unsigned value) { config.seed = value; },
                     "seed")["-s"]["--seed"]("use a specific seed for randomness");

    // Parse the command line
    if (auto result = cli.parse(Args(argc, argv)); !result) {
        gymppError << "Error in command line: " << result.errorMessage() << std::endl;
        return EXIT_FAILURE;
    }

    if (config.help) {
        std::cout << cli;
        return EXIT_SUCCESS;
    }

    // ==========================
    // INITIALIZE THE ENVIRONMENT
    // ==========================

    // Create the environment
    auto env = GymFactory::Instance()->make("CartPole");

    if (!env) {
        gymppError << "Failed to load the CartPole environment" << std::endl;
        return EXIT_FAILURE;
    }

    // Initialize the signal handler. The check is done at runtime rather than
    // with assert() so that it is not compiled out when NDEBUG is defined.
    ignition::common::SignalHandler sigHandler;

    if (!sigHandler.Initialized()) {
        gymppError << "Failed to initialize the signal handler" << std::endl;
        return EXIT_FAILURE;
    }

    // The callback captures `env` by reference: `env` is declared before
    // `sigHandler`, so it outlives the handler object.
    sigHandler.AddCallback([&](const int /*_sig*/) {
        gymppDebug << "Shutting down gracefully" << std::endl;
        env.reset();
        exit(EXIT_FAILURE);
    });

    // Initialize the seed
    if (config.seed) {
        env->seed(config.seed.value());
    }

    // Reset the environment
    auto reward = Environment::Reward(0);
    auto observation = env->reset();

    if (!observation) {
        gymppError << "Failed to retrieve the initial observation" << std::endl;
        return EXIT_FAILURE;
    }

    // Create the initial state object
    Environment::State oldState;
    oldState.done = false;
    oldState.observation = observation.value();

    // Render the environment
    if (config.gui && !env->render(Environment::RenderMode::HUMAN)) {
        gymppError << "Failed to render the environment" << std::endl;
        return EXIT_FAILURE;
    }

    // ===============
    // SIMULATION LOOP
    // ===============

    // Number of episodes to run before exiting
    constexpr size_t numberOfEpochs = 100;

    size_t epoch = 1;
    size_t iteration = 0;

    while (epoch <= numberOfEpochs) {
        iteration++;

        // Process oldState to obtain the action.
        // Here we use a random action to bypass it.
        auto actionSample = env->action_space->sample();

        // Simulate the system with the given action
        auto state = env->step(actionSample);

        if (!state) {
            gymppError << "The environment didn't return the state" << std::endl;
            return EXIT_FAILURE;
        }

        // Print the observation
        if (auto* o = state->observation.getBuffer<double>(); o) {
            std::cout << "#" << epoch << "." << iteration << "\t";
            // NOTE(review): get<int>(0) is dereferenced unchecked; this presumes
            // the sampled action holds an int at index 0 - confirm.
            std::cout << "[" << *actionSample.get<int>(0) << "]\t";

            // The formatting is the same for every element: set it once
            std::cout.setf(std::ios::fixed);
            std::cout.precision(6);

            for (const auto el : *o) {
                std::cout << el << "\t";
            }

            // std::endl already flushes the stream
            std::cout << std::endl;
        }
        else {
            gymppError << "The environment didn't return the observation" << std::endl;
            return EXIT_FAILURE;
        }

        // Cumulate the reward (only accumulated here, never reported)
        reward += state->reward;

        // Read the termination flag BEFORE the state is moved from: the
        // moved-from object must not be inspected afterwards.
        const bool done = state->done;

        // Save the old state
        oldState = std::move(state.value());

        // Handle termination
        if (done) {
            gymppDebug << "The environment reached the terminal state" << std::endl;
            std::this_thread::sleep_for(std::chrono::milliseconds(50));

            // Reset the environment
            auto newObservation = env->reset();

            if (!newObservation) {
                gymppError << "Failed to reset the environment" << std::endl;
                return EXIT_FAILURE;
            }

            // Start a new epoch from the freshly reset observation
            epoch++;
            iteration = 0;

            oldState.observation = newObservation.value();
            oldState.reward = 0;
            oldState.done = false;

            gymppDebug << "Resetting the environment" << std::endl;
        }
    }

    return EXIT_SUCCESS;
}