# Author: Sebastian Garcia. [email protected]
# This agent just randomly blocks IPs (it selects uniformly at random among its allowed defender actions each step).
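#
# Example invocation (a sketch: the host/port assume a locally running game
# server; all flags are defined in the argparse section below):
#   python random_agent.py --host 127.0.0.1 --port 9000 --episodes 10 --apm 60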
import sys
import logging
import os
from os import path
from random import choice, uniform
import argparse
import time

import numpy as np
import mlflow

# Make the repository root importable so the agent utils and base agent resolve
sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__)))))
# With the path fixed, we can import them now
from AIDojoCoordinator.game_components import Action, Observation, ActionType
from base_agent import BaseAgent
from agent_utils import generate_valid_actions
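
# NOTE (assumption): the sys.path tweak above exposes the directory three levels
# above this file; base_agent.py and agent_utils.py are expected to be importable
# from there, as in the NetSecGameAgents repository layout.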


class RandomDefenderAgent(BaseAgent):
    def __init__(self, host: str, port: int, role: str, allowed_actions: list, apm_limit: int = None) -> None:
        super().__init__(host, port, role)
        self._allowed_actions = allowed_actions
        self._apm_limit = apm_limit
        if self._apm_limit:
            # Minimum interval (in seconds) between actions to respect the APM limit
            self.interval = 60 / apm_limit
        else:
            self.interval = 0

    def play_game(self, observation, num_episodes=1):
        """
        The main function for the gameplay. Handles the main interaction loop with the game server.
        """
        returns = []
        num_steps = 0
        # Initialize here so there is always an observation to return, even if a loop never runs
        last_observation = observation
        for episode in range(num_episodes):
            self._logger.info(f"Playing episode {episode}")
            episodic_returns = []
            start_time = time.time()
            while observation and not observation.end:
                num_steps += 1
                self._logger.debug(f'Observation received: {observation}')
                # Store the reward of this step in the episode
                episodic_returns.append(observation.reward)
                # Select the action randomly
                action = self.select_action(observation)
                observation = self.make_step(action)
                # Keep the latest observation so it can be returned
                last_observation = observation
                if self._apm_limit:
                    elapsed_time = time.time() - start_time
                    remaining_time = self.interval - elapsed_time
                    if remaining_time > 0:
                        # Randomize the wait by scaling the remaining time with a random
                        # factor; negative draws are clamped to zero (no wait at all)
                        randomized_interval = max(0, remaining_time * uniform(-1, 5))
                        self._logger.debug(f"Waiting for {randomized_interval}s before next action")
                        time.sleep(randomized_interval)
                    start_time = time.time()
            self._logger.debug(f'Observation received: {observation}')
            returns.append(np.sum(episodic_returns))
            self._logger.info(f"Episode {episode} ended with return {np.sum(episodic_returns)}. Mean returns={np.mean(returns)}±{np.std(returns)}")
            # Reset the episode
            observation = self.request_game_reset()
        self._logger.info(f"Final results for {self.__class__.__name__} after {num_episodes} episodes: {np.mean(returns)}±{np.std(returns)}")
        return (last_observation, num_steps)

    def select_action(self, observation: Observation) -> Action:
        valid_actions = generate_valid_actions(observation.state)
        # Keep only the actions whose type this agent is allowed to play
        allowed_actions = [action for action in valid_actions if action.type in self._allowed_actions]
        # Always include ResetGame so there is at least one action to choose from
        allowed_actions.append(Action(ActionType.ResetGame, parameters={}))
        return choice(allowed_actions)
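

# Entry point: parse the CLI flags, set up file logging, create the agent, and
# then either play the game directly or run the mlflow-tracked evaluation.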
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", help="Host where the game server is", default="127.0.0.1", action='store', required=False)
    parser.add_argument("--port", help="Port where the game server is", default=9000, type=int, action='store', required=False)
    parser.add_argument("--episodes", help="Sets the number of testing episodes", default=1, type=int)
    parser.add_argument("--test_each", help="Report evaluation performance every this many episodes.", default=10, type=int)
    parser.add_argument("--logdir", help="Folder to store logs", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs"))
    parser.add_argument("--apm", help="Actions per minute", default=1000000, type=int, required=False)
    # A plain bool type would treat any non-empty string as True, so parse the flag explicitly
    parser.add_argument("--evaluate", help="Evaluate the agent and report, instead of playing the game only once.", default=True, type=lambda v: str(v).lower() not in ('false', '0', 'no'))
    parser.add_argument("--mlflow_url", help="URL for the mlflow tracking server. If not provided, mlflow will store locally.", default=None)
    args = parser.parse_args()

    if not os.path.exists(args.logdir):
        os.makedirs(args.logdir)
    logging.basicConfig(filename=os.path.join(args.logdir, "defender_random_agent.log"), filemode='w', format='%(asctime)s %(name)s %(levelname)s %(message)s', datefmt='%H:%M:%S', level=logging.DEBUG)

    # Create the agent
    agent = RandomDefenderAgent(args.host, args.port, "Defender", allowed_actions=[ActionType.FindData, ActionType.ExfiltrateData, ActionType.FindServices, ActionType.BlockIP], apm_limit=args.apm)
    print("Agent created. Starting interaction with the game server...")
    if not args.evaluate:
        # Play the normal game
        observation = agent.register()
        print("Agent registered. Starting to play the game...")
        agent.play_game(observation, args.episodes)
        agent._logger.info("Terminating interaction")
        agent.terminate_connection()
    else:
        # Evaluate the agent's performance. How it works:
        # - Evaluate for several episodes (--episodes parameter)
        # - Each episode finishes with: steps played, return, and win/lose. Store all of them
        # - After each episode, compute the avg and std over all episodes so far
        # - Every X episodes (--test_each parameter), report in the log and mlflow
        # - At the end, report in the log, mlflow, and the console
        # Mlflow experiment name
        experiment_name = "Evaluation of Random Defender Agent"
        if args.mlflow_url:
            mlflow.set_tracking_uri(args.mlflow_url)
        mlflow.set_experiment(experiment_name)
        # Register in the game
        observation = agent.register()
        print("Agent registered. Starting evaluation...")
        with mlflow.start_run(run_name=experiment_name) as run:
            # Keep statistics of each episode
            wins = 0
            detected = 0
            max_steps = 0
            num_win_steps = []
            num_detected_steps = []
            num_max_steps_steps = []
            num_detected_returns = []
            num_win_returns = []
            num_max_steps_returns = []
            # Log more things in mlflow
            mlflow.set_tag("experiment_name", experiment_name)
            # Log notes or additional information
            mlflow.set_tag("notes", "This is an evaluation")
            mlflow.set_tag("episode_number", args.episodes)
            # mlflow.log_param("learning_rate", learning_rate)
            for episode in range(1, args.episodes + 1):
                agent.logger.info(f'Starting the testing for episode {episode}')
                print(f'Starting the testing for episode {episode}')
                # Play the game for one episode
                observation, num_steps = agent.play_game(observation, 1)
                reward = observation.reward
                # Classify how the episode ended and store its steps and return
                end_reason = observation.info.get('end_reason') if observation.info else None
                if end_reason == 'blocked':
                    detected += 1
                    num_detected_steps.append(num_steps)
                    num_detected_returns.append(reward)
                elif end_reason == 'goal_reached':
                    wins += 1
                    num_win_steps.append(num_steps)
                    num_win_returns.append(reward)
                elif end_reason == 'max_steps':
                    max_steps += 1
                    num_max_steps_steps.append(num_steps)
                    num_max_steps_returns.append(reward)
                # Reset the game
                observation = agent.request_game_reset()
                # Aggregate statistics over all episodes played so far
                eval_win_rate = (wins / episode) * 100
                eval_detection_rate = (detected / episode) * 100
                all_returns = num_detected_returns + num_win_returns + num_max_steps_returns
                all_steps = num_win_steps + num_detected_steps + num_max_steps_steps
                eval_average_returns = np.mean(all_returns)
                eval_std_returns = np.std(all_returns)
                eval_average_episode_steps = np.mean(all_steps)
                eval_std_episode_steps = np.std(all_steps)
                eval_average_win_steps = np.mean(num_win_steps)
                eval_std_win_steps = np.std(num_win_steps)
                eval_average_detected_steps = np.mean(num_detected_steps)
                eval_std_detected_steps = np.std(num_detected_steps)
                eval_average_max_steps_steps = np.mean(num_max_steps_steps)
                eval_std_max_steps_steps = np.std(num_max_steps_steps)
                # Log and report every X episodes
                if episode % args.test_each == 0:
                    text = f'''Tested after {episode} episodes.
                    Wins={wins},
                    Detections={detected},
                    winrate={eval_win_rate:.3f}%,
                    detection_rate={eval_detection_rate:.3f}%,
                    average_returns={eval_average_returns:.3f} +- {eval_std_returns:.3f},
                    average_episode_steps={eval_average_episode_steps:.3f} +- {eval_std_episode_steps:.3f},
                    average_win_steps={eval_average_win_steps:.3f} +- {eval_std_win_steps:.3f},
                    average_detected_steps={eval_average_detected_steps:.3f} +- {eval_std_detected_steps:.3f},
                    average_max_steps_steps={eval_average_max_steps_steps:.3f} +- {eval_std_max_steps_steps:.3f}
                    '''
                    agent.logger.info(text)
                    print(text)
                    # Store in mlflow
                    mlflow.log_metric("eval_avg_win_rate", eval_win_rate, step=episode)
                    mlflow.log_metric("eval_avg_detection_rate", eval_detection_rate, step=episode)
                    mlflow.log_metric("eval_avg_returns", eval_average_returns, step=episode)
                    mlflow.log_metric("eval_std_returns", eval_std_returns, step=episode)
                    mlflow.log_metric("eval_avg_episode_steps", eval_average_episode_steps, step=episode)
                    mlflow.log_metric("eval_std_episode_steps", eval_std_episode_steps, step=episode)
                    mlflow.log_metric("eval_avg_win_steps", eval_average_win_steps, step=episode)
                    mlflow.log_metric("eval_std_win_steps", eval_std_win_steps, step=episode)
                    mlflow.log_metric("eval_avg_detected_steps", eval_average_detected_steps, step=episode)
                    mlflow.log_metric("eval_std_detected_steps", eval_std_detected_steps, step=episode)
                    mlflow.log_metric("eval_avg_max_steps_steps", eval_average_max_steps_steps, step=episode)
                    mlflow.log_metric("eval_std_max_steps_steps", eval_std_max_steps_steps, step=episode)

            # Log the final evaluation after the last episode
            text = f'''Final evaluation after {episode} episodes (configured: {args.episodes}).
            Wins={wins},
            Detections={detected},
            winrate={eval_win_rate:.3f}%,
            detection_rate={eval_detection_rate:.3f}%,
            average_returns={eval_average_returns:.3f} +- {eval_std_returns:.3f},
            average_episode_steps={eval_average_episode_steps:.3f} +- {eval_std_episode_steps:.3f},
            average_win_steps={eval_average_win_steps:.3f} +- {eval_std_win_steps:.3f},
            average_detected_steps={eval_average_detected_steps:.3f} +- {eval_std_detected_steps:.3f},
            average_max_steps_steps={eval_average_max_steps_steps:.3f} +- {eval_std_max_steps_steps:.3f}
            '''
            agent.logger.info(text)
            print(text)
agent._logger.info("Terminating interaction")
agent.terminate_connection()
# Print and log the mlflow experiment ID, run ID, and storage location
experiment_id = run.info.experiment_id
run_id = run.info.run_id
storage_location = "locally" if not args.mlflow_url else f"at {args.mlflow_url}"
print(f"MLflow Experiment ID: {experiment_id}")
print(f"MLflow Run ID: {run_id}")
print(f"Experiment saved {storage_location}")
agent._logger.info(f"MLflow Experiment ID: {experiment_id}")
agent._logger.info(f"MLflow Run ID: {run_id}")
agent._logger.info(f"Experiment saved {storage_location}")