rl_play.py
"""Create a reinforcement learning environment based on the network simulator and start a play."""
import argparse
import time

import numpy as np

from lib.rl_env import RLNetworkEnv


def getargs():
    """Parse command line arguments."""
    args = argparse.ArgumentParser()
    args.add_argument('flow_path', help="Path to the input npy file describing flow profiles.")
    args.add_argument('route_path', help="Path to the input npy file describing flow routes.")
    args.add_argument('--simulation-time', type=float, default=100.0,
                      help="Total simulation time (in seconds).")
    args.add_argument('--scheduling-policy', type=str, default="fifo",
                      help="Type of scheduler applied at each hop of the network. Choose between 'fifo' and 'sced'.")
    args.add_argument('--shaping-mode', type=str, default="pfs",
                      help="Type of traffic shapers applied. Choose among 'pfs', 'ils', 'is', 'itb', "
                           "and 'ntb'. Only active when the scheduling policy is 'fifo'.")
    args.add_argument('--buffer-bound', type=str, default='infinite',
                      help="Link buffer bound. Choose between 'infinite' and 'with_shaping'.")
    args.add_argument('--arrival-pattern-type', type=str, default="sync",
                      help="Type of traffic arrival pattern. Choose between 'sync' and 'async'.")
    args.add_argument('--sync-jitter', type=float, default=0,
                      help="Jitter for synchronized flow bursts. Only active when the arrival pattern is 'sync'.")
    args.add_argument('--periodic-arrival-ratio', type=float, default=1.0,
                      help="Fraction of flows that have periodic arrival patterns. Non-periodic flows send the "
                           "maximum amount of traffic throughout the simulation.")
    args.add_argument('--awake-prob-choice', type=float, default=[1.0], nargs='+',
                      help="Choices of probability that periodic flows awake at the beginning of each period.")
    args.add_argument('--awake-prob-sample-weight', type=float, default=[1.0], nargs='+',
                      help="The sampling weight of each choice of flow awake probability.")
    args.add_argument('--awake-dur', type=float, default=0.0, help="Length of awake time of periodic flows.")
    args.add_argument('--awake-dist', type=str, default="constant",
                      help="Periodic flow awake time distribution. Choose between 'exponential' and 'constant'.")
    args.add_argument('--sleep-dur', type=str, default='max',
                      help="Length of sleep time of periodic flows. Can be set to 'min', 'max', or a number.")
    args.add_argument('--sleep-dist', type=str, default="constant",
                      help="Periodic flow sleep time distribution. Choose between 'uniform' and 'constant'.")
    args.add_argument('--pause-interval', type=float, default=1,
                      help="The length of a time step (in seconds) for the reinforcement learning environment.")
    args.add_argument('--high-reward', type=float, default=1,
                      help="The highest possible reward received by a flow when its end-to-end delay is 0.")
    args.add_argument('--low-reward', type=float, default=0.1,
                      help="The reward received by a flow when its end-to-end delay equals the worst-case bound.")
    args.add_argument('--penalty', type=float, default=-10,
                      help="The negative penalty received by a flow when its end-to-end delay exceeds the bound.")
    return args.parse_args()
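

# Example invocation (a sketch; 'flow_profiles.npy' and 'flow_routes.npy' are placeholder
# file names for your own input data, not files referenced by the original script):
#   python rl_play.py flow_profiles.npy flow_routes.npy --simulation-time 100 \
#       --scheduling-policy fifo --shaping-mode pfs --pause-interval 1
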
if __name__ == '__main__':
    # An example use case of the reinforcement learning environment for the network simulator.
    # Parse the command line arguments.
    args = getargs()
    # Load the input data.
    flow_profile = np.load(args.flow_path)
    flow_route = np.load(args.route_path)
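    # Note (assumption): flow_profile is taken here to have one row per flow with columns
    # (rate, burst size, end-to-end deadline), so the shaping delay below is min(deadline, burst / rate).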
    # Assuming full shaping, compute the shaping delay.
    shaping_delay = np.minimum(flow_profile[:, 2], flow_profile[:, 1] / flow_profile[:, 0])
    # Set the random seed for reproducible results.
    np.random.seed(0)
# Create the RL environment. Make your own choice of the input parameters.
environment = RLNetworkEnv(flow_profile, flow_route, shaping_delay, simulation_time=args.simulation_time,
scheduling_policy=args.scheduling_policy, shaping_mode=args.shaping_mode,
buffer_bound=args.buffer_bound, arrival_pattern_type=args.arrival_pattern_type,
sync_jitter=args.sync_jitter, periodic_arrival_ratio=args.periodic_arrival_ratio,
awake_prob_choice=tuple(args.awake_prob_choice),
awake_prob_sample_weight=tuple(args.awake_prob_sample_weight),
awake_dur=args.awake_dur, awake_dist=args.awake_dist, sleep_dur=args.sleep_dur,
sleep_dist=args.sleep_dist, arrival_pattern=None, keep_per_hop_departure=False,
scaling_factor=1.0, packet_size=1, propagation_delay=0,
pause_interval=args.pause_interval, high_reward=args.high_reward,
low_reward=args.low_reward, penalty=args.penalty)
    # Initialize the environment and get the initial state.
    initial_state = environment.reset()
    # Keep iterating until the end of an episode.
    while True:
        # Specify the shaping control action. Here we assume we always turn on the shapers.
        # You may train an RL agent to make the shaping control decision here.
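        # Illustrative sketch (not part of the original example): a random on/off decision per
        # shaper could replace the always-on action, e.g.
        #   action = np.random.rand(environment.num_action) < 0.5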
        action = np.ones((environment.num_action,), dtype=bool)
        next_state, reward, terminate, truncate = environment.step(action)
        # End the episode if the simulation terminates or gets truncated.
        if terminate or truncate:
            break
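    # Possible extension (sketch): accumulate the per-step rewards inside the loop,
    # e.g. total_reward += np.sum(reward), to score the always-on shaping policy.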