-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMyPandaRobotEnvs.py
129 lines (108 loc) · 4.35 KB
/
MyPandaRobotEnvs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
from panda_gym.envs.core import RobotTaskEnv
from panda_gym.envs.core import Task
from panda_gym.envs.core import RobotTaskEnv
from panda_gym.pybullet import PyBullet
from panda_gym.envs.robots.panda import Panda
from panda_gym.utils import distance
from gymnasium.envs.registration import register
class MyReachTargetTask(Task):
    """Reach task: drive the Panda end-effector to a target position.

    Args:
        sim: Simulation backend (panda-gym PyBullet wrapper).
        get_ee_position: Callable returning the current end-effector
            position as an (x, y, z) array.
        reward_type: "sparse" (1.0 on success, 0.0 otherwise) or
            "dense" (negative Euclidean distance to the goal).
        distance_threshold: Distance below which the goal counts as reached.
        goal_range: Side length used to build the goal-sampling bounds.
    """

    def __init__(
        self,
        sim,
        get_ee_position,
        reward_type="sparse",
        distance_threshold=0.05,
        goal_range=0.3,
    ) -> None:
        super().__init__(sim)
        self.reward_type = reward_type  # "sparse" or "dense"
        self.distance_threshold = distance_threshold
        self.get_ee_position = get_ee_position
        # Bounds for goal sampling: a box of side goal_range in x/y (centred
        # at the origin) and [0, goal_range] in z.
        self.goal_range_low = np.array([-goal_range / 2, -goal_range / 2, 0])
        self.goal_range_high = np.array([goal_range / 2, goal_range / 2, goal_range])
        with self.sim.no_rendering():
            self._create_scene()

    def _create_scene(self) -> None:
        """Create the static scene: ground plane, table, and ghost target sphere."""
        self.sim.create_plane(z_offset=-0.4)  # * create a plane
        self.sim.create_table(length=1.1, width=0.7, height=0.4, x_offset=-0.3)  # * create a table
        # * create a target sphere (ghost: visual marker only, no collisions)
        self.sim.create_sphere(
            body_name="target",
            radius=0.02,
            mass=0.0,
            ghost=True,
            position=np.array([0.1, 0.1, 0.1]),
            rgba_color=np.array([0.1, 0.9, 0.1, 0.3]),
        )

    def get_obs(self) -> np.ndarray:
        """Return task-specific observations (none for this task)."""
        return np.array([])

    def get_achieved_goal(self) -> np.ndarray:
        """Return the current end-effector position as the achieved goal."""
        return np.array(self.get_ee_position())

    def reset(self) -> None:
        """Sample a new goal and move the target marker to it."""
        self.goal = self._sample_goals()
        self.sim.set_base_pose("target", self.goal[:3], np.array([0.0, 0.0, 0.0, 1.0]))

    def _sample_goals(self) -> np.ndarray:
        # Currently a fixed goal; a randomized goal could instead be drawn
        # from [goal_range_low, goal_range_high], which are set but unused.
        return np.array([0.1, 0.1, 0.1], dtype=np.float32)

    def is_success(self, achieved_goal: np.ndarray, desired_goal: np.ndarray) -> np.ndarray:
        """Return True iff the achieved goal is within distance_threshold of the desired goal."""
        d = distance(achieved_goal[:3], desired_goal[:3])
        return np.array(d < self.distance_threshold)

    def compute_reward(self, achieved_goal, desired_goal, info) -> np.ndarray:
        """Compute the step reward.

        Sparse: 1.0 when the goal is reached, 0.0 otherwise.
        Dense: negative Euclidean distance between achieved and desired goal.
        """
        if self.reward_type == "sparse":
            # BUG FIX: is_success() was called without its required
            # (achieved_goal, desired_goal) arguments, raising TypeError
            # on every sparse-reward step.
            success = self.is_success(achieved_goal, desired_goal)
            return np.array(success, dtype=np.float32)
        else:
            # * for the "dense" rewards, reward reflects distance to the goal
            d = distance(achieved_goal[:3], desired_goal[:3])
            return -d.astype(np.float32)
class MyReachTargetEnv(RobotTaskEnv):
    """Gymnasium environment pairing a Panda robot with MyReachTargetTask.

    Args:
        render_mode: Render mode passed to the PyBullet backend.
        reward_type: "sparse" or "dense", forwarded to the task.
        control_type: Panda control scheme (e.g. "ee" for end-effector).
        renderer: PyBullet renderer backend name.
        render_width / render_height: Rendered image size in pixels.
        render_target_position: Camera look-at point (backend default if None).
        render_distance / render_yaw / render_pitch / render_roll: Camera pose.
    """

    def __init__(
        self,
        render_mode: str = "rgb_array",
        reward_type: str = "sparse",
        control_type: str = "ee",
        renderer: str = "Tiny",
        render_width: int = 720,
        render_height: int = 480,
        render_target_position=None,
        render_distance: float = 1.4,
        render_yaw: float = 45,
        render_pitch: float = -30,
        render_roll: float = 0,
    ) -> None:
        simulation = PyBullet(render_mode=render_mode, renderer=renderer)
        panda = Panda(
            simulation,
            block_gripper=True,
            base_position=np.array([-0.6, 0.0, 0.0]),
            control_type=control_type,
        )
        reach_task = MyReachTargetTask(
            simulation,
            reward_type=reward_type,
            get_ee_position=panda.get_ee_position,
        )
        super().__init__(
            panda,
            reach_task,
            render_width=render_width,
            render_height=render_height,
            render_target_position=render_target_position,
            render_distance=render_distance,
            render_yaw=render_yaw,
            render_pitch=render_pitch,
            render_roll=render_roll,
        )
# Register both reward variants of the reach-target environment.
# NOTE(review): entry_point assumes this module is importable as
# "Envs.PandaRobot.Envs" — verify against the actual package layout.
for _env_id, _reward_type in (
    ("My-Panda-Reach-Target-Sparse", "sparse"),
    ("My-Panda-Reach-Target-Dense", "dense"),
):
    register(
        id=_env_id,
        entry_point="Envs.PandaRobot.Envs:MyReachTargetEnv",
        kwargs={"reward_type": _reward_type},
        max_episode_steps=200,
    )