Commit f131c36

fix(zjow): fix typo for QAC class (#729)
zjowowen authored Sep 18, 2023
1 parent def473f commit f131c36
Showing 19 changed files with 42 additions and 42 deletions.
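For downstream users, the practical effect of this commit is that code which previously imported QAC from ding.model now imports ContinuousQAC (the multi-agent docstrings likewise refer to DiscreteQAC for the discrete variant). A minimal usage sketch, based on the docstring examples touched in this diff (the 64/64 shapes and the 'regression' action space are illustrative choices, not part of the commit):

import torch
from ding.model import ContinuousQAC  # previously: from ding.model import QAC

# obs_shape=64, action_shape=64, deterministic actor ('regression' action space),
# mirroring the docstring examples updated in this commit.
model = ContinuousQAC(64, 64, 'regression')
obs = torch.randn(4, 64)
actor_out = model(obs, 'compute_actor')
assert actor_out['action'].shape == torch.Size([4, 64])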
4 changes: 2 additions & 2 deletions ding/bonus/ddpg.py
@@ -15,7 +15,7 @@
from ding.utils import set_pkg_seed
from ding.utils import get_env_fps, render
from ding.config import save_config_py, compile_config
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.data import DequeBuffer
from ding.bonus.common import TrainingReturn, EvalReturn
from ding.config.example.DDPG import supported_env_cfg
@@ -74,7 +74,7 @@ def __init__(
os.makedirs(self.exp_name)
save_config_py(self.cfg, os.path.join(self.exp_name, 'policy_config.py'))
if model is None:
-model = QAC(**self.cfg.policy.model)
+model = ContinuousQAC(**self.cfg.policy.model)
self.buffer_ = DequeBuffer(size=self.cfg.policy.other.replay_buffer.replay_buffer_size)
self.policy = DDPGPolicy(self.cfg.policy, model=model)
if policy_state_dict is not None:
4 changes: 2 additions & 2 deletions ding/bonus/sac.py
@@ -15,7 +15,7 @@
from ding.utils import set_pkg_seed
from ding.utils import get_env_fps, render
from ding.config import save_config_py, compile_config
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.model import model_wrap
from ding.data import DequeBuffer
from ding.bonus.common import TrainingReturn, EvalReturn
@@ -75,7 +75,7 @@ def __init__(
os.makedirs(self.exp_name)
save_config_py(self.cfg, os.path.join(self.exp_name, 'policy_config.py'))
if model is None:
-model = QAC(**self.cfg.policy.model)
+model = ContinuousQAC(**self.cfg.policy.model)
self.buffer_ = DequeBuffer(size=self.cfg.policy.other.replay_buffer.replay_buffer_size)
self.policy = SACPolicy(self.cfg.policy, model=model)
if policy_state_dict is not None:
4 changes: 2 additions & 2 deletions ding/bonus/td3.py
@@ -15,7 +15,7 @@
from ding.utils import set_pkg_seed
from ding.utils import get_env_fps, render
from ding.config import save_config_py, compile_config
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.data import DequeBuffer
from ding.bonus.common import TrainingReturn, EvalReturn
from ding.config.example.TD3 import supported_env_cfg
@@ -74,7 +74,7 @@ def __init__(
os.makedirs(self.exp_name)
save_config_py(self.cfg, os.path.join(self.exp_name, 'policy_config.py'))
if model is None:
-model = QAC(**self.cfg.policy.model)
+model = ContinuousQAC(**self.cfg.policy.model)
self.buffer_ = DequeBuffer(size=self.cfg.policy.other.replay_buffer.replay_buffer_size)
self.policy = TD3Policy(self.cfg.policy, model=model)
if policy_state_dict is not None:
4 changes: 2 additions & 2 deletions ding/example/collect_demo_data.py
@@ -1,7 +1,7 @@
import gym
from ditk import logging
import torch
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import SACPolicy
from ding.envs import DingEnvWrapper, BaseEnvManagerV2
from ding.data import offline_data_save_type
@@ -22,7 +22,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = SACPolicy(cfg.policy, model=model, enable_field=['collect'])
state_dict = torch.load(cfg.policy.collect.state_dict_path, map_location='cpu')
policy.collect_mode.load_state_dict(state_dict)
4 changes: 2 additions & 2 deletions ding/example/ddpg.py
@@ -1,6 +1,6 @@
import gym
from ditk import logging
-from ding.model.template.qac import QAC
+from ding.model.template.qac import ContinuousQAC
from ding.policy import DDPGPolicy
from ding.envs import DingEnvWrapper, BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -27,7 +27,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = DDPGPolicy(cfg.policy, model=model)

4 changes: 2 additions & 2 deletions ding/example/sac.py
@@ -1,5 +1,5 @@
from ditk import logging
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import SACPolicy
from ding.envs import BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -27,7 +27,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = SACPolicy(cfg.policy, model=model)

6 changes: 3 additions & 3 deletions ding/example/sqil_continuous.py
@@ -1,6 +1,6 @@
from ditk import logging
import torch
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import SQILSACPolicy
from ding.envs import BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -35,8 +35,8 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
-expert_model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
+expert_model = ContinuousQAC(**cfg.policy.model)

buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
expert_buffer = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
4 changes: 2 additions & 2 deletions ding/example/td3.py
@@ -1,5 +1,5 @@
from ditk import logging
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import TD3Policy
from ding.envs import BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -27,7 +27,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = TD3Policy(cfg.policy, model=model)

12 changes: 6 additions & 6 deletions ding/model/template/maqac.py
@@ -145,12 +145,12 @@ def compute_actor(self, inputs: Dict) -> Dict:
- q_value (:obj:`torch.FloatTensor`): :math:`(B, )`, B is batch size.
Examples:
>>> # Regression mode
->>> model = QAC(64, 64, 'regression')
+>>> model = DiscreteQAC(64, 64, 'regression')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> assert actor_outputs['action'].shape == torch.Size([4, 64])
>>> # Reparameterization Mode
->>> model = QAC(64, 64, 'reparameterization')
+>>> model = DiscreteQAC(64, 64, 'reparameterization')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> actor_outputs['logit'][0].shape # mu
@@ -329,12 +329,12 @@ def forward(self, inputs: Union[torch.Tensor, Dict], mode: str) -> Dict:
Actor Examples:
>>> # Regression mode
->>> model = QAC(64, 64, 'regression')
+>>> model = ContinuousQAC(64, 64, 'regression')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> assert actor_outputs['action'].shape == torch.Size([4, 64])
>>> # Reparameterization Mode
->>> model = QAC(64, 64, 'reparameterization')
+>>> model = ContinuousQAC(64, 64, 'reparameterization')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> actor_outputs['logit'][0].shape # mu
@@ -376,12 +376,12 @@ def compute_actor(self, inputs: Dict) -> Dict:
(action_args are continuous real value)
Examples:
>>> # Regression mode
->>> model = QAC(64, 64, 'regression')
+>>> model = ContinuousQAC(64, 64, 'regression')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> assert actor_outputs['action'].shape == torch.Size([4, 64])
>>> # Reparameterization Mode
->>> model = QAC(64, 64, 'reparameterization')
+>>> model = ContinuousQAC(64, 64, 'reparameterization')
>>> inputs = torch.randn(4, 64)
>>> actor_outputs = model(inputs,'compute_actor')
>>> actor_outputs['logit'][0].shape # mu
4 changes: 2 additions & 2 deletions dizoo/classic_control/pendulum/entry/pendulum_ddpg_main.py
@@ -6,7 +6,7 @@
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager, DingEnvWrapper
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from dizoo.classic_control.pendulum.envs import PendulumEnv
from dizoo.classic_control.pendulum.config.pendulum_ddpg_config import pendulum_ddpg_config
@@ -50,7 +50,7 @@ def main(cfg, seed=0):
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

# Set up RL Policy
-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)

# Set up collection, training and evaluation utilities
4 changes: 2 additions & 2 deletions dizoo/classic_control/pendulum/entry/pendulum_td3_main.py
@@ -7,7 +7,7 @@
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager, DingEnvWrapper
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from dizoo.classic_control.pendulum.envs import PendulumEnv
from dizoo.classic_control.pendulum.config.pendulum_td3_config import pendulum_td3_config
@@ -40,7 +40,7 @@ def main(cfg, seed=0):
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

# Set up RL Policy
-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)
# lr_scheduler demo
lr_scheduler = LambdaLR(
4 changes: 2 additions & 2 deletions dizoo/dmc2gym/entry/dmc2gym_sac_pixel_main.py
@@ -2,7 +2,7 @@
from ditk import logging
import os
import numpy as np
-from ding.model.template.qac import QAC
+from ding.model.template.qac import ContinuousQAC
from ding.policy import SACPolicy
from ding.envs import BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -36,7 +36,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
logging.info(model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = SACPolicy(cfg.policy, model=model)
4 changes: 2 additions & 2 deletions dizoo/dmc2gym/entry/dmc2gym_sac_state_main.py
@@ -1,5 +1,5 @@
from ditk import logging
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import SACPolicy
from ding.envs import BaseEnvManagerV2
from ding.data import DequeBuffer
@@ -36,7 +36,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = SACPolicy(cfg.policy, model=model)

4 changes: 2 additions & 2 deletions dizoo/gym_hybrid/entry/gym_hybrid_ddpg_eval.py
@@ -10,7 +10,7 @@
from ding.envs import BaseEnvManager, DingEnvWrapper
from ding.envs import get_vec_env_setting
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from ding.rl_utils import get_epsilon_greedy_fn
from dizoo.gym_hybrid.config.gym_hybrid_ddpg_config import gym_hybrid_ddpg_config, gym_hybrid_ddpg_create_config
@@ -33,7 +33,7 @@ def main(main_cfg, create_cfg, seed=0):
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

# Set up RL Policy
-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)
policy.eval_mode.load_state_dict(torch.load(cfg.policy.load_path, map_location='cpu'))

4 changes: 2 additions & 2 deletions dizoo/gym_hybrid/entry/gym_hybrid_ddpg_main.py
@@ -7,7 +7,7 @@
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from ding.rl_utils import get_epsilon_greedy_fn
from dizoo.gym_hybrid.envs.gym_hybrid_env import GymHybridEnv
@@ -43,7 +43,7 @@ def main(cfg, seed=0):
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

# Set up RL Policy
-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)

# Set up collection, training and evaluation utilities
2 changes: 1 addition & 1 deletion dizoo/metadrive/config/metadrive_onppo_config.py
@@ -5,7 +5,7 @@
import gym
from ding.envs import BaseEnvManager, SyncSubprocessEnvManager
from ding.config import compile_config
-from ding.model.template import QAC, VAC
+from ding.model.template import ContinuousQAC, VAC
from ding.policy import PPOPolicy
from ding.worker import SampleSerialCollector, InteractionSerialEvaluator, BaseLearner
from dizoo.metadrive.env.drive_env import MetaDrivePPOOriginEnv
4 changes: 2 additions & 2 deletions dizoo/mujoco/entry/mujoco_ddpg_eval.py
@@ -10,7 +10,7 @@
from ding.envs import BaseEnvManager
from ding.envs import get_vec_env_setting, create_env_manager
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from ding.rl_utils import get_epsilon_greedy_fn
from dizoo.mujoco.envs.mujoco_env import MujocoEnv
@@ -41,7 +41,7 @@ def main(main_cfg, seed=0):
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

# Set up RL Policy
-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)
policy.eval_mode.load_state_dict(torch.load(cfg.policy.load_path, map_location='cpu'))

4 changes: 2 additions & 2 deletions dizoo/mujoco/entry/mujoco_ddpg_main.py
@@ -7,7 +7,7 @@
from ding.worker import BaseLearner, SampleSerialCollector, InteractionSerialEvaluator, AdvancedReplayBuffer
from ding.envs import BaseEnvManager, DingEnvWrapper
from ding.policy import DDPGPolicy
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.utils import set_pkg_seed
from dizoo.mujoco.envs.mujoco_env import MujocoEnv
from dizoo.mujoco.config.hopper_ddpg_config import hopper_ddpg_config
@@ -37,7 +37,7 @@ def main(cfg, seed=0, max_iterations=int(1e10)):
evaluator_env.seed(seed, dynamic_seed=False)
set_pkg_seed(seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
policy = DDPGPolicy(cfg.policy, model=model)
tb_logger = SummaryWriter(os.path.join('./log/', 'serial'))
learner = BaseLearner(cfg.policy.learn.learner, policy.learn_mode, tb_logger)
4 changes: 2 additions & 2 deletions dizoo/mujoco/example/mujoco_sac.py
@@ -1,5 +1,5 @@
from ditk import logging
-from ding.model import QAC
+from ding.model import ContinuousQAC
from ding.policy import SACPolicy
from ding.envs import DingEnvWrapper, SubprocessEnvManagerV2
from ding.data import DequeBuffer
@@ -26,7 +26,7 @@ def main():

set_pkg_seed(cfg.seed, use_cuda=cfg.policy.cuda)

-model = QAC(**cfg.policy.model)
+model = ContinuousQAC(**cfg.policy.model)
buffer_ = DequeBuffer(size=cfg.policy.other.replay_buffer.replay_buffer_size)
policy = SACPolicy(cfg.policy, model=model)

