diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..eb05789
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,7 @@
+[Git Guide](https://di-engine-docs.readthedocs.io/en/latest/24_cooperation/git_guide.html)
+
+[GitHub Cooperation Guide](https://di-engine-docs.readthedocs.io/en/latest/24_cooperation/issue_pr.html)
+
+ - [Code Style](https://di-engine-docs.readthedocs.io/en/latest/21_code_style/index.html)
+ - [Unit Test](https://di-engine-docs.readthedocs.io/en/latest/22_test/index.html)
+ - [Code Review](https://di-engine-docs.readthedocs.io/en/latest/24_cooperation/issue_pr.html#pr-s-code-review)
\ No newline at end of file
diff --git a/README.md b/README.md
index 4b12396..f5a5cf7 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 
-English | [简体中文(Simplified Chinese)](https://github.com/opendilab/GenerativeRL_Preview/blob/main/README.zh.md)
+English | [简体中文(Simplified Chinese)](https://github.com/opendilab/GenerativeRL/blob/main/README.zh.md)
 
 **GenerativeRL**, short for Generative Reinforcement Learning, is a Python library for solving reinforcement learning (RL) problems using generative models, such as diffusion models and flow models. This library aims to provide a framework for combining the power of generative models with the decision-making capabilities of reinforcement learning algorithms.
 
@@ -62,8 +62,8 @@ pip install grl
 Or, if you want to install from source:
 
 ```bash
-git clone https://github.com/opendilab/GenerativeRL_Preview.git
-cd GenerativeRL_Preview
+git clone https://github.com/opendilab/GenerativeRL.git
+cd GenerativeRL
 pip install -e .
 ```
 
diff --git a/README.zh.md b/README.zh.md
index 903071c..337abec 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -2,7 +2,7 @@
 
 [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
 
-[英语 (English)](https://github.com/opendilab/GenerativeRL_Preview/blob/main/README.md) | 简体中文
+[英语 (English)](https://github.com/opendilab/GenerativeRL/blob/main/README.md) | 简体中文
 
 **GenerativeRL** 是一个使用生成式模型解决强化学习问题的算法库,支持扩散模型和流模型等不同类型的生成式模型。这个库旨在提供一个框架,将生成式模型的能力与强化学习算法的决策能力相结合。
 
@@ -59,8 +59,8 @@ pip install grl
 或者,如果你想从源码安装:
 
 ```bash
-git clone https://github.com/opendilab/GenerativeRL_Preview.git
-cd GenerativeRL_Preview
+git clone https://github.com/opendilab/GenerativeRL.git
+cd GenerativeRL
 pip install -e .
 ```
 
diff --git a/docs/source/tutorials/installation/index.rst b/docs/source/tutorials/installation/index.rst
index ad818cd..b3be7df 100644
--- a/docs/source/tutorials/installation/index.rst
+++ b/docs/source/tutorials/installation/index.rst
@@ -17,4 +17,4 @@ If you want to try a preview of the latest features, you can install the latest
 
 .. code-block:: console
 
-    $ pip install git+https://github.com/opendilab/GenerativeRL_Preview.git
+    $ pip install git+https://github.com/opendilab/GenerativeRL.git
diff --git a/grl/algorithms/srpo.py b/grl/algorithms/srpo.py
index ba71306..f39c900 100644
--- a/grl/algorithms/srpo.py
+++ b/grl/algorithms/srpo.py
@@ -385,22 +385,11 @@ def policy(obs: np.ndarray) -> np.ndarray:
                 lr=config.parameter.behaviour_policy.learning_rate,
             )
 
-            # checkpoint = torch.load(
-            #     "/root/github/GenerativeRL_Preview/grl_pipelines/d4rl-halfcheetah-srpo/2024-04-17 06:22:21/checkpoint_diffusion_600000.pt"
-            # )
-            # self.model["SRPOPolicy"].sro.diffusion_model.model.load_state_dict(
-            #     checkpoint["diffusion_model"]
-            # )
-            # behaviour_model_optimizer.load_state_dict(
-            #     checkpoint["behaviour_model_optimizer"]
-            # )
-
             for train_diffusion_iter in track(
                 range(config.parameter.behaviour_policy.iterations),
                 description="Behaviour policy training",
             ):
                 data = next(data_generator)
-                # data["s"].shape torch.Size([2048, 17]) data["a"].shape torch.Size([2048, 6]) data["r"].shape torch.Size([2048, 1])
                 behaviour_model_training_loss = self.model[
                     "SRPOPolicy"
                 ].behaviour_policy_loss(data["a"], data["s"])
@@ -408,10 +397,6 @@ def policy(obs: np.ndarray) -> np.ndarray:
                 behaviour_model_training_loss.backward()
                 behaviour_model_optimizer.step()
 
-                # if train_iter == 0 or (train_iter + 1) % config.parameter.evaluation.evaluation_interval == 0:
-                #     evaluation_results = evaluate(self.model["SRPOPolicy"], train_iter=train_iter)
-                #     wandb_run.log(data=evaluation_results, commit=False)
-
                 wandb_run.log(
                     data=dict(
                         train_diffusion_iter=train_diffusion_iter,
@@ -444,11 +429,6 @@ def policy(obs: np.ndarray) -> np.ndarray:
                 lr=config.parameter.critic.learning_rate,
             )
 
-            # checkpoint = torch.load(
-            #     "/root/github/GenerativeRL_Preview/grl_pipelines/d4rl-halfcheetah-srpo/2024-04-17 06:22:21/checkpoint_critic_600000.pt"
-            # )
-            # self.model["SRPOPolicy"].critic.q0.load_state_dict(checkpoint["q_model"])
-            # self.model["SRPOPolicy"].critic.vf.load_state_dict(checkpoint["v_model"])
             data_generator = get_train_data(
                 DataLoader(
                     self.dataset,