chore(conda-recipe.yaml): add conda-recipe.yaml for installation
calico-1226 committed Jul 4, 2024
1 parent fe7513c commit 49994a5
Showing 6 changed files with 123 additions and 30 deletions.
1 change: 0 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -24,7 +24,6 @@ What types of changes does your code introduce? Put an `x` in all the boxes that apply.
Go over all the following points, and put an `x` in all the boxes that apply.
If you are unsure about any of these, don't hesitate to ask. We are here to help!

<!-- - [ ] I have read the [CONTRIBUTION](https://safe-sora.readthedocs.io/en/latest/developer/contributing.html) guide. (**required**) -->
- [ ] My change requires a change to the documentation.
- [ ] I have updated the tests accordingly. (*required for a bug fix or a new feature*)
- [ ] I have updated the documentation accordingly.
56 changes: 56 additions & 0 deletions conda-recipe.yaml
@@ -0,0 +1,56 @@
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Create virtual environment with command:
#
#   $ CONDA_OVERRIDE_CUDA=12.1 conda env create --file conda-recipe.yaml
#

name: safe-sora
channels:
  - huggingface
  - pytorch
  - nvidia/label/cuda-12.1.0
  - defaults
  - conda-forge
dependencies:
  - python = 3.11
  - pip

  - pytorch::pytorch >= 2.0
  - pytorch::pytorch-mutex =*=*cuda*
  - pytorch::torchvision
  - transformers >= 4.42
  - datasets
  - tokenizers >= 0.19
  - sentencepiece
  - tensorboard
  - wandb
  - pip:
      - accelerate
      - deepspeed
      - decord
      - opencv-python

  - nvidia/label/cuda-12.1.0::cuda-toolkit = 12.1

  - matplotlib-base
  - rich
  - tqdm
  - typing-extensions
  - bitsandbytes
  - av
  - einops
  - peft
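After creating the environment, a quick sanity check (a minimal sketch, not part of the recipe) confirms that a CUDA build of PyTorch was actually resolved:

```bash
conda activate safe-sora
python -c 'import torch; print(torch.__version__, torch.cuda.is_available())'
```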
7 changes: 6 additions & 1 deletion examples/README.md
@@ -30,7 +30,12 @@ Furthermore, we utilize these models to evaluate four open-source models on our

## Training

First, you need to [download our dataset](../README.md#data-access) to your local machine and prepare the training environment.
First, you need to [download our dataset](../README.md#data-access) to your local machine and prepare the training environment using:

```bash
conda env create -f conda-recipe.yaml # mamba env create -f conda-recipe.yaml
conda activate safe-sora
```

Then, you need to download the Video-LLaVA model and the MM-MLP adapter from the Hugging Face model hub. For example, you can download them using the following commands:
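The README's concrete commands are collapsed in this diff. One plausible way to fetch both artifacts, assuming the `huggingface-cli` tool from `huggingface_hub` (an assumption, not necessarily what the README shows), is:

```bash
# Hypothetical commands; the repo IDs come from the training script below.
huggingface-cli download LanguageBind/Video-LLaVA-7B --local-dir LanguageBind/Video-LLaVA-7B
huggingface-cli download LanguageBind/Video-LLaVA-Pretrain-7B mm_projector.bin \
    --local-dir LanguageBind/Video-LLaVA-Pretrain-7B
```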

43 changes: 32 additions & 11 deletions examples/scripts/finetune_reward_model.sh
@@ -20,18 +20,39 @@ if [ -z "${BASH_VERSION}" ]; then
    exit 1
fi

DIMENSION="helpfulness"

IMAGE_FOLDER="./SafeSora/videos"
VIDEO_FOLDER="./SafeSora/videos"

VIDEO_DIR="./SafeSora/videos"
TRAIN_DATA_PATH="./SafeSora/config-train.json.gz"
EVAL_DATA_PATH="./SafeSora/config-test.json.gz"
MODEL_NAME_OR_PATH="LanguageBind/Video-LLaVA-7B"
MM_MLP_ADAPTER_PATH="LanguageBind/Video-LLaVA-Pretrain-7B/mm_projector.bin"
OUTPUT_DIR="./outputs"
DIMENSION="helpfulness"

while [[ "$#" -gt 0 ]]; do
    arg="$1"
    shift
    case "${arg}" in
        --video_dir)
            VIDEO_DIR="$1"
            shift
            ;;
        --video_dir=*)
            VIDEO_DIR="${arg#*=}"
            ;;
        --train_data_path)
            TRAIN_DATA_PATH="$1"
            shift
            ;;
        --train_data_path=*)
            TRAIN_DATA_PATH="${arg#*=}"
            ;;
        --eval_data_path)
            EVAL_DATA_PATH="$1"
            shift
            ;;
        --eval_data_path=*)
            EVAL_DATA_PATH="${arg#*=}"
            ;;
        --model_name_or_path)
            MODEL_NAME_OR_PATH="$1"
            shift
@@ -72,6 +93,7 @@ if [[ ! "helpfulness harmlessness instruction_following correctness informativeness
    exit 1
fi

IMAGE_DIR="${VIDEO_DIR}"
RUN_NAME="reward-${DIMENSION}"
OUTPUT_DIR="${OUTPUT_DIR}/${RUN_NAME}"

@@ -99,11 +121,11 @@ deepspeed --master_port="${MASTER_PORT}" examples/reward_model/train_reward.py \
    --version v1 \
    --run_name "${RUN_NAME}" \
    --model_name_or_path "${MODEL_NAME_OR_PATH}" \
    --train_data_path /data/SafeSora/config-train.json.gz \
    --eval_data_path /data/SafeSora/config-test.json.gz \
    --train_data_path "${TRAIN_DATA_PATH}" \
    --eval_data_path "${EVAL_DATA_PATH}" \
    --preference_dimension "${DIMENSION}" \
    --image_dir "${IMAGE_FOLDER}" \
    --video_dir "${VIDEO_FOLDER}" \
    --image_dir "${IMAGE_DIR}" \
    --video_dir "${VIDEO_DIR}" \
    --image_tower LanguageBind/LanguageBind_Image \
    --video_tower LanguageBind/LanguageBind_Video_merge \
    --mm_projector_type mlp2x_gelu \
@@ -137,8 +159,7 @@ deepspeed --master_port="${MASTER_PORT}" examples/reward_model/train_reward.py \
    --tokenizer_model_max_length 3072 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb \
    --bf16 True \
    --tf32 True \
    --num_frames 3
    --num_frames 8
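With the new argument parser, paths no longer have to be edited inside the script. A hypothetical invocation (the `--dimension` flag is an assumption inferred from the validation check above, since that part of the parser is collapsed in this diff):

```bash
bash examples/scripts/finetune_reward_model.sh \
    --video_dir ./SafeSora/videos \
    --train_data_path ./SafeSora/config-train.json.gz \
    --eval_data_path ./SafeSora/config-test.json.gz \
    --dimension helpfulness
```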
10 changes: 10 additions & 0 deletions pyproject.toml
@@ -38,6 +38,16 @@ dependencies = [
"transformers",
"datasets",
"tokenizers",
"accelerate",
"deepspeed",
"bitsandbytes",
"numpy",
"sentencepiece",
"wandb",
"tensorboard",
"matplotlib",
"tqdm",
"rich",
"av",
]
dynamic = ["version"]
36 changes: 19 additions & 17 deletions safe_sora/trainers/reward_trainer.py
@@ -20,8 +20,10 @@

"""Reward Trainer for training the model with the reward signal."""

from __future__ import annotations

import os
from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
from typing import Any, Generator, Iterator

import bitsandbytes
import torch
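The `from __future__ import annotations` import added above makes every annotation in the module lazily evaluated, which is what lets the file swap `typing.List`/`Optional`/`Union` for builtin generics and `|` unions below. A minimal illustration (not from this file):

```python
from __future__ import annotations

# With lazy annotations this runs even on interpreters where list[int]
# (Python < 3.9) or `int | None` (Python < 3.10) are not valid runtime types.
def take(xs: list[int], n: int | None = None) -> list[int]:
    return xs if n is None else xs[:n]
```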
@@ -61,9 +63,9 @@ def maybe_zero_3(


def get_mm_adapter_state_maybe_zero_3(
    named_params: List[Tuple[str, torch.Tensor]],
    keys_to_match: List[str],
) -> Dict[str, torch.Tensor]:
    named_params: list[tuple[str, torch.Tensor]],
    keys_to_match: list[str],
) -> dict[str, torch.Tensor]:
    """Get the state of the adapter with zero stage 3."""
    to_return = {
        k: t for k, t in named_params if any(key_match in k for key_match in keys_to_match)
@@ -72,10 +74,10 @@


def split_to_even_chunks(
    indices: List[int],
    indices: list[int],
    lengths: list[int],
    num_chunks: int,
) -> List[List[int]]:
) -> list[list[int]]:
    """Split a list of indices into `chunks` chunks of roughly equal lengths."""

    if len(indices) % num_chunks != 0:
@@ -97,11 +99,11 @@

# pylint: disable=too-many-locals
def get_modality_length_grouped_indices(
    lengths: List[int],
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: Generator | None = None,
) -> List[int]:
) -> list[int]:
    """Get indices grouped by modality and length."""
    # We need to use torch for the random part
    # as a distributed sampler will set the random seed for torch.
@@ -144,11 +146,11 @@


def get_length_grouped_indices(
    lengths: List[int],
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: Generator | None = None,
) -> List[int]:
) -> list[int]:
    """Get indices grouped by length."""
    # We need to use torch for the random part
    # as a distributed sampler will set the random seed for torch.
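The body of `get_length_grouped_indices` is collapsed above. A minimal sketch of the general technique (an assumption, not this project's exact code): shuffle once, carve the permutation into megabatches of `batch_size * world_size`, then sort each megabatch by length so samples sharing a batch need little padding:

```python
import torch

def length_grouped_indices_sketch(
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: torch.Generator | None = None,
) -> list[int]:
    # Global shuffle first, so grouping does not bias the epoch order.
    perm = torch.randperm(len(lengths), generator=generator).tolist()
    megabatch = batch_size * world_size
    chunks = [perm[i : i + megabatch] for i in range(0, len(perm), megabatch)]
    # Longest-first inside each megabatch keeps per-batch padding small.
    return [i for c in chunks for i in sorted(c, key=lambda j: lengths[j], reverse=True)]
```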
@@ -178,7 +180,7 @@ def __init__(  # pylint: disable=too-many-arguments,super-init-not-called
        self,
        batch_size: int,
        world_size: int,
        lengths: Optional[List[int]] = None,
        lengths: list[int] | None = None,
        generator: Generator | None = None,
        group_by_modality: bool = False,
    ) -> None:
@@ -215,7 +217,7 @@ def __iter__(self) -> Iterator[int]:
class RewardTrainer(Trainer):
    """Reward Trainer for training the model with the reward signal."""

    def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
    def _get_train_sampler(self) -> torch.utils.data.Sampler | None:
        if self.train_dataset is None or not has_length(self.train_dataset):
            return None

@@ -370,7 +372,7 @@ def _save_checkpoint(self, model: Any, trial: Any, metrics: Any | None = None) -> None:
        else:
            super()._save_checkpoint(model, trial, metrics)  # pylint: disable=no-member

    def _save(self, output_dir: Optional[str] = None, state_dict: bool | None = None) -> None:
    def _save(self, output_dir: str | None = None, state_dict: bool | None = None) -> None:
        if getattr(self.args, 'tune_mm_mlp_adapter', False):
            pass
        else:
@@ -381,7 +383,7 @@ def compute_loss(
        model: Any,
        inputs: torch.Tensor,
        return_outputs: bool = False,
    ) -> Union[float, Tuple[float, Any]]:
    ) -> float | tuple[float, Any]:
        assert inputs['input_ids'].size(0) % 2 == 0, 'Batch size should be even.'

        outputs: ScoreModelOutput = model(**inputs)
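The rest of `compute_loss` is collapsed above, but the even-batch assertion points to a pairwise comparison loss. A minimal Bradley-Terry-style sketch (the score layout and names are assumptions, not this trainer's actual code):

```python
import torch
import torch.nn.functional as F

def pairwise_loss_sketch(end_scores: torch.Tensor) -> torch.Tensor:
    # end_scores: shape (2B,), assumed to hold B preferred samples
    # followed by their B dispreferred counterparts.
    better, worse = end_scores.chunk(2, dim=0)
    return -F.logsigmoid(better - worse).mean()
```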
@@ -401,10 +403,10 @@
    def prediction_step(
        self,
        model: nn.Module,
        inputs: Dict[str, Union[torch.Tensor, Any]],
        inputs: dict[str, torch.Tensor | Any],
        prediction_loss_only: bool,  # pylint: disable=unused-argument
        ignore_keys: Optional[List[str]] = None,  # pylint: disable=unused-argument
    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
        ignore_keys: list[str] | None = None,  # pylint: disable=unused-argument
    ) -> tuple[torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:

        inputs = self._prepare_inputs(inputs)

