chore(conda-recipe.yaml): add conda-recipe.yaml for installation
calico-1226 committed Jul 4, 2024
1 parent fe7513c commit 49994a5
Showing 6 changed files with 123 additions and 30 deletions.
1 change: 0 additions & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -24,7 +24,6 @@ What types of changes does your code introduce? Put an `x` in all the boxes that apply.
Go over all the following points, and put an `x` in all the boxes that apply.
If you are unsure about any of these, don't hesitate to ask. We are here to help!

<!-- - [ ] I have read the [CONTRIBUTION](https://safe-sora.readthedocs.io/en/latest/developer/contributing.html) guide. (**required**) -->
- [ ] My change requires a change to the documentation.
- [ ] I have updated the tests accordingly. (*required for a bug fix or a new feature*)
- [ ] I have updated the documentation accordingly.
56 changes: 56 additions & 0 deletions conda-recipe.yaml
@@ -0,0 +1,56 @@
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Create virtual environment with command:
#
#   $ CONDA_OVERRIDE_CUDA=12.1 conda env create --file conda-recipe.yaml
#

name: safe-sora
channels:
  - huggingface
  - pytorch
  - nvidia/label/cuda-12.1.0
  - defaults
  - conda-forge
dependencies:
  - python = 3.11
  - pip

  - pytorch::pytorch >= 2.0
  - pytorch::pytorch-mutex =*=*cuda*
  - pytorch::torchvision
  - transformers >= 4.42
  - datasets
  - tokenizers >= 0.19
  - sentencepiece
  - tensorboard
  - wandb
  - pip:
      - accelerate
      - deepspeed
      - decord
      - opencv-python

  - nvidia/label/cuda-12.1.0::cuda-toolkit = 12.1

  - matplotlib-base
  - rich
  - tqdm
  - typing-extensions
  - bitsandbytes
  - av
  - einops
  - peft
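After creating the environment, a quick sanity check (a minimal sketch, not part of the recipe) confirms that a CUDA build of PyTorch was actually resolved:

```bash
conda activate safe-sora
python -c 'import torch; print(torch.__version__, torch.cuda.is_available())'
```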
7 changes: 6 additions & 1 deletion examples/README.md
@@ -30,7 +30,12 @@ Furthermore, we utilize these models to evaluate four open-source models on our

## Training

First, you need to [download our dataset](../README.md#data-access) to your local machine and prepare the training environment.
First, you need to [download our dataset](../README.md#data-access) to your local machine and prepare the training environment using:

```bash
conda env create -f conda-recipe.yaml # mamba env create -f conda-recipe.yaml
conda activate safe-sora
```

Then, you need to download the Video-LLaVA model and the MM-MLP adapter from the Hugging Face model hub. For example, you can download them using the following commands:
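The README's concrete commands are collapsed in this diff. One plausible way to fetch both artifacts, assuming the `huggingface-cli` tool from `huggingface_hub` (an assumption, not necessarily what the README shows), is:

```bash
# Hypothetical commands; the repo IDs come from the training script below.
huggingface-cli download LanguageBind/Video-LLaVA-7B --local-dir LanguageBind/Video-LLaVA-7B
huggingface-cli download LanguageBind/Video-LLaVA-Pretrain-7B mm_projector.bin \
    --local-dir LanguageBind/Video-LLaVA-Pretrain-7B
```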

43 changes: 32 additions & 11 deletions examples/scripts/finetune_reward_model.sh
@@ -20,18 +20,39 @@ if [ -z "${BASH_VERSION}" ]; then
    exit 1
fi

DIMENSION="helpfulness"

IMAGE_FOLDER="./SafeSora/videos"
VIDEO_FOLDER="./SafeSora/videos"

VIDEO_DIR="./SafeSora/videos"
TRAIN_DATA_PATH="./SafeSora/config-train.json.gz"
EVAL_DATA_PATH="./SafeSora/config-test.json.gz"
MODEL_NAME_OR_PATH="LanguageBind/Video-LLaVA-7B"
MM_MLP_ADAPTER_PATH="LanguageBind/Video-LLaVA-Pretrain-7B/mm_projector.bin"
OUTPUT_DIR="./outputs"
DIMENSION="helpfulness"

while [[ "$#" -gt 0 ]]; do
    arg="$1"
    shift
    case "${arg}" in
        --video_dir)
            VIDEO_DIR="$1"
            shift
            ;;
        --video_dir=*)
            VIDEO_DIR="${arg#*=}"
            ;;
        --train_data_path)
            TRAIN_DATA_PATH="$1"
            shift
            ;;
        --train_data_path=*)
            TRAIN_DATA_PATH="${arg#*=}"
            ;;
        --eval_data_path)
            EVAL_DATA_PATH="$1"
            shift
            ;;
        --eval_data_path=*)
            EVAL_DATA_PATH="${arg#*=}"
            ;;
        --model_name_or_path)
            MODEL_NAME_OR_PATH="$1"
            shift
@@ -72,6 +93,7 @@ if [[ ! "helpfulness harmlessness instruction_following correctness informativeness
    exit 1
fi

IMAGE_DIR="${VIDEO_DIR}"
RUN_NAME="reward-${DIMENSION}"
OUTPUT_DIR="${OUTPUT_DIR}/${RUN_NAME}"

@@ -99,11 +121,11 @@ deepspeed --master_port="${MASTER_PORT}" examples/reward_model/train_reward.py \
    --version v1 \
    --run_name "${RUN_NAME}" \
    --model_name_or_path "${MODEL_NAME_OR_PATH}" \
    --train_data_path /data/SafeSora/config-train.json.gz \
    --eval_data_path /data/SafeSora/config-test.json.gz \
    --train_data_path "${TRAIN_DATA_PATH}" \
    --eval_data_path "${EVAL_DATA_PATH}" \
    --preference_dimension "${DIMENSION}" \
    --image_dir "${IMAGE_FOLDER}" \
    --video_dir "${VIDEO_FOLDER}" \
    --image_dir "${IMAGE_DIR}" \
    --video_dir "${VIDEO_DIR}" \
    --image_tower LanguageBind/LanguageBind_Image \
    --video_tower LanguageBind/LanguageBind_Video_merge \
    --mm_projector_type mlp2x_gelu \
@@ -137,8 +159,7 @@ deepspeed --master_port="${MASTER_PORT}" examples/reward_model/train_reward.py \
    --tokenizer_model_max_length 3072 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb \
    --bf16 True \
    --tf32 True \
    --num_frames 3
    --num_frames 8
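With the new argument parser, paths no longer have to be edited inside the script. A hypothetical invocation (the `--dimension` flag is an assumption inferred from the validation check above, since that part of the parser is collapsed in this diff):

```bash
bash examples/scripts/finetune_reward_model.sh \
    --video_dir ./SafeSora/videos \
    --train_data_path ./SafeSora/config-train.json.gz \
    --eval_data_path ./SafeSora/config-test.json.gz \
    --dimension helpfulness
```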
10 changes: 10 additions & 0 deletions pyproject.toml
@@ -38,6 +38,16 @@ dependencies = [
"transformers",
"datasets",
"tokenizers",
"accelerate",
"deepspeed",
"bitsandbytes",
"numpy",
"sentencepiece",
"wandb",
"tensorboard",
"matplotlib",
"tqdm",
"rich",
"av",
]
dynamic = ["version"]
36 changes: 19 additions & 17 deletions safe_sora/trainers/reward_trainer.py
@@ -20,8 +20,10 @@

"""Reward Trainer for training the model with the reward signal."""

from __future__ import annotations

import os
from typing import Any, Dict, Generator, Iterator, List, Optional, Tuple, Union
from typing import Any, Generator, Iterator

import bitsandbytes
import torch
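The `from __future__ import annotations` import added above makes every annotation in the module lazily evaluated, which is what lets the file swap `typing.List`/`Optional`/`Union` for builtin generics and `|` unions below. A minimal illustration (not from this file):

```python
from __future__ import annotations

# With lazy annotations this runs even on interpreters where list[int]
# (Python < 3.9) or `int | None` (Python < 3.10) are not valid runtime types.
def take(xs: list[int], n: int | None = None) -> list[int]:
    return xs if n is None else xs[:n]
```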
@@ -61,9 +63,9 @@ def maybe_zero_3(


def get_mm_adapter_state_maybe_zero_3(
    named_params: List[Tuple[str, torch.Tensor]],
    keys_to_match: List[str],
) -> Dict[str, torch.Tensor]:
    named_params: list[tuple[str, torch.Tensor]],
    keys_to_match: list[str],
) -> dict[str, torch.Tensor]:
    """Get the state of the adapter with zero stage 3."""
    to_return = {
        k: t for k, t in named_params if any(key_match in k for key_match in keys_to_match)
@@ -72,10 +74,10 @@


def split_to_even_chunks(
    indices: List[int],
    indices: list[int],
    lengths: list[int],
    num_chunks: int,
) -> List[List[int]]:
) -> list[list[int]]:
    """Split a list of indices into `chunks` chunks of roughly equal lengths."""

    if len(indices) % num_chunks != 0:
@@ -97,11 +99,11 @@

# pylint: disable=too-many-locals
def get_modality_length_grouped_indices(
    lengths: List[int],
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: Generator | None = None,
) -> List[int]:
) -> list[int]:
    """Get indices grouped by modality and length."""
    # We need to use torch for the random part
    # as a distributed sampler will set the random seed for torch.
@@ -144,11 +146,11 @@


def get_length_grouped_indices(
    lengths: List[int],
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: Generator | None = None,
) -> List[int]:
) -> list[int]:
    """Get indices grouped by length."""
    # We need to use torch for the random part
    # as a distributed sampler will set the random seed for torch.
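The body of `get_length_grouped_indices` is collapsed above. A minimal sketch of the general technique (an assumption, not this project's exact code): shuffle once, carve the permutation into megabatches of `batch_size * world_size`, then sort each megabatch by length so samples sharing a batch need little padding:

```python
import torch

def length_grouped_indices_sketch(
    lengths: list[int],
    batch_size: int,
    world_size: int,
    generator: torch.Generator | None = None,
) -> list[int]:
    # Global shuffle first, so grouping does not bias the epoch order.
    perm = torch.randperm(len(lengths), generator=generator).tolist()
    megabatch = batch_size * world_size
    chunks = [perm[i : i + megabatch] for i in range(0, len(perm), megabatch)]
    # Longest-first inside each megabatch keeps per-batch padding small.
    return [i for c in chunks for i in sorted(c, key=lambda j: lengths[j], reverse=True)]
```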
@@ -178,7 +180,7 @@ def __init__(  # pylint: disable=too-many-arguments,super-init-not-called
        self,
        batch_size: int,
        world_size: int,
        lengths: Optional[List[int]] = None,
        lengths: list[int] | None = None,
        generator: Generator | None = None,
        group_by_modality: bool = False,
    ) -> None:
@@ -215,7 +217,7 @@ def __iter__(self) -> Iterator[int]:
class RewardTrainer(Trainer):
    """Reward Trainer for training the model with the reward signal."""

    def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
    def _get_train_sampler(self) -> torch.utils.data.Sampler | None:
        if self.train_dataset is None or not has_length(self.train_dataset):
            return None

@@ -370,7 +372,7 @@ def _save_checkpoint(self, model: Any, trial: Any, metrics: Any | None = None) -> None:
        else:
            super()._save_checkpoint(model, trial, metrics)  # pylint: disable=no-member

    def _save(self, output_dir: Optional[str] = None, state_dict: bool | None = None) -> None:
    def _save(self, output_dir: str | None = None, state_dict: bool | None = None) -> None:
        if getattr(self.args, 'tune_mm_mlp_adapter', False):
            pass
        else:
@@ -381,7 +383,7 @@ def compute_loss(
        model: Any,
        inputs: torch.Tensor,
        return_outputs: bool = False,
    ) -> Union[float, Tuple[float, Any]]:
    ) -> float | tuple[float, Any]:
        assert inputs['input_ids'].size(0) % 2 == 0, 'Batch size should be even.'

        outputs: ScoreModelOutput = model(**inputs)
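The rest of `compute_loss` is collapsed above, but the even-batch assertion points to a pairwise comparison loss. A minimal Bradley-Terry-style sketch (the score layout and names are assumptions, not this trainer's actual code):

```python
import torch
import torch.nn.functional as F

def pairwise_loss_sketch(end_scores: torch.Tensor) -> torch.Tensor:
    # end_scores: shape (2B,), assumed to hold B preferred samples
    # followed by their B dispreferred counterparts.
    better, worse = end_scores.chunk(2, dim=0)
    return -F.logsigmoid(better - worse).mean()
```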
@@ -401,10 +403,10 @@
    def prediction_step(
        self,
        model: nn.Module,
        inputs: Dict[str, Union[torch.Tensor, Any]],
        inputs: dict[str, torch.Tensor | Any],
        prediction_loss_only: bool,  # pylint: disable=unused-argument
        ignore_keys: Optional[List[str]] = None,  # pylint: disable=unused-argument
    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
        ignore_keys: list[str] | None = None,  # pylint: disable=unused-argument
    ) -> tuple[torch.Tensor | None, torch.Tensor | None, torch.Tensor | None]:

        inputs = self._prepare_inputs(inputs)

