forked from lm-sys/FastChat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vicuna-13.slurm
31 lines (24 loc) · 927 Bytes
/
vicuna-13.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/bin/bash
# SLURM batch script: starts a FastChat model worker serving vicuna-13b-v1.5
# on a single GPU and registers it with the controller on haicluster1.
#
# NOTE: the #SBATCH directives must stay ahead of the first executable
# command; sbatch stops parsing them at the first non-comment line.
#SBATCH --job-name=vic1315
#SBATCH --output=/p/haicluster/llama/FastChat/logs/%j.txt
#SBATCH --error=/p/haicluster/llama/FastChat/logs/%j.txt
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=16
#SBATCH --time=100:00:00
#SBATCH --gres=gpu:1

# Resolve the node name once; it is used for the log line and for the URL the
# worker advertises to the controller.
node_name="$(hostname)"
echo "I AM ON ${node_name}" " running vicuna 13 with 1 gpus"

export BLABLADOR_DIR="/p/haicluster/llama/FastChat"
export LOGDIR="${BLABLADOR_DIR}/logs"
export NCCL_P2P_DISABLE=1 # 3090s do not support p2p

# The worker script is addressed by a relative path below, so a failed cd must
# abort the job instead of launching from the wrong directory.
cd "${BLABLADOR_DIR}" || {
  echo "ERROR: cannot cd to ${BLABLADOR_DIR}" >&2
  exit 1
}

# Environment activation script (presumably a Python virtualenv wrapper --
# confirm). Fail early if it is missing rather than starting the worker with
# whatever python3 happens to be on PATH.
activate_script="${BLABLADOR_DIR}/sc_venv_falcon/activate.sh"
if [[ ! -r "${activate_script}" ]]; then
  echo "ERROR: cannot read ${activate_script}" >&2
  exit 1
fi
# shellcheck disable=SC1090  # path is only known at run time
source "${activate_script}"

# Alternative backend, kept for reference:
#srun python3 $BLABLADOR_DIR/fastchat/serve/vllm_worker.py \
srun python3 fastchat/serve/model_worker.py \
  --controller http://haicluster1.fz-juelich.de:21001 \
  --port 31010 --worker "http://${node_name}:31010" \
  --num-gpus 1 \
  --host 0.0.0.0 \
  --model-path "${BLABLADOR_DIR}/models/vicuna-13b-v1.5" \
  --load-8bit
# Optional extra flag (append to the srun command above if needed):
# --max-gpu-memory 22Gb \