forked from lm-sys/FastChat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mistral-7.slurm
34 lines (28 loc) · 1.13 KB
/
mistral-7.slurm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#!/bin/bash
# Slurm batch job that launches one FastChat SGLang worker serving
# Mistral-7B-Instruct-v0.2 on a single GPU, pointed at the controller given by
# BLABLADOR_CONTROLLER / BLABLADOR_CONTROLLER_PORT.
#
# stdout and stderr are both written to the same per-job log file (%j = job id).
# NOTE: all #SBATCH directives must stay above the first executable command,
# otherwise sbatch stops reading them.
#SBATCH --job-name=mistral7
#SBATCH --output=/p/haicluster/llama/FastChat/logs/%j.txt
#SBATCH --error=/p/haicluster/llama/FastChat/logs/%j.txt
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:1
# Wall-time limit is intentionally disabled (extra leading '#'):
# #SBATCH --time=100:00:00

# Print an error to stderr and abort the job.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Resolve the node name once; it is used for the log line and the worker URL.
node_name=$(hostname)
# Port the worker listens on and advertises in its --worker-address.
worker_port=31029

# Output kept byte-identical to the previous log line (including the double space).
echo "I AM ON ${node_name}  running mistral 7"

export BLABLADOR_DIR="/p/haicluster/llama/FastChat"

config_file="${BLABLADOR_DIR}/config-blablador.sh"
venv_activate="${BLABLADOR_DIR}/sc_venv_sglang2/activate.sh"

# Fail early if a required file is missing. Only readability is checked, not the
# exit status of `source`, because a sourced script may legitimately end with a
# non-zero command.
[[ -r "${config_file}" ]] || die "cannot read ${config_file}"
# shellcheck source=/dev/null
source "${config_file}"

# --model-path below is relative, so the job must really be in BLABLADOR_DIR.
cd "${BLABLADOR_DIR}" || die "cannot cd to ${BLABLADOR_DIR}"

# Avoid the ModuleNotFoundError
[[ -r "${venv_activate}" ]] || die "cannot read ${venv_activate}"
# shellcheck source=/dev/null
source "${venv_activate}"

export PATH="${BLABLADOR_DIR}:${PATH}"
# Append the previous value only if there is one, so no dangling ':' is left
# behind when PYTHONPATH was unset.
export PYTHONPATH="${BLABLADOR_DIR}${PYTHONPATH:+:${PYTHONPATH}}"
# Follow the Slurm CPU allocation; falls back to 8, the value requested above.
export NUMEXPR_MAX_THREADS="${SLURM_CPUS_PER_TASK:-8}"

# Presumably exported by config-blablador.sh (TODO confirm); without them the
# worker would be started with the useless controller address ":".
: "${BLABLADOR_CONTROLLER:?BLABLADOR_CONTROLLER must be set}"
: "${BLABLADOR_CONTROLLER_PORT:?BLABLADOR_CONTROLLER_PORT must be set}"

worker_args=(
  --controller "${BLABLADOR_CONTROLLER}:${BLABLADOR_CONTROLLER_PORT}"
  --port "${worker_port}"
  --worker-address "http://${node_name}.fz-juelich.de:${worker_port}"
  --num-gpus 1
  --host 0.0.0.0
  --model-path models/Mistral-7B-Instruct-v0.2
  --model-name "alias-fast,1 - Mistral-7B-Instruct-v0.2 - the best option in general - fast and good"
  # Disabled options, kept for reference:
  # --load-8bit             It's faster without it
  # --max-gpu-memory 22Gb
)

# Previous (non-SGLang) worker, kept for reference:
#srun python3 fastchat/serve/model_worker.py \
srun python3 "${BLABLADOR_DIR}/fastchat/serve/sglang_worker.py" "${worker_args[@]}"