From bb0061d0585862069d70419bbd6fad3823676892 Mon Sep 17 00:00:00 2001
From: Gilbert Gong
Date: Thu, 31 Oct 2024 15:50:26 -0700
Subject: [PATCH] llama.cpp: update binaries called in entry.sh

llama.cpp renamed its build artifacts (batched-bench -> llama-batched-bench,
server -> llama-server); update entry.sh to invoke the new names.

Also fix a latent typo while here: the BENCH_FIRST guard tested the
undefined variable $CHECKFILE instead of $CHECK_FILE, so the expansion was
always empty, `[ ! -f "" ]` was always true, and the benchmark re-ran on
every restart instead of being skipped once the marker file existed.
---
 llama.cpp/entry.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llama.cpp/entry.sh b/llama.cpp/entry.sh
index d779411..e220180 100755
--- a/llama.cpp/entry.sh
+++ b/llama.cpp/entry.sh
@@ -69,20 +69,21 @@ export LLAMA_CACHE=$WORKSPACE/models
 if [ "$MODE" = "SLEEP" ]; then
     /usr/bin/sleep infinity
     exit
+# NOTE: BENCH_FIRST mode has not been re-verified since the binary rename
 elif [ "$MODE" = "BENCH_FIRST" ]; then
     # if we auto restart after crash, don't run benchmark
     CHECK_FILE=$WORKSPACE/bench_complete
-    if [ ! -f "$CHECKFILE" ]; then
+    if [ ! -f "$CHECK_FILE" ]; then
         echo Downloading $filename and running short benchmark
         cd $WORKSPACE/models && wget $LLAMA_MU
-        cd $WORKSPACE && /llama.cpp/batched-bench models/$filename $LLAMA_CONTEXT $LLAMA_UB $LLAMA_UB 0 0 999 2048 256 1,1,1,1,2
-        echo benchmark complete, running /llama.cpp/server in 5 seconds..
+        cd $WORKSPACE && /llama.cpp/llama-batched-bench models/$filename $LLAMA_CONTEXT $LLAMA_UB $LLAMA_UB 0 0 999 2048 256 1,1,1,1,2
+        echo benchmark complete, running /llama.cpp/llama-server in 5 seconds..
         touch $CHECK_FILE
         sleep 5
     fi
 fi
 
-/llama.cpp/server $MODEL_ARG $LLAMA_FA $UB_ARG -c $LLAMA_CONTEXT -t $LLAMA_NP \
+/llama.cpp/llama-server $MODEL_ARG $LLAMA_FA $UB_ARG -c $LLAMA_CONTEXT -t $LLAMA_NP \
     -np $LLAMA_NP $LLAMA_NGL --rope-freq-scale $LLAMA_ROPE_FREQ_SCALE \
     --port $LLAMA_PORT $LLAMA_ADDITIONAL_ARGS $API_KEY_ARG $CTK_ARG $CTV_ARG \
     --threads-http $LLAMA_THREADS_HTTP