From bb0061d0585862069d70419bbd6fad3823676892 Mon Sep 17 00:00:00 2001
From: Gilbert Gong
Date: Thu, 31 Oct 2024 15:50:26 -0700
Subject: [PATCH] llama.cpp: update binaries called in entry.sh

llama.cpp renamed its build artifacts (batched-bench -> llama-batched-bench,
server -> llama-server); update entry.sh to invoke the new names.

Also fix a latent typo while here: the BENCH_FIRST guard tested the
undefined variable $CHECKFILE instead of $CHECK_FILE, so the expansion was
always empty, `[ ! -f "" ]` was always true, and the benchmark re-ran on
every restart instead of being skipped once the marker file existed.
---
 llama.cpp/entry.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llama.cpp/entry.sh b/llama.cpp/entry.sh
index d779411..e220180 100755
--- a/llama.cpp/entry.sh
+++ b/llama.cpp/entry.sh
@@ -69,20 +69,21 @@ export LLAMA_CACHE=$WORKSPACE/models
 if [ "$MODE" = "SLEEP" ]; then
     /usr/bin/sleep infinity
     exit
+# NOTE: BENCH_FIRST mode has not been re-verified since the binary rename
 elif [ "$MODE" = "BENCH_FIRST" ]; then
     # if we auto restart after crash, don't run benchmark
     CHECK_FILE=$WORKSPACE/bench_complete
-    if [ ! -f "$CHECKFILE" ]; then
+    if [ ! -f "$CHECK_FILE" ]; then
         echo Downloading $filename and running short benchmark
         cd $WORKSPACE/models && wget $LLAMA_MU
-        cd $WORKSPACE && /llama.cpp/batched-bench models/$filename $LLAMA_CONTEXT $LLAMA_UB $LLAMA_UB 0 0 999 2048 256 1,1,1,1,2
-        echo benchmark complete, running /llama.cpp/server in 5 seconds..
+        cd $WORKSPACE && /llama.cpp/llama-batched-bench models/$filename $LLAMA_CONTEXT $LLAMA_UB $LLAMA_UB 0 0 999 2048 256 1,1,1,1,2
+        echo benchmark complete, running /llama.cpp/llama-server in 5 seconds..
         touch $CHECK_FILE
         sleep 5
     fi
 fi
 
-/llama.cpp/server $MODEL_ARG $LLAMA_FA $UB_ARG -c $LLAMA_CONTEXT -t $LLAMA_NP \
+/llama.cpp/llama-server $MODEL_ARG $LLAMA_FA $UB_ARG -c $LLAMA_CONTEXT -t $LLAMA_NP \
     -np $LLAMA_NP $LLAMA_NGL --rope-freq-scale $LLAMA_ROPE_FREQ_SCALE \
     --port $LLAMA_PORT $LLAMA_ADDITIONAL_ARGS $API_KEY_ARG $CTK_ARG $CTV_ARG \
     --threads-http $LLAMA_THREADS_HTTP