From 4e3f4075428ac0c0cdbfa143dd935b96180ed5dd Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:32:51 -0400
Subject: [PATCH 1/4] Add WIP cpu inference example

---
 misc/cpu_inference.py | 72 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 misc/cpu_inference.py

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
new file mode 100644
index 000000000..3981c6183
--- /dev/null
+++ b/misc/cpu_inference.py
@@ -0,0 +1,72 @@
+from modal import App, Image
+
+app = App("cpu-inference")
+BATCH_SIZE = 64
+NUM_CORES = 64
+NUM_OUTPUT_TOKENS = 128
+
+PROMPT = """You are an expert at adding tags to pieces of text. Add a list of comma separated tags to the following pieces of text. Here are some examples:
+
+Example 1
+
+Text:
+IIJA Bureau of Land Management Idaho Threatened and Endangered Species Program Department of the Interior - Bureau of Land Management Idaho Threatened and Endangered Species Program
+Tags: ["Wildlife Conservation", "Environmental Protection", "Species Preservation", "Conservation Efforts", "Ecosystem Management" ]
+
+-------------------
+
+Example 2
+
+Text: Scaling Apprenticeship Readiness Across the Building Trades Initiative A Cooperative Agreement will be awarded for $19,821,832 to TradesFutures to substantially increase the number of participants from underrepresented populations and underserved communities in registered apprenticeship programs within the construction industry sector.
+Tags: [ "Apprenticeship", "Building Trades", "Construction Industry", "Underrepresented Populations", "Underserved Communities" ]
+
+"""
+
+llama_cpp_image = Image.debian_slim(python_version="3.11").apt_install(["curl", "unzip"]).run_commands([
+    'curl -L -O https://github.com/ggerganov/llama.cpp/releases/download/b3367/llama-b3367-bin-ubuntu-x64.zip',
+    'unzip llama-b3367-bin-ubuntu-x64.zip',
+    'curl -L -O https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
+])
+
+def batch_iterator(dataset):
+    for i in range(0, len(dataset), BATCH_SIZE):
+        yield dataset[i : i + BATCH_SIZE]["text"]
+
+def prepare_dataset(dataset):
+    return dataset
+
+
+@app.function(image = llama_cpp_image)
+def llama_cpp_inference(batch):
+    import subprocess
+    import time
+
+    print(batch)
+    start = time.monotonic()
+    # TODO: Add support for batching, check if it's tagging correctly
+    subprocess.run([
+        '/build/bin/llama-cli',
+        '-m', '/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
+        '-b', f'{BATCH_SIZE}',
+        '-n', f'{NUM_OUTPUT_TOKENS}',
+        '-p', f'{PROMPT} \n batch'
+    ])
+
+    end = time.monotonic()
+    return end - start
+
+
+@app.function(image = Image.debian_slim().pip_install("datasets"))
+def process_data():
+    from datasets import load_dataset
+    dataset = prepare_dataset(load_dataset("youngermax/text-tagging", split="train"))
+    max_duration = 0
+    for duration in llama_cpp_inference.map(batch_iterator(dataset)):
+        max_duration = max(max_duration, duration)
+
+    # TODO: Fix throughput measurement
+    print(f"The throughput is {NUM_OUTPUT_TOKENS * len(dataset) / max_duration}")
+
+@app.local_entrypoint()
+def main():
+    process_data.remote()

From d7170f860077896c7a44b2c0d212949d51c77d90 Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:35:58 -0400
Subject: [PATCH 2/4] remove unused function

---
 misc/cpu_inference.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 3981c6183..9384ff5d1 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -31,10 +31,6 @@
 def batch_iterator(dataset):
     for i in range(0, len(dataset), BATCH_SIZE):
         yield dataset[i : i + BATCH_SIZE]["text"]
-
-def prepare_dataset(dataset):
-    return dataset
-
 
 @app.function(image = llama_cpp_image)
 def llama_cpp_inference(batch):
@@ -59,7 +55,7 @@ def llama_cpp_inference(batch):
 @app.function(image = Image.debian_slim().pip_install("datasets"))
 def process_data():
     from datasets import load_dataset
-    dataset = prepare_dataset(load_dataset("youngermax/text-tagging", split="train"))
+    dataset = load_dataset("youngermax/text-tagging", split="train")
     max_duration = 0
     for duration in llama_cpp_inference.map(batch_iterator(dataset)):
         max_duration = max(max_duration, duration)

From 4b04486297e337d528b4feef86c8bbb24e73c7af Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:41:29 -0400
Subject: [PATCH 3/4] Remove print statement

---
 misc/cpu_inference.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 9384ff5d1..04118e93a 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -37,7 +37,6 @@ def llama_cpp_inference(batch):
     import subprocess
     import time
 
-    print(batch)
     start = time.monotonic()
     # TODO: Add support for batching, check if it's tagging correctly
     subprocess.run([

From 734bb242ea80cffc2bd92de752668ea13e4b552d Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 15:01:46 -0400
Subject: [PATCH 4/4] Fix typo

---
 misc/cpu_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 04118e93a..31fb32c0d 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -38,13 +38,13 @@ def llama_cpp_inference(batch):
     import time
 
     start = time.monotonic()
-    # TODO: Add support for batching, check if it's tagging correctly
+    # TODO: Add support for batching, check if it's tagging correctly, figure out way to pass entire batch
     subprocess.run([
         '/build/bin/llama-cli',
         '-m', '/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
         '-b', f'{BATCH_SIZE}',
         '-n', f'{NUM_OUTPUT_TOKENS}',
-        '-p', f'{PROMPT} \n batch'
+        '-p', f'{PROMPT} \n {batch[:10]}'
     ])
 
     end = time.monotonic()
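
A possible direction for the two TODOs left open after PATCH 4/4 (passing the entire batch to llama-cli, and fixing the throughput measurement) is sketched below. This is an untested sketch, not a fifth patch. It assumes llama-cli's -f/--file flag, which reads the prompt from a file instead of the command line, and it reuses app, PROMPT, NUM_OUTPUT_TOKENS, batch_iterator, and the images defined in the file above. One caveat baked into the sketch: -n caps the tokens generated per llama-cli invocation, so tokens should be counted per batch, not per dataset row as the current print does.

@app.function(image = llama_cpp_image)
def llama_cpp_inference(batch):
    import subprocess
    import tempfile
    import time

    # Write the prompt plus the whole batch to a file; a -p command-line
    # argument is a poor fit for a 64-item batch, and {batch[:10]} only
    # interpolates the repr of the first ten items.
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write(PROMPT + "\n" + "\n".join(batch))
        prompt_file = f.name

    start = time.monotonic()
    result = subprocess.run(
        [
            "/build/bin/llama-cli",
            "-m", "/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
            "-n", f"{NUM_OUTPUT_TOKENS}",
            "-f", prompt_file,
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    # Return the generated text alongside the duration so the tagging
    # output can actually be inspected.
    return time.monotonic() - start, result.stdout

@app.function(image = Image.debian_slim().pip_install("datasets"))
def process_data():
    import time
    from datasets import load_dataset

    dataset = load_dataset("youngermax/text-tagging", split="train")
    batches = list(batch_iterator(dataset))

    # Measure wall-clock time across the whole map rather than dividing by
    # the duration of the slowest single container.
    start = time.monotonic()
    results = list(llama_cpp_inference.map(batches))
    wall = time.monotonic() - start

    # Each llama-cli call generates at most NUM_OUTPUT_TOKENS tokens, and
    # there is one call per batch.
    print(f"The throughput is {NUM_OUTPUT_TOKENS * len(batches) / wall:.1f} tokens/s")

Note also that llama.cpp's -b/--batch-size controls the prompt-processing batch size within a single sequence, not independent-prompt batching, which is why the sketch drops it and simply concatenates the batch into one prompt.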