From 4e3f4075428ac0c0cdbfa143dd935b96180ed5dd Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:32:51 -0400
Subject: [PATCH 1/4] Add WIP cpu inference example

---
 misc/cpu_inference.py | 72 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 misc/cpu_inference.py

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
new file mode 100644
index 000000000..3981c6183
--- /dev/null
+++ b/misc/cpu_inference.py
@@ -0,0 +1,72 @@
+from modal import App, Image
+
+app = App("cpu-inference")
+BATCH_SIZE = 64
+NUM_CORES = 64
+NUM_OUTPUT_TOKENS = 128
+
+PROMPT = """You are an expert at adding tags to pieces of text. Add a list of comma separated tags to the following pieces of text. Here are some examples:
+
+Example 1
+
+Text:
+IIJA Bureau of Land Management Idaho Threatened and Endangered Species Program Department of the Interior - Bureau of Land Management Idaho Threatened and Endangered Species Program
+Tags: ["Wildlife Conservation", "Environmental Protection", "Species Preservation", "Conservation Efforts", "Ecosystem Management" ]
+
+-------------------
+
+Example 2
+
+Text: Scaling Apprenticeship Readiness Across the Building Trades Initiative A Cooperative Agreement will be awarded for $19,821,832 to TradesFutures to substantially increase the number of participants from underrepresented populations and underserved communities in registered apprenticeship programs within the construction industry sector.
+Tags: [ "Apprenticeship", "Building Trades", "Construction Industry", "Underrepresented Populations", "Underserved Communities" ]
+
+"""
+
+llama_cpp_image = Image.debian_slim(python_version="3.11").apt_install(["curl", "unzip"]).run_commands([
+    'curl -L -O https://github.com/ggerganov/llama.cpp/releases/download/b3367/llama-b3367-bin-ubuntu-x64.zip',
+    'unzip llama-b3367-bin-ubuntu-x64.zip',
+    'curl -L -O https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
+])
+
+def batch_iterator(dataset):
+    for i in range(0, len(dataset), BATCH_SIZE):
+        yield dataset[i : i + BATCH_SIZE]["text"]
+
+def prepare_dataset(dataset):
+    return dataset
+
+
+@app.function(image = llama_cpp_image)
+def llama_cpp_inference(batch):
+    import subprocess
+    import time
+
+    print(batch)
+    start = time.monotonic()
+    # TODO: Add support for batching, check if it's tagging correctly
+    subprocess.run([
+        '/build/bin/llama-cli',
+        '-m', '/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
+        '-b', f'{BATCH_SIZE}',
+        '-n', f'{NUM_OUTPUT_TOKENS}',
+        '-p', f'{PROMPT} \n batch'
+    ])
+
+    end = time.monotonic()
+    return end - start
+
+
+@app.function(image = Image.debian_slim().pip_install("datasets"))
+def process_data():
+    from datasets import load_dataset
+    dataset = prepare_dataset(load_dataset("youngermax/text-tagging", split="train"))
+    max_duration = 0
+    for duration in llama_cpp_inference.map(batch_iterator(dataset)):
+        max_duration = max(max_duration, duration)
+
+    # TODO: Fix throughput measurement
+    print(f"The throughput is {NUM_OUTPUT_TOKENS * len(dataset) / max_duration}")
+
+@app.local_entrypoint()
+def main():
+    process_data.remote()

From d7170f860077896c7a44b2c0d212949d51c77d90 Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:35:58 -0400
Subject: [PATCH 2/4] remove unused function

---
 misc/cpu_inference.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 3981c6183..9384ff5d1 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -31,10 +31,6 @@
 def batch_iterator(dataset):
     for i in range(0, len(dataset), BATCH_SIZE):
         yield dataset[i : i + BATCH_SIZE]["text"]
-
-def prepare_dataset(dataset):
-    return dataset
-
 
 @app.function(image = llama_cpp_image)
 def llama_cpp_inference(batch):
@@ -59,7 +55,7 @@ def llama_cpp_inference(batch):
 @app.function(image = Image.debian_slim().pip_install("datasets"))
 def process_data():
     from datasets import load_dataset
-    dataset = prepare_dataset(load_dataset("youngermax/text-tagging", split="train"))
+    dataset = load_dataset("youngermax/text-tagging", split="train")
     max_duration = 0
     for duration in llama_cpp_inference.map(batch_iterator(dataset)):
         max_duration = max(max_duration, duration)

From 4b04486297e337d528b4feef86c8bbb24e73c7af Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 13:41:29 -0400
Subject: [PATCH 3/4] Remove print statement

---
 misc/cpu_inference.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 9384ff5d1..04118e93a 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -37,7 +37,6 @@ def llama_cpp_inference(batch):
     import subprocess
     import time
 
-    print(batch)
     start = time.monotonic()
     # TODO: Add support for batching, check if it's tagging correctly
     subprocess.run([

From 734bb242ea80cffc2bd92de752668ea13e4b552d Mon Sep 17 00:00:00 2001
From: Advay Pal
Date: Tue, 6 Aug 2024 15:01:46 -0400
Subject: [PATCH 4/4] Fix typo

---
 misc/cpu_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/cpu_inference.py b/misc/cpu_inference.py
index 04118e93a..31fb32c0d 100644
--- a/misc/cpu_inference.py
+++ b/misc/cpu_inference.py
@@ -38,13 +38,13 @@ def llama_cpp_inference(batch):
     import time
 
     start = time.monotonic()
-    # TODO: Add support for batching, check if it's tagging correctly
+    # TODO: Add support for batching, check if it's tagging correctly, figure out way to pass entire batch
     subprocess.run([
         '/build/bin/llama-cli',
         '-m', '/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf',
         '-b', f'{BATCH_SIZE}',
         '-n', f'{NUM_OUTPUT_TOKENS}',
-        '-p', f'{PROMPT} \n batch'
+        '-p', f'{PROMPT} \n {batch[:10]}'
     ])
 
     end = time.monotonic()
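
A possible direction for the two TODOs left open after PATCH 4/4 (passing the entire batch to llama-cli, and fixing the throughput measurement) is sketched below. This is an untested sketch, not a fifth patch. It assumes llama-cli's -f/--file flag, which reads the prompt from a file instead of the command line, and it reuses app, PROMPT, NUM_OUTPUT_TOKENS, batch_iterator, and the images defined in the file above. One caveat baked into the sketch: -n caps the tokens generated per llama-cli invocation, so tokens should be counted per batch, not per dataset row as the current print does.

@app.function(image = llama_cpp_image)
def llama_cpp_inference(batch):
    import subprocess
    import tempfile
    import time

    # Write the prompt plus the whole batch to a file; a -p command-line
    # argument is a poor fit for a 64-item batch, and {batch[:10]} only
    # interpolates the repr of the first ten items.
    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
        f.write(PROMPT + "\n" + "\n".join(batch))
        prompt_file = f.name

    start = time.monotonic()
    result = subprocess.run(
        [
            "/build/bin/llama-cli",
            "-m", "/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf",
            "-n", f"{NUM_OUTPUT_TOKENS}",
            "-f", prompt_file,
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    # Return the generated text alongside the duration so the tagging
    # output can actually be inspected.
    return time.monotonic() - start, result.stdout

@app.function(image = Image.debian_slim().pip_install("datasets"))
def process_data():
    import time
    from datasets import load_dataset

    dataset = load_dataset("youngermax/text-tagging", split="train")
    batches = list(batch_iterator(dataset))

    # Measure wall-clock time across the whole map rather than dividing by
    # the duration of the slowest single container.
    start = time.monotonic()
    results = list(llama_cpp_inference.map(batches))
    wall = time.monotonic() - start

    # Each llama-cli call generates at most NUM_OUTPUT_TOKENS tokens, and
    # there is one call per batch.
    print(f"The throughput is {NUM_OUTPUT_TOKENS * len(batches) / wall:.1f} tokens/s")

Note also that llama.cpp's -b/--batch-size controls the prompt-processing batch size within a single sequence, not independent-prompt batching, which is why the sketch drops it and simply concatenates the batch into one prompt.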