From 8ebd68296283d7cb933dd6df6a61e3a991c47927 Mon Sep 17 00:00:00 2001 From: Louis Date: Tue, 27 Aug 2024 12:16:47 +0700 Subject: [PATCH] chore: add llama 3.1 8B gguf model --- .../inference-nitro-extension/package.json | 2 +- .../models/llama3.1-8b-instruct/model.json | 42 +++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json index 24c8870240..7be4be69a1 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-nitro-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.14", + "version": "1.0.15", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json new file mode 100644 index 0000000000..b990dbffc5 --- /dev/null +++ b/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json @@ -0,0 +1,42 @@ +{ + "sources": [ + { + "filename": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "url": "https://huggingface.co/mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" + } + ], + "id": "llama3.1-8b-instruct", + "object": "model", + "name": "Llama 3.1 8B Q4", + "version": "1.0", + "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", + "format": "gguf", + "settings": { + "ctx_len": 131072, + 
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", + "llama_model_path": "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf", + "ngl": 33 + }, + "parameters": { + "temperature": 0.7, + "top_p": 0.95, + "stream": true, + "max_tokens": 8192, + "stop": [ + "<|end_of_text|>", + "<|eot_id|>", + "<|eom_id|>" + ], + "frequency_penalty": 0, + "presence_penalty": 0 + }, + "metadata": { + "author": "MetaAI", + "tags": [ + "8B", + "Featured" + ], + "size": 4920000000 + }, + "engine": "nitro" +} \ No newline at end of file