diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
index 1903eafef3..ce19734d28 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.10",
+  "version": "1.0.11",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
new file mode 100644
index 0000000000..8e026e340d
--- /dev/null
+++ b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
@@ -0,0 +1,36 @@
+{
+  "sources": [
+    {
+      "filename": "Codestral-22B-v0.1-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf"
+    }
+  ],
+  "id": "codestral-22b",
+  "object": "model",
+  "name": "Codestral 22B Q4",
+  "version": "1.0",
+  "description": "Latest model from MistralAI optimized for code generation tasks.",
+  "format": "gguf",
+  "settings": {
+    "ctx_len": 32000,
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
+    "ngl": 56
+  },
+  "parameters": {
+    "temperature": 0.7,
+    "top_p": 0.95,
+    "stream": true,
+    "max_tokens": 32000,
+    "stop": ["</s>", "[/INST]"],
+    "frequency_penalty": 0,
+    "presence_penalty": 0
+  },
+  "metadata": {
+    "author": "MistralAI",
+    "tags": ["22B", "Finetuned", "Featured"],
+    "size": 13341237440
+  },
+  "engine": "nitro"
+}
+
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
index c372aa3295..21dcea8652 100644
--- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+      "filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
     }
   ],
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
     "ctx_len": 32768,
-    "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
     "ngl": 32
   },
   "parameters": {
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
index 63dda8f0a6..8f5bfa1c3a 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
+++ b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
@@ -8,7 +8,7 @@
   "id": "phi3-medium",
   "object": "model",
   "name": "Phi-3 Medium",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Phi-3 Medium is Microsoft's latest SOTA model.",
   "format": "gguf",
   "settings": {
@@ -29,7 +29,7 @@
   "metadata": {
     "author": "Microsoft",
     "tags": [
-      "7B",
+      "14B",
       "Finetuned"
     ],
     "size": 8366000000
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
index c28d5b64e9..3a790b5016 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-nitro-extension/rollup.config.ts
@@ -38,6 +38,7 @@ const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.js
 const aya8bJson = require('./resources/models/aya-23-8b/model.json')
 const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
+const codestralJson = require('./resources/models/codestral-22b/model.json')
 
 export default [
   {
@@ -82,7 +83,8 @@ export default [
         llama3Hermes8bJson,
         phimediumJson,
         aya8bJson,
-        aya35bJson
+        aya35bJson,
+        codestralJson
       ]),
       NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json
index f2e15d2c9b..c02008cd64 100644
--- a/extensions/model-extension/resources/default-model.json
+++ b/extensions/model-extension/resources/default-model.json
@@ -23,7 +23,7 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 2048,
-    "stop": ["</s>"],
+    "stop": ["<|END_OF_TURN_TOKEN|>", "</s>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
    "frequency_penalty": 0,
     "presence_penalty": 0
   },
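
Note on the model manifests touched above: the engine loads the GGUF file named by `settings.llama_model_path`, so it must stay in sync with `sources[0].filename` (both changed together in the mistral-ins-7b-q4 hunk), and the expanded default `stop` list only behaves sensibly if every entry is a non-empty token. A minimal sanity-check sketch for these invariants, assuming a plain Node + TypeScript setup — the `validateModelJson` helper, its specific checks, and the hard-coded path are illustrative, not part of this PR:

```ts
// validate-model-json.ts — hypothetical sanity check for resources/models/*/model.json
import * as fs from "fs";
import * as path from "path";

interface ModelJson {
  sources: { filename: string; url: string }[];
  id: string;
  settings: { ctx_len: number; prompt_template: string; llama_model_path: string };
  parameters: { max_tokens: number; stop?: string[] };
}

function validateModelJson(file: string): string[] {
  const model: ModelJson = JSON.parse(fs.readFileSync(file, "utf8"));
  const errors: string[] = [];

  // The downloaded filename must match the path the engine is told to load.
  if (model.sources[0]?.filename !== model.settings.llama_model_path) {
    errors.push(`${model.id}: sources[0].filename != settings.llama_model_path`);
  }
  // max_tokens larger than the context window cannot be honored.
  if (model.parameters.max_tokens > model.settings.ctx_len) {
    errors.push(`${model.id}: max_tokens exceeds ctx_len`);
  }
  // An empty stop string is never a meaningful stop token and usually
  // indicates a stripped or mis-pasted value; flag it.
  if ((model.parameters.stop ?? []).some((s) => s.length === 0)) {
    errors.push(`${model.id}: empty string in stop list`);
  }
  return errors;
}

// Check every manifest bundled with the extension.
const root = "extensions/inference-nitro-extension/resources/models";
for (const dir of fs.readdirSync(root)) {
  const file = path.join(root, dir, "model.json");
  if (fs.existsSync(file)) {
    validateModelJson(file).forEach((e) => console.error(e));
  }
}
```

Run from the repo root with e.g. `npx ts-node validate-model-json.ts`; whether to wire such a check into CI is out of scope for this PR.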