Chore: Model Hub update (#2966)

* fix: correct size * version bump * add: codestral 22b * add: codestral 22b * version bump * upgrade to v3 * Update stop token default-model.json confirmed with Rex * fix: whitespace --------- Co-authored-by: Van Pham <[email protected]>
janhq · May 30, 2024 · bd5a0ea · bd5a0ea
1 parent b662c25
commit bd5a0ea
Show file tree

Hide file tree

Showing 6 changed files with 48 additions and 10 deletions.
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-cortex-extension",
   "productName": "Cortex Inference Engine",
-  "version": "1.0.10",
+  "version": "1.0.11",
   "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",

diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
@@ -0,0 +1,36 @@
+{
+    "sources": [
+      {
+        "filename": "Codestral-22B-v0.1-Q4_K_M.gguf",
+        "url": "https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf"
+      }
+    ],
+    "id": "codestral-22b",
+    "object": "model",
+    "name": "Codestral 22B Q4",
+    "version": "1.0",
+    "description": "Latest model from MistralAI optimized for code generation tasks.",
+    "format": "gguf",
+    "settings": {
+      "ctx_len": 32000,
+      "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+      "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf",
+      "ngl": 56
+    },
+    "parameters": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "stream": true,
+      "max_tokens": 32000,
+      "stop": ["<endofstring>", "[/INST]"],
+      "frequency_penalty": 0,
+      "presence_penalty": 0
+    },
+    "metadata": {
+      "author": "MistralAI",
+      "tags": ["22B", "Finetuned", "Featured"],
+      "size": 13341237440
+    },
+    "engine": "nitro"
+  }
+
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -1,20 +1,20 @@
 {
   "sources": [
     {
-      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+      "filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
+      "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
     }
   ],
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
-  "version": "1.1",
+  "version": "1.2",
   "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding of the world.",
   "format": "gguf",
   "settings": {
     "ctx_len": 32768,
-    "prompt_template": "[INST] {prompt} [/INST]",
-    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+    "prompt_template": "{system_message} [INST] {prompt} [/INST]",
+    "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf",
     "ngl": 32
   },
   "parameters": {

diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
@@ -8,7 +8,7 @@
     "id": "phi3-medium",
     "object": "model",
     "name": "Phi-3 Medium",
-    "version": "1.0",
+    "version": "1.1",
     "description": "Phi-3 Medium is Microsoft's latest SOTA model.",
     "format": "gguf",
     "settings": {
@@ -29,7 +29,7 @@
     "metadata": {
       "author": "Microsoft",
       "tags": [
-        "7B",
+        "14B",
         "Finetuned"
       ],
       "size": 8366000000

diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-nitro-extension/rollup.config.ts
@@ -38,6 +38,7 @@ const llama3Hermes8bJson = require('./resources/models/llama3-hermes-8b/model.js
 const aya8bJson = require('./resources/models/aya-23-8b/model.json')
 const aya35bJson = require('./resources/models/aya-23-35b/model.json')
 const phimediumJson = require('./resources/models/phi3-medium/model.json')
+const codestralJson = require('./resources/models/codestral-22b/model.json')
 
 export default [
   {
@@ -82,7 +83,8 @@ export default [
           llama3Hermes8bJson,
           phimediumJson,
           aya8bJson,
-          aya35bJson
+          aya35bJson,
+          codestralJson
         ]),
         NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),

diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json
@@ -23,7 +23,7 @@
     "top_p": 0.95,
     "stream": true,
     "max_tokens": 2048,
-    "stop": ["<endofstring>"],
+    "stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
     "frequency_penalty": 0,
     "presence_penalty": 0
   },