Skip to content

Commit

Permalink
Chore/add gemma2 model (#3471)
Browse files Browse the repository at this point in the history
* feat: add gemma 2

* feat: add gemma 2

* feat: correct ngl

---------

Co-authored-by: Van QA <[email protected]>
  • Loading branch information
Van-QA and Van QA committed Aug 27, 2024
1 parent 3833fca commit d66f4c3
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
{
"sources": [
{
"filename": "gemma-2b-it-q4_k_m.gguf",
"url": "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/main/gemma-2b-it-q4_k_m.gguf"
"filename": "gemma-1.1-2b-it-q4_k_m.gguf",
"url": "https://huggingface.co/bartowski/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf"
}
],
"id": "gemma-2b",
"id": "gemma-1.1-2b-it",
"object": "model",
"name": "Gemma 2B Q4",
"name": "Gemma 1.1 2B Q4",
"version": "1.3",
"description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf",
"settings": {
"ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-2b-it-q4_k_m.gguf",
"llama_model_path": "gemma-1.1-2b-it-Q4_K_M.gguf",
"ngl": 19
},
"parameters": {
Expand All @@ -29,7 +29,7 @@
"metadata": {
"author": "Google",
"tags": ["2B", "Finetuned", "Tiny"],
"size": 1500000000
"size": 1630000000
},
"engine": "nitro"
}
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
{
"sources": [
{
"filename": "gemma-7b-it-q4_K_M.gguf",
"url": "https://huggingface.co/mmnga/gemma-7b-it-gguf/resolve/main/gemma-7b-it-q4_K_M.gguf"
"filename": "gemma-1.1-7b-it-q4_K_M.gguf",
"url": "https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf"
}
],
"id": "gemma-7b",
"id": "gemma-1.1-7b-it",
"object": "model",
"name": "Gemma 7B Q4",
"name": "Gemma 1.1 7B Q4",
"version": "1.2",
"description": "Google's Gemma is built for multilingual purpose",
"format": "gguf",
"settings": {
"ctx_len": 8192,
"prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model",
"llama_model_path": "gemma-7b-it-q4_K_M.gguf",
"llama_model_path": "gemma-1.1-7b-it-q4_K_M.gguf",
"ngl": 29
},
"parameters": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"sources": [
{
"filename": "gemma-2-27b-it-Q4_K_M.gguf",
"url": "https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q4_K_M.gguf"
}
],
"id": "gemma-2-27b-it",
"object": "model",
"name": "Gemma 2 27B Q4",
"version": "1.0",
"description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf",
"settings": {
"ctx_len": 8192,
"prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
"llama_model_path": "gemma-2-27b-it-Q4_K_M.gguf",
"ngl": 47
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
"stop": [
"<end_of_turn>"
],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Google",
"tags": [
"27B",
"Conversational",
"Text-generation"
],
"size": 16600000000
},
"engine": "nitro"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"sources": [
{
"filename": "gemma-2-2b-it-Q4_K_M.gguf",
"url": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf"
}
],
"id": "gemma-2-2b-it",
"object": "model",
"name": "Gemma 2 2B Q4",
"version": "1.0",
"description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf",
"settings": {
"ctx_len": 8192,
"prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
"llama_model_path": "gemma-2-2b-it-Q4_K_M.gguf",
"ngl": 27
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
"stop": [
"<end_of_turn>"
],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Google",
"tags": [
"2B",
"Tiny",
"Conversational",
"Text-generation"
],
"size": 1710000000
},
"engine": "nitro"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"sources": [
{
"filename": "gemma-2-9b-it-Q4_K_M.gguf",
"url": "https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q4_K_M.gguf"
}
],
"id": "gemma-2-9b-it",
"object": "model",
"name": "Gemma 2 9B Q4",
"version": "1.0",
"description": "Gemma is built from the same technology with Google's Gemini.",
"format": "gguf",
"settings": {
"ctx_len": 8192,
"prompt_template": "<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model\n",
"llama_model_path": "gemma-2-9b-it-Q4_K_M.gguf",
"ngl": 43
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
"stop": [
"<end_of_turn>"
],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Google",
"tags": [
"9B",
"Conversational",
"Text-generation"
],
"size": 5760000000
},
"engine": "nitro"
}
14 changes: 10 additions & 4 deletions extensions/inference-nitro-extension/rollup.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ const codeninja7bJson = require('./resources/models/codeninja-1.0-7b/model.json'
const commandr34bJson = require('./resources/models/command-r-34b/model.json')
const deepseekCoder13bJson = require('./resources/models/deepseek-coder-1.3b/model.json')
const deepseekCoder34bJson = require('./resources/models/deepseek-coder-34b/model.json')
const gemma2bJson = require('./resources/models/gemma-2b/model.json')
const gemma7bJson = require('./resources/models/gemma-7b/model.json')
const gemma112bJson = require('./resources/models/gemma-1.1-2b/model.json')
const gemma117bJson = require('./resources/models/gemma-1.1-7b/model.json')
const llama2Chat70bJson = require('./resources/models/llama2-chat-70b/model.json')
const llama2Chat7bJson = require('./resources/models/llama2-chat-7b/model.json')
const llamacorn1bJson = require('./resources/models/llamacorn-1.1b/model.json')
Expand Down Expand Up @@ -42,6 +42,9 @@ const codestralJson = require('./resources/models/codestral-22b/model.json')
const qwen2Json = require('./resources/models/qwen2-7b/model.json')
const llama318bJson = require('./resources/models/llama3.1-8b-instruct/model.json')
const llama3170bJson = require('./resources/models/llama3.1-70b-instruct/model.json')
const gemma22bJson = require('./resources/models/gemma-2-2b/model.json')
const gemma29bJson = require('./resources/models/gemma-2-9b/model.json')
const gemma227bJson = require('./resources/models/gemma-2-27b/model.json')

export default [
{
Expand All @@ -61,8 +64,8 @@ export default [
commandr34bJson,
deepseekCoder13bJson,
deepseekCoder34bJson,
gemma2bJson,
gemma7bJson,
gemma112bJson,
gemma117bJson,
llama2Chat70bJson,
llama2Chat7bJson,
llamacorn1bJson,
Expand Down Expand Up @@ -91,6 +94,9 @@ export default [
qwen2Json,
llama318bJson,
llama3170bJson,
gemma22bJson,
gemma29bJson,
gemma227bJson
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
Expand Down

0 comments on commit d66f4c3

Please sign in to comment.