diff --git a/all.json b/all.json
index 429e4b3..410bcd3 100644
--- a/all.json
+++ b/all.json
@@ -1 +1 @@
-{"_version": "1", "created_at": "2024-03-25T18:59:27.621062Z", "updated_at": "2024-03-25T18:59:27.621306Z", "models": [{"_version": "1.0.0", "id": "google/gemma-7b", "name": "gemma-7b", "creator": "google", "title": "Gemma 7B", "version": "1.0.0", "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "author": "Google", "publisher": {"name": "Google", "url": "https://huggingface.co/google"}, "license": "Other", "tags": "transformer safetensors gguf gemma text-generation license:other autotrain_compatible endpoints_compatible has_space text-generation-inference region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "gemma", "library": "transformers", "private": true, "featured": true, "repository": "https://huggingface.co/google/gemma-7b", "include": [{"name": "gemma-7b.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 3.48, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q2_K.gguf"}, {"name": "gemma-7b.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 3.98, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_S.gguf"}, {"name": "gemma-7b.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 4.37, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_M.gguf"}, {"name": "gemma-7b.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 4.71, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_L.gguf"}, {"name": "gemma-7b.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 5.01, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_0.gguf"}, {"name": "gemma-7b.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 5.05, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_K_S.gguf"}, {"name": "gemma-7b.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 5.33, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_K_M.gguf"}, {"name": "gemma-7b.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 5.98, "deprecated": true,
"download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_0.gguf"}, {"name": "gemma-7b.Q5_1.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_1", "bits": 5, "size": 6.47, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_1.gguf"}, {"name": "gemma-7b.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 5.98, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_K_S.gguf"}, {"name": "gemma-7b.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 6.14, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_K_M.gguf"}, {"name": "gemma-7b.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 7.01, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q6_K.gguf"}, {"name": "gemma-7b.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 9.08, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "google/gemma-2b", "name": "gemma-2b", "creator": "google", "title": "Gemma 2B", "version": "1.0.0", "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. 
Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "author": "Google", "publisher": {"name": "Google", "url": "https://huggingface.co/google"}, "license": "Other", "tags": "transformer safetensors gguf gemma text-generation license:other autotrain_compatible endpoints_compatible has_space text-generation-inference region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "gemma", "library": "transformers", "private": true, "featured": true, "repository": "https://huggingface.co/google/gemma-2b", "include": [{"name": "gemma-2b.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 0.9, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q2_K.gguf"}, {"name": "gemma-2b.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 1.08, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_S.gguf"}, {"name": "gemma-2b.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 1.18, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_M.gguf"}, {"name": "gemma-2b.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 1.26, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_L.gguf"}, {"name": "gemma-2b.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 1.42, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_0.gguf"}, {"name": "gemma-2b.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 1.42, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_K_S.gguf"}, {"name": "gemma-2b.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 1.5, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_K_M.gguf"}, {"name": "gemma-2b.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 1.73, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_0.gguf"}, {"name": "gemma-2b.Q5_1.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_1", "bits": 5, "size": 1.89, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_1.gguf"}, {"name": "gemma-2b.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 1.73, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_K_S.gguf"}, {"name": "gemma-2b.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 1.5, "recommended": true, "download": 
"https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_K_M.gguf"}, {"name": "gemma-2b.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 2.06, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q6_K.gguf"}, {"name": "gemma-2b.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 2.67, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "microsoft/phi-2", "name": "phi-2", "creator": "microsoft", "title": "Phi-2", "version": "2.0.0", "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value). When assessed against benchmarks testing common sense, language understanding, and logical reasoning, Phi-2 showcased a nearly state-of-the-art performance among models with less than 13 billion parameters.", "author": "Microsoft", "publisher": {"name": "Microsoft", "url": "https://huggingface.co/microsoft"}, "license": "MIT", "tags": "transformer safetensors phi-msft text-generation nlp code custom_code en license:mit autotrain_compatible has_space region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "phi-msft", "library": "PyTorch", "private": false, "featured": true, "use_cases": "Given the nature of the training data, the Phi-2 model is best suited for prompts using the QA format, the chat format, and the code format.", "out_of_scope_use_cases": "Limited Scope for code: Majority of Phi-2 training data is based in Python and use common packages such as 'typing, math, random, collections, datetime, itertools'. If the model generates Python scripts that utilize other packages or scripts in other languages, we strongly recommend users manually verify all API uses.", "bias_risks_limitations": "Generate Inaccurate Code and Facts: The model may produce incorrect code snippets and statements. 
Users should treat these outputs as suggestions or starting points, not as definitive or accurate solutions.", "repository": "https://huggingface.co/microsoft/phi-2", "download": "https://huggingface.co/microsoft/phi-2", "include": [{"name": "phi-2.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 1.17, "max_ram": 3.67, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf"}, {"name": "phi-2.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 1.25, "max_ram": 3.75, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_S.gguf"}, {"name": "phi-2.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 1.48, "max_ram": 3.98, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_M.gguf"}, {"name": "phi-2.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 1.6, "max_ram": 4.1, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_L.gguf"}, {"name": "phi-2.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 1.6, "max_ram": 4.1, "deprecated": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_0.gguf"}, {"name": "phi-2.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 1.62, "max_ram": 4.12, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_S.gguf"}, {"name": "phi-2.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 1.79, "max_ram": 4.29, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf"}, {"name": "phi-2.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 1.93, "max_ram": 4.43, "deprecated": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_0.gguf"}, {"name": "phi-2.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 1.93, "max_ram": 4.43, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_S.gguf"}, {"name": "phi-2.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 2.07, "max_ram": 4.57, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_M.gguf"}, {"name": "phi-2.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 2.29, "max_ram": 4.79, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q6_K.gguf"}, {"name": "phi-2.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 2.96, "max_ram": 5.46, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", "name": "tinyllama-1.1b-chat-v0.6", "creator": "TinyLlama", "title": "TinyLlama-1.1B", "version": "0.6", "description": "This is the chat model finetuned on top 
of TinyLlama/TinyLlama-1.1B-intermediate-step-955k-2T. We follow HF's Zephyr's training recipe. The model was initially fine-tuned on a variant of the UltraChat dataset, which contains a diverse range of synthetic dialogues generated by ChatGPT. We then further aligned the model with \ud83e\udd17 TRL's DPOTrainer on the openbmb/UltraFeedback dataset, which contain 64k prompts and model completions that are ranked by GPT-4.", "license": "Apache-2.0", "tags": "llama", "task_type": ["conversational"], "languages": ["en"], "model_size": "1.1b", "context_size": 1024, "tensor_type": "float32", "base_model": "TinyLlama/TinyLlama-1.1B-intermediate-step-955k-2T", "model_type": "llama", "library": "PyTorch", "featured": true, "repository": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6", "include": [{"name": "TinyLlama-1.1B-Chat-v0.6.q4_0.gguf", "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", "library": "GGUF", "quantization": "Q4_0", "size": 0.637, "download": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf"}]}, {"_version": "1.0.0", "id": "meta-llama/Llama2-7b-chat-hf", "name": "llama2-7b-chat-hf", "creator": "meta-llama", "title": "Llama 2 7B Chat", "version": "2.0.0", "summary": "Llama 2 7B Chat is a large language model fine-tuned for dialogue use cases.", "description": "Meta developed and publicly released the Llama 2 family of large language models (LLMs), a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. Our fine-tuned LLMs, called Llama-2-Chat, are optimized for dialogue use cases. Llama-2-Chat models outperform open-source chat models on most benchmarks we tested, and in our human evaluations for helpfulness and safety, are on par with some popular closed-source models like ChatGPT and PaLM.", "author": "AI Research by Meta", "publisher": {"name": "meta-llama", "url": "https://huggingface.co/meta-llama"}, "license": {"name": "Meta Research License Agreement v1.0"}, "tags": "llama llama2 facebook meta english", "task_type": ["conversational"], "languages": ["en"], "model_size": "7b", "context_size": 1024, "tensor_type": "float32", "base_model": "none", "model_type": "llama", "library": "PyTorch", "private": true, "featured": true, "use_cases": " Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks.", "out_of_scope_use_cases": "Use in any manner that violates applicable laws or regulations (including trade compliance laws).Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Llama 2.", "bias_risks_limitations": "Llama-2-Chat models are trained on a large corpus of English text, which may contain bias. 
We recommend that you evaluate the model for your use case before deploying it.", "repository": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "download": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "paper": "https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/", "include": [{"name": "Llama-2-7B-Chat-GGML", "publisher": {"name": "TheBloke", "url": "https://huggingface.co/TheBloke"}, "description": "This repo contains GGUF format model files for Meta Llama 2's Llama 2 7B Chat.", "base_model": "meta-llama/llama-2-7b-chat-hf", "model_type": "llama", "library": "GGUF", "repository": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf", "include": [{"name": "llama-2-7b-chat.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 2.83, "max_ram": 5.33, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"}, {"name": "llama-2-7b-chat.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 2.95, "max_ram": 5.45, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"}, {"name": "llama-2-7b-chat.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 3.3, "max_ram": 5.8, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_M.gguf"}, {"name": "llama-2-7b-chat.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 3.6, "max_ram": 6.1, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_L.gguf"}, {"name": "llama-2-7b-chat.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 3.83, "max_ram": 6.33, "deprecated": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf"}, {"name": "llama-2-7b-chat.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 3.86, "max_ram": 6.36, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_S.gguf"}, {"name": "llama-2-7b-chat.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 4.08, "max_ram": 6.58, "recommended": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"}, {"name": "llama-2-7b-chat.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 4.65, "max_ram": 7.15, "deprecated": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_0.gguf"}, {"name": "llama-2-7b-chat.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 4.65, "max_ram": 7.15, "recommended": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf"}, {"name": "llama-2-7b-chat.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 4.78, "max_ram": 7.28, "recommended": true, 
"download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf"}, {"name": "llama-2-7b-chat.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 5.53, "max_ram": 8.03, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf"}, {"name": "llama-2-7b-chat.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 7.16, "max_ram": 9.66, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q8_0.gguf"}]}]}]} \ No newline at end of file +{"_version": "1", "created_at": "2024-03-28T19:14:18.409334Z", "updated_at": "2024-03-28T19:14:18.409580Z", "models": [{"_version": "1.0.0", "id": "google/gemma-7b", "name": "gemma-7b", "creator": "google", "title": "Gemma 7B", "version": "1.0.0", "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "author": "Google", "publisher": {"name": "Google", "url": "https://huggingface.co/google"}, "license": "Other", "tags": "transformer safetensors gguf gemma text-generation license:other autotrain_compatible endpoints_compatible has_space text-generation-inference region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "gemma", "library": "transformers", "private": true, "featured": true, "repository": "https://huggingface.co/google/gemma-7b", "include": [{"name": "gemma-7b.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 3.48, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q2_K.gguf"}, {"name": "gemma-7b.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 3.98, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_S.gguf"}, {"name": "gemma-7b.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 4.37, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_M.gguf"}, {"name": "gemma-7b.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 4.71, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q3_K_L.gguf"}, {"name": "gemma-7b.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 5.01, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_0.gguf"}, {"name": "gemma-7b.Q4_K_S.gguf", "recommendations": "small, greater quality loss", 
"quantization": "Q4_K_S", "bits": 4, "size": 5.05, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_K_S.gguf"}, {"name": "gemma-7b.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 5.33, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q4_K_M.gguf"}, {"name": "gemma-7b.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 5.98, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_0.gguf"}, {"name": "gemma-7b.Q5_1.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_1", "bits": 5, "size": 6.47, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_1.gguf"}, {"name": "gemma-7b.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 5.98, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_K_S.gguf"}, {"name": "gemma-7b.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 6.14, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q5_K_M.gguf"}, {"name": "gemma-7b.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 7.01, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q6_K.gguf"}, {"name": "gemma-7b.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 9.08, "download": "https://huggingface.co/mlabonne/gemma-7b-GGUF/resolve/main/gemma-7b.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "google/gemma-2b", "name": "gemma-2b", "creator": "google", "title": "Gemma 2B", "version": "1.0.0", "description": "Gemma is a family of lightweight, state-of-the-art open models from Google, built from the same research and technology used to create the Gemini models. They are text-to-text, decoder-only large language models, available in English, with open weights, pre-trained variants, and instruction-tuned variants. Gemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning. 
Their relatively small size makes it possible to deploy them in environments with limited resources such as a laptop, desktop or your own cloud infrastructure, democratizing access to state of the art AI models and helping foster innovation for everyone.", "author": "Google", "publisher": {"name": "Google", "url": "https://huggingface.co/google"}, "license": "Other", "tags": "transformer safetensors gguf gemma text-generation license:other autotrain_compatible endpoints_compatible has_space text-generation-inference region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "gemma", "library": "transformers", "private": true, "featured": true, "repository": "https://huggingface.co/google/gemma-2b", "include": [{"name": "gemma-2b.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 0.9, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q2_K.gguf"}, {"name": "gemma-2b.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 1.08, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_S.gguf"}, {"name": "gemma-2b.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 1.18, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_M.gguf"}, {"name": "gemma-2b.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 1.26, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q3_K_L.gguf"}, {"name": "gemma-2b.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 1.42, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_0.gguf"}, {"name": "gemma-2b.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 1.42, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_K_S.gguf"}, {"name": "gemma-2b.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 1.5, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q4_K_M.gguf"}, {"name": "gemma-2b.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 1.73, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_0.gguf"}, {"name": "gemma-2b.Q5_1.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_1", "bits": 5, "size": 1.89, "deprecated": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_1.gguf"}, {"name": "gemma-2b.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 1.73, "recommended": true, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_K_S.gguf"}, {"name": "gemma-2b.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 1.5, "recommended": true, "download": 
"https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q5_K_M.gguf"}, {"name": "gemma-2b.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 2.06, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q6_K.gguf"}, {"name": "gemma-2b.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 2.67, "download": "https://huggingface.co/mlabonne/gemma-2b-GGUF/resolve/main/gemma-2b.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "microsoft/phi-2", "name": "phi-2", "creator": "microsoft", "title": "Phi-2", "version": "2.0.0", "description": "Phi-2 is a Transformer with 2.7 billion parameters. It was trained using the same data sources as Phi-1.5, augmented with a new data source that consists of various NLP synthetic texts and filtered websites (for safety and educational value). When assessed against benchmarks testing common sense, language understanding, and logical reasoning, Phi-2 showcased a nearly state-of-the-art performance among models with less than 13 billion parameters.", "author": "Microsoft", "publisher": {"name": "Microsoft", "url": "https://huggingface.co/microsoft"}, "license": "MIT", "tags": "transformer safetensors phi-msft text-generation nlp code custom_code en license:mit autotrain_compatible has_space region:us", "task_type": ["text-generation"], "languages": ["en"], "model_size": "2.78b", "context_size": 2048, "tensor_type": "float16", "base_model": "none", "model_type": "phi-msft", "library": "PyTorch", "private": false, "featured": true, "use_cases": "Given the nature of the training data, the Phi-2 model is best suited for prompts using the QA format, the chat format, and the code format.", "out_of_scope_use_cases": "Limited Scope for code: Majority of Phi-2 training data is based in Python and use common packages such as 'typing, math, random, collections, datetime, itertools'. If the model generates Python scripts that utilize other packages or scripts in other languages, we strongly recommend users manually verify all API uses.", "bias_risks_limitations": "Generate Inaccurate Code and Facts: The model may produce incorrect code snippets and statements. 
Users should treat these outputs as suggestions or starting points, not as definitive or accurate solutions.", "repository": "https://huggingface.co/microsoft/phi-2", "download": "https://huggingface.co/microsoft/phi-2", "include": [{"name": "phi-2.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 1.17, "max_ram": 3.67, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q2_K.gguf"}, {"name": "phi-2.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 1.25, "max_ram": 3.75, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_S.gguf"}, {"name": "phi-2.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 1.48, "max_ram": 3.98, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_M.gguf"}, {"name": "phi-2.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 1.6, "max_ram": 4.1, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q3_K_L.gguf"}, {"name": "phi-2.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 1.6, "max_ram": 4.1, "deprecated": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_0.gguf"}, {"name": "phi-2.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 1.62, "max_ram": 4.12, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_S.gguf"}, {"name": "phi-2.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 1.79, "max_ram": 4.29, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf"}, {"name": "phi-2.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 1.93, "max_ram": 4.43, "deprecated": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_0.gguf"}, {"name": "phi-2.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 1.93, "max_ram": 4.43, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_S.gguf"}, {"name": "phi-2.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 2.07, "max_ram": 4.57, "recommended": true, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q5_K_M.gguf"}, {"name": "phi-2.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 2.29, "max_ram": 4.79, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q6_K.gguf"}, {"name": "phi-2.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 2.96, "max_ram": 5.46, "download": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"}]}, {"_version": "1.0.0", "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", "name": "tinyllama-1.1b-chat-v0.6", "creator": "TinyLlama", "title": "TinyLlama-1.1B", "version": "0.6", "description": "This is the chat model finetuned on top 
of TinyLlama/TinyLlama-1.1B-intermediate-step-955k-2T. We follow HF's Zephyr's training recipe. The model was initially fine-tuned on a variant of the UltraChat dataset, which contains a diverse range of synthetic dialogues generated by ChatGPT. We then further aligned the model with \ud83e\udd17 TRL's DPOTrainer on the openbmb/UltraFeedback dataset, which contain 64k prompts and model completions that are ranked by GPT-4.", "license": "Apache-2.0", "tags": "llama", "task_type": ["conversational"], "languages": ["en"], "model_size": "1.1b", "context_size": 1024, "tensor_type": "float32", "base_model": "TinyLlama/TinyLlama-1.1B-intermediate-step-955k-2T", "model_type": "llama", "library": "PyTorch", "featured": true, "repository": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6", "include": [{"name": "TinyLlama-1.1B-Chat-v0.6.q4_0.gguf", "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", "library": "GGUF", "quantization": "Q4_0", "size": 0.637, "download": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf"}]}, {"_version": "1.0.0", "id": "meta-llama/Llama2-7b-chat-hf", "name": "llama2-7b-chat-hf", "creator": "meta-llama", "title": "Llama 2 7B Chat", "version": "2.0.0", "summary": "Llama 2 7B Chat is a large language model fine-tuned for dialogue use cases.", "description": "Meta developed and publicly released the Llama 2 family of large language models (LLMs), a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 70 billion parameters. Our fine-tuned LLMs, called Llama-2-Chat, are optimized for dialogue use cases. Llama-2-Chat models outperform open-source chat models on most benchmarks we tested, and in our human evaluations for helpfulness and safety, are on par with some popular closed-source models like ChatGPT and PaLM.", "author": "AI Research by Meta", "publisher": {"name": "meta-llama", "url": "https://huggingface.co/meta-llama"}, "license": {"name": "Meta Research License Agreement v1.0"}, "tags": "llama llama2 facebook meta english", "task_type": ["conversational"], "languages": ["en"], "model_size": "7b", "context_size": 1024, "tensor_type": "float32", "base_model": "none", "model_type": "llama", "library": "PyTorch", "private": true, "featured": true, "use_cases": " Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks.", "out_of_scope_use_cases": "Use in any manner that violates applicable laws or regulations (including trade compliance laws).Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Llama 2.", "bias_risks_limitations": "Llama-2-Chat models are trained on a large corpus of English text, which may contain bias. 
We recommend that you evaluate the model for your use case before deploying it.", "repository": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "download": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf", "paper": "https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/", "include": [{"name": "Llama-2-7B-Chat-GGML", "publisher": {"name": "TheBloke", "url": "https://huggingface.co/TheBloke"}, "description": "This repo contains GGUF format model files for Meta Llama 2's Llama 2 7B Chat.", "base_model": "meta-llama/llama-2-7b-chat-hf", "model_type": "llama", "library": "GGUF", "repository": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf", "include": [{"name": "llama-2-7b-chat.Q2_K.gguf", "recommendations": "smallest, significant quality loss - not recommended for most purposes", "quantization": "Q2_K", "bits": 2, "size": 2.83, "max_ram": 5.33, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q2_K.gguf"}, {"name": "llama-2-7b-chat.Q3_K_S.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_S", "bits": 3, "size": 2.95, "max_ram": 5.45, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"}, {"name": "llama-2-7b-chat.Q3_K_M.gguf", "recommendations": "very small, high quality loss", "quantization": "Q3_K_M", "bits": 3, "size": 3.3, "max_ram": 5.8, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_M.gguf"}, {"name": "llama-2-7b-chat.Q3_K_L.gguf", "recommendations": "small, substantial quality loss", "quantization": "Q3_K_L", "bits": 3, "size": 3.6, "max_ram": 6.1, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_L.gguf"}, {"name": "llama-2-7b-chat.Q4_0.gguf", "recommendations": "legacy; small, very high quality loss - prefer using Q3_K_M", "quantization": "Q4_0", "bits": 4, "size": 3.83, "max_ram": 6.33, "deprecated": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_0.gguf"}, {"name": "llama-2-7b-chat.Q4_K_S.gguf", "recommendations": "small, greater quality loss", "quantization": "Q4_K_S", "bits": 4, "size": 3.86, "max_ram": 6.36, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_S.gguf"}, {"name": "llama-2-7b-chat.Q4_K_M.gguf", "recommendations": "medium, balanced quality - recommended", "quantization": "Q4_K_M", "bits": 4, "size": 4.08, "max_ram": 6.58, "recommended": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"}, {"name": "llama-2-7b-chat.Q5_0.gguf", "recommendations": "legacy; medium, balanced quality - prefer using Q4_K_M", "quantization": "Q5_0", "bits": 5, "size": 4.65, "max_ram": 7.15, "deprecated": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_0.gguf"}, {"name": "llama-2-7b-chat.Q5_K_S.gguf", "recommendations": "large, low quality loss - recommended", "quantization": "Q5_K_S", "bits": 5, "size": 4.65, "max_ram": 7.15, "recommended": true, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_S.gguf"}, {"name": "llama-2-7b-chat.Q5_K_M.gguf", "recommendations": "large, very low quality loss - recommended", "quantization": "Q5_K_M", "bits": 5, "size": 4.78, "max_ram": 7.28, "recommended": true, 
"download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf"}, {"name": "llama-2-7b-chat.Q6_K.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q6_K", "bits": 6, "size": 5.53, "max_ram": 8.03, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf"}, {"name": "llama-2-7b-chat.Q8_0.gguf", "recommendations": "very large, extremely low quality loss", "quantization": "Q8_0", "bits": 8, "size": 7.16, "max_ram": 9.66, "download": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q8_0.gguf"}]}]}], "featured": [{"model_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6"}], "default": {"model_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6"}} \ No newline at end of file diff --git a/schema/v1/models_collection.schema.json b/schema/v1/models_collection.schema.json index f2b0782..8e6a61e 100644 --- a/schema/v1/models_collection.schema.json +++ b/schema/v1/models_collection.schema.json @@ -21,6 +21,38 @@ "$ref": "https://opla.github.io/models/schema/v1/model.schema.json" }, "uniqueItems": true + }, + "featured": { + "type": "array", + "items": { + "type": "object", + "properties": { + "model_id": { + "type": "string" + }, + "headline": { + "type": "string" + } + }, + "required": [ + "model_id" + ] + }, + "uniqueItems": true + }, + "default": { + "type": "object", + "properties": { + "model_id": { + "type": "string" + }, + "headline": { + "type": "string" + } + }, + "required": [ + "model_id" + ] } }, "required": [ diff --git a/scripts/build.py b/scripts/build.py index 7ffeac1..46650f5 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -23,7 +23,13 @@ "_version": "1", "created_at": datetime.now(tz=UTC).strftime("%Y-%m-%dT%H:%M:%S%.%fZ"), "updated_at": datetime.now(tz=UTC).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), - "models": [] + "models": [], + "featured": [{ + "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6" + }], + "default": { + "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6" + } } def add_model(model):