feat: Dynamically scrape Ollama model names #14

Merged · 7 commits · Oct 31, 2023

5 changes: 4 additions & 1 deletion .github/workflows/release-python-package.yaml
@@ -71,10 +71,13 @@ jobs:
- name: Write release notes
run: |
python -m pip install -e .
python -m pip install pre-commit
llamabot configure api-key --api-key="${{ secrets.OPENAI_API_KEY }}"
llamabot configure default-model --model-name="${{ secrets.OPENAI_DEFAULT_MODEL }}"
llamabot git write-release-notes

- name: Commit release notes
run: |
python -m pip install pre-commit
pre-commit run --all-files || true
git add .
git commit -m "Add release notes for ${{ env.version_number }}"
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -31,3 +31,11 @@ repos:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
types_or: [python]
- repo: local
hooks:
- id: autoupdate-ollama-models
name: Autoupdate Ollama Models
entry: python scripts/autoupdate_ollama_models.py
language: python
additional_dependencies: ["beautifulsoup4", "lxml", "requests"]
pass_filenames: false
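
Once this hook is installed, a quick local sanity check (assuming pre-commit itself is available in the environment) is to run it by id with `pre-commit run autoupdate-ollama-models --all-files`; since `pass_filenames: false` is set, the scraper runs once against the repository rather than once per staged file.
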
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,3 +1,4 @@
# Some references to help:
# https://wiki.python.org/moin/Distutils/Tutorial
# https://stackoverflow.com/a/24727824/1274908
include llamabot/bot/ollama_model_names.txt
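
Adding the text file to MANIFEST.in pulls it into the source distribution so the loader in model_dispatcher.py can find it next to the module. A minimal sketch for confirming the file actually ships with an installed copy of the package (assuming `llamabot.bot` is importable and the filename stays `ollama_model_names.txt`):

from importlib.resources import files

# Locate the packaged model-name list inside the installed llamabot.bot package.
names = (files("llamabot.bot") / "ollama_model_names.txt").read_text().splitlines()
print(f"{len(names)} packaged Ollama model names, e.g. {names[:3]}")
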
2 changes: 1 addition & 1 deletion docs/releases/v0.0.86.md
@@ -27,4 +27,4 @@ This version includes several enhancements and updates to the codebase, includin

### Chore

- Updated the versions of pre-commit hooks for pre-commit-hooks, black, and ruff-pre-commit. It also replaces the darglint hook with pydoclint for better documentation linting. (9cc49022) (Eric Ma)
- Updated the versions of pre-commit hooks for pre-commit-hooks, black, and ruff-pre-commit. It also replaces the darglint hook with pydoclint for better documentation linting. (9cc49022) (Eric Ma)
50 changes: 15 additions & 35 deletions llamabot/bot/model_dispatcher.py
@@ -13,41 +13,21 @@
from time import sleep
from loguru import logger
from functools import partial
from pathlib import Path
from functools import lru_cache

# get this list from: https://ollama.ai/library
ollama_model_keywords = [
"mistral",
"llama2",
"codellama",
"vicuna",
"orca-mini",
"llama2-uncensored",
"wizard-vicuna-uncensored",
"nous-hermes",
"phind-codellama",
"mistral-openorca",
"wizardcoder",
"wizard-math",
"llama2-chinese",
"stable-beluga",
"codeup",
"everythinglm",
"medllama2",
"wizardlm-uncensored",
"zephyr",
"falcon",
"wizard-vicuna",
"open-orca-platypus2",
"starcoder",
"samantha-mistral",
"openhermes2-mistral",
"wizardlm",
"sqlcoder",
"dolphin2.1-mistral",
"nexusraven",
"dolphin2.2-mistral",
"codebooga",
]

@lru_cache(maxsize=128)
def ollama_model_keywords() -> list:
"""Return ollama model keywords.

This is stored in the `ollama_model_names.txt` file
that is distributed with this package.

:returns: The list of model names.
"""
with open(Path(__file__).parent / "ollama_model_names.txt") as f:
return [line.strip() for line in f.readlines()]


def create_model(
@@ -82,7 +62,7 @@ def create_model(
# We use a `partial` here to ensure that we have the correct way of specifying
# a model name between ChatOpenAI and ChatOllama.
ModelClass = partial(ChatOpenAI, model_name=model_name)
if model_name.split(":")[0] in ollama_model_keywords:
if model_name.split(":")[0] in ollama_model_keywords():
ModelClass = partial(ChatOllama, model=model_name)

return ModelClass(
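
As a rough sketch of what the new lookup does at runtime: `ollama_model_keywords()` reads the packaged `ollama_model_names.txt` once, `lru_cache` memoizes the result, and `create_model` routes a model name to ChatOllama when the prefix before any `:` tag appears in that list. The snippet below mirrors that dispatch condition directly rather than calling `create_model` (whose full signature is not shown in this diff); the example model names are illustrative.

from llamabot.bot.model_dispatcher import ollama_model_keywords

# First call reads the packaged ollama_model_names.txt; repeat calls are served from the lru_cache.
keywords = ollama_model_keywords()
assert "mistral" in keywords

# Mirror of the dispatch rule in create_model: the prefix before any ":" tag picks the backend.
for name in ("mistral:7b-instruct", "gpt-4"):
    backend = "ChatOllama" if name.split(":")[0] in keywords else "ChatOpenAI"
    print(f"{name} -> {backend}")
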
31 changes: 31 additions & 0 deletions llamabot/bot/ollama_model_names.txt
@@ -0,0 +1,31 @@
mistral
llama2
codellama
vicuna
orca-mini
llama2-uncensored
wizard-vicuna-uncensored
nous-hermes
phind-codellama
mistral-openorca
wizardcoder
wizard-math
llama2-chinese
stable-beluga
codeup
everythinglm
medllama2
wizardlm-uncensored
zephyr
falcon
wizard-vicuna
open-orca-platypus2
starcoder
samantha-mistral
openhermes2-mistral
wizardlm
sqlcoder
dolphin2.1-mistral
nexusraven
dolphin2.2-mistral
codebooga
1 change: 1 addition & 0 deletions mkdocs.yaml
@@ -11,6 +11,7 @@ theme:
features:
- instant
- tabs
- content.code.copy
language: en

# We customize the navigation by hand to control the order
78 changes: 78 additions & 0 deletions scratch_notebooks/scrape_ollama_models.ipynb
@@ -0,0 +1,78 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import requests\n",
"\n",
"# Your HTML snippet\n",
"response = requests.get(\"https://ollama.ai/library\")\n",
"if response.status_code == 200:\n",
"\n",
" html_content = response.text\n",
"\n",
" # Parse the HTML snippet with BeautifulSoup\n",
" soup = BeautifulSoup(html_content, \"lxml\")\n",
"\n",
" # Find all h2 tags that contain the model names\n",
" model_names = [h2.text for h2 in soup.find_all(\"h2\")]\n",
"\n",
" # Print out the model names\n",
" for name in model_names:\n",
" model_name = (name.strip('\\n').strip(' ').strip('\\n'))\n",
" print(f'\"{model_name}\",')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"name\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "llamabot",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
21 changes: 21 additions & 0 deletions scripts/autoupdate_ollama_models.py
@@ -0,0 +1,21 @@
"""Automatically update the list of Ollama models in llamabot/bot/ollama_model_names.txt"""
from bs4 import BeautifulSoup
import requests

# Fetch the Ollama model library page
response = requests.get("https://ollama.ai/library")
if response.status_code == 200:
html_content = response.text

# Parse the HTML snippet with BeautifulSoup
soup = BeautifulSoup(html_content, "lxml")

# Find all h2 tags that contain the model names
model_names = [
h2.text.strip("\n").strip(" ").strip("\n") for h2 in soup.find_all("h2")
]

# Write model names to llamabot/bot/ollama_model_names.txt
with open("llamabot/bot/ollama_model_names.txt", "w") as f:
f.write("\n".join(model_names))
f.write("\n")