Skip to content

Commit

Permalink
fix: repo
Browse files Browse the repository at this point in the history
  • Loading branch information
bojiang committed Jul 12, 2024
1 parent 1c86d3e commit a5a64cf
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 54 deletions.
4 changes: 1 addition & 3 deletions pyrightconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@
},
"venvPath": ".",
"venv": ".venv",
"pythonVersion": "3.12",
"pythonVersion": "3.9",
"enableExperimentalFeatures": true,
"reportMissingImports": "warning",
"reportMissingTypeStubs": false,
"reportPrivateUsage": "warning",
"reportUnknownArgumentType": "warning",
"reportUnknownMemberType": "warning",
"reportUnknownVariableType": "warning",
"reportUnsupportedDunderAll": "warning",
"reportWildcardImportFromLibrary": "warning"
}
10 changes: 5 additions & 5 deletions src/openllm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from openllm.clean import app as clean_app
from openllm.cloud import deploy as cloud_deploy
from openllm.cloud import ensure_cloud_context, get_cloud_machine_spec
from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, output
from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo, output
from openllm.local import run as local_run
from openllm.local import serve as local_serve
from openllm.model import app as model_app
Expand All @@ -32,15 +32,15 @@
app.add_typer(clean_app, name='clean')


def _select_bento_name(models, target):
def _select_bento_name(models: list[BentoInfo], target: DeploymentTarget):
from tabulate import tabulate

options = []
model_infos = [[model.repo.name, model.name, can_run(model, target)] for model in models]
model_name_groups = defaultdict(lambda: 0)
model_infos = [(model.repo.name, model.name, can_run(model, target)) for model in models]
model_name_groups = defaultdict(lambda: 0.0)
for repo, name, score in model_infos:
model_name_groups[(repo, name)] += score
table_data = [[name, repo, CHECKED if score > 0 else ''] for (repo, name), score in model_name_groups.items()]
table_data = [(name, repo, CHECKED if score > 0 else '') for (repo, name), score in model_name_groups.items()]
if not table_data:
output('No model found', style='red')
raise typer.Exit(1)
Expand Down
2 changes: 1 addition & 1 deletion src/openllm/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def ensure_cloud_context():
raise typer.Exit(1)


def get_cloud_machine_spec():
def get_cloud_machine_spec() -> list[DeploymentTarget]:
ensure_cloud_context()
cmd = ['bentoml', 'deployment', 'list-instance-types', '-o', 'json']
try:
Expand Down
11 changes: 5 additions & 6 deletions src/openllm/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

CONFIG_FILE = OPENLLM_HOME / 'config.json'

CHECKED = ''
CHECKED = 'Yes'

T = typing.TypeVar('T')

Expand Down Expand Up @@ -84,7 +84,7 @@ def output(content, level=0, style=None, end=None):


class Config(SimpleNamespace):
repos: dict[str, str] = {'default': 'git+https://github.com/bentoml/openllm-models@main'}
repos: dict[str, str] = {'default': 'https://github.com/bentoml/openllm-models@main'}
default_repo: str = 'default'

def tolist(self):
Expand Down Expand Up @@ -117,18 +117,17 @@ class RepoInfo(SimpleNamespace):

def tolist(self):
if VERBOSE_LEVEL.get() <= 0:
return f'{self.name} ({self.url})'
return f'{self.name} ({self.url}@{self.branch})'
if VERBOSE_LEVEL.get() <= 10:
return dict(name=self.name, url=self.url, path=str(self.path))
return dict(name=self.name, url=f"{self.url}@{self.branch}", path=str(self.path))
if VERBOSE_LEVEL.get() <= 20:
return dict(
name=self.name,
url=self.url,
url=f"{self.url}@{self.branch}",
path=str(self.path),
server=self.server,
owner=self.owner,
repo=self.repo,
branch=self.branch,
)


Expand Down
95 changes: 56 additions & 39 deletions src/openllm/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,39 +56,26 @@ def update():
repos_in_use = set()
for repo_name, repo in config.repos.items():
repo = parse_repo_url(repo, repo_name)
repos_in_use.add((repo.server, repo.owner, repo.repo))
if repo.path.exists(): # TODO: use update instead of remove and clone
repos_in_use.add((repo.server, repo.owner, repo.repo, repo.branch))
if repo.path.exists():
shutil.rmtree(repo.path, ignore_errors=True)
if not repo.path.exists():
repo.path.parent.mkdir(parents=True, exist_ok=True)
try:
dulwich.porcelain.clone(
f'https://{repo.server}/{repo.owner}/{repo.repo}.git',
str(repo.path),
checkout=True,
depth=1,
branch=repo.branch,
)
output('')
output(f'Repo `{repo.name}` updated', style='green')
except:
shutil.rmtree(repo.path, ignore_errors=True)
output(f'Failed to clone repo {repo.name}', style='red')
else:
try:
import dulwich.porcelain

dulwich.porcelain.pull(
str(repo.path), f'https://{repo.server}/{repo.owner}/{repo.repo}.git', refspecs=repo.branch, force=True
)
dulwich.porcelain.clean(str(repo.path), str(repo.path))
output('')
output(f'Repo `{repo.name}` updated', style='green')
except:
shutil.rmtree(repo.path, ignore_errors=True)
output(f'Failed to update repo {repo.name}', style='red')
for c in REPO_DIR.glob('*/*/*'):
repo_spec = tuple(c.parts[-3:])
repo.path.parent.mkdir(parents=True, exist_ok=True)
try:
dulwich.porcelain.clone(
repo.url,
str(repo.path),
checkout=True,
depth=1,
branch=repo.branch,
)
output('')
output(f'Repo `{repo.name}` updated', style='green')
except Exception as e:
shutil.rmtree(repo.path, ignore_errors=True)
output(f'Failed to clone repo {repo.name}', style='red')
output(e)
for c in REPO_DIR.glob('*/*/*/*'):
repo_spec = tuple(c.parts[-4:])
if repo_spec not in repos_in_use:
shutil.rmtree(c, ignore_errors=True)
output(f'Removed unused repo cache {c}')
Expand Down Expand Up @@ -127,26 +114,50 @@ def ensure_repo_updated():
)


GIT_REPO_RE = re.compile(r'git\+https://(?P<server>.+)/(?P<owner>.+)/(?P<repo>.+?)(@(?P<branch>.+))?$')
GIT_HTTP_RE = re.compile(r'(?P<schema>git|ssh|http|https):\/\/(?P<server>[\.\w\d\-]+)\/(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$')
GIT_SSH_RE = re.compile(r'git@(?P<server>[\.\w\d-]+):(?P<owner>[\w\d\-]+)\/(?P<repo>[\w\d\-\_\.]+)(@(?P<branch>.+))?(\/)?$')


def parse_repo_url(repo_url: str, repo_name: typing.Optional[str] = None) -> RepoInfo:
"""
parse the git repo url to server, owner, repo name, branch
>>> parse_repo_url('git+https://github.com/bentoml/bentovllm@main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm@main')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('git+https://github.com/bentoml/bentovllm')
>>> parse_repo_url('https://github.com/bentoml/bentovllm.git@main')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('https://github.com/bentoml/bentovllm')
('github.com', 'bentoml', 'bentovllm', 'main')
>>> parse_repo_url('git@github.com:bentoml/openllm-models.git')
('github.com', 'bentoml', 'openllm-models', 'main')
"""
match = GIT_REPO_RE.match(repo_url)
if not match:
raise ValueError(f'Invalid git repo url: {repo_url}')
match = GIT_HTTP_RE.match(repo_url)
if match:
schema = match.group('schema')
else:
match = GIT_SSH_RE.match(repo_url)
if not match:
raise ValueError(f'Invalid git repo url: {repo_url}')
schema = None

if match.group('branch') is not None:
repo_url = repo_url[:match.start('branch') - 1]

server = match.group('server')
owner = match.group('owner')
repo = match.group('repo')
if repo.endswith('.git'):
repo = repo[:-4]
branch = match.group('branch') or 'main'
path = REPO_DIR / server / owner / repo

if schema is not None:
repo_url = f'{schema}://{server}/{owner}/{repo}'
else:
repo_url = f'git@{server}:{owner}/{repo}'

path = REPO_DIR / server / owner / repo / branch
return RepoInfo(
name=repo if repo_name is None else repo_name,
url=repo_url,
Expand All @@ -165,6 +176,12 @@ def add(name: str, repo: str):
output(f'Invalid repo name: {name}, should only contain letters, numbers and underscores', style='red')
return

try:
parse_repo_url(repo)
except ValueError as e:
output(f'Invalid repo url: {repo}', style='red')
return

config = load_config()
if name in config.repos:
override = questionary.confirm(f'Repo {name} already exists({config.repos[name]}), override?').ask()
Expand Down

0 comments on commit a5a64cf

Please sign in to comment.