Skip to content

Update gitignore implementation to support more cases #2687

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ build/
.fleet
.env
.aider*
uv.lock
.local/
.claude/settings.local.json
162 changes: 162 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
[project]
name = "dstack"
dynamic = ["version", "readme"]
authors = [{ name = "Andrey Cheptsov", email = "[email protected]" }]
description = "dstack is an open-source orchestration engine for running AI workloads on any cloud or on-premises."
requires-python = ">=3.9"
classifiers = [
"Development Status :: 4 - Beta",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
"Programming Language :: Python :: 3",
]
dependencies = [
"pyyaml",
"requests",
"typing-extensions>=4.0.0",
"cryptography",
"packaging",
"python-dateutil",
"cachetools",
"gitpython",
"jsonschema",
"paramiko>=3.2.0",
"cursor",
"rich",
"rich-argparse",
"tqdm",
"simple-term-menu",
"pydantic>=1.10.10,<2.0.0",
"pydantic-duality>=1.2.4",
"websocket-client",
"python-multipart>=0.0.16",
"filelock",
"psutil",
"gpuhunt==0.1.6",
"argcomplete>=3.5.0",
"gitignore-parser>=0.1.12",
]

[project.urls]
Homepage = "https://dstack.ai"
Source = "https://github.com/dstackai/dstack"
Documentation = "https://dstack.ai/docs"
Issues = "https://github.com/dstackai/dstack/issues"
Changelog = "https://github.com/dstackai/dstack/releases"
Discord = "https://discord.gg/u8SmfwPpMd"

[build-system]
requires = ["hatchling", "hatch-fancy-pypi-readme"]
build-backend = "hatchling.build"

[project.scripts]
dstack = "dstack._internal.cli.main:main"

[tool.hatch.version]
path = "src/dstack/version.py"

[tool.hatch.build.targets.sdist]
artifacts = ["src/dstack/_internal/server/statics/**"]

[tool.hatch.build.targets.wheel]
artifacts = ["src/dstack/_internal/server/statics/**"]

[tool.hatch.metadata.hooks.fancy-pypi-readme]
content-type = "text/markdown"

[[tool.hatch.metadata.hooks.fancy-pypi-readme.fragments]]
path = "README.md"

[[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]]
pattern = '<picture>\s*|<source[^>]*>\s*|\s*</picture>|<video[^>]*>\s*|</video>\s*|### Demo\s*'
replacement = ''
ignore-case = true

[dependency-groups]
dev = [
"build>=1.2.2.post1",
"httpx>=0.28.1",
"pre-commit>=4.2.0",
"pytest-asyncio>=0.23.8",
"pytest-httpbin>=2.1.0",
"httpbin>=0.10.2", # indirect to make compatible with Werkzeug 3
"pytest~=7.2",
"pytest-socket>=0.7.0",
"requests-mock>=1.12.1",
"openai>=1.68.2",
"freezegun>=1.5.1",
"ruff==0.11.6", # should match .pre-commit-config.yaml
"testcontainers>=4.9.2",
"pytest-xdist>=3.6.1",
]

[project.optional-dependencies]
gateway = [
"fastapi",
"starlette>=0.26.0",
"uvicorn",
"aiorwlock",
"aiocache",
"httpx",
"jinja2",
]
server = [
"fastapi",
"starlette>=0.26.0",
"uvicorn",
"aiorwlock",
"aiocache",
"httpx",
"jinja2",
"watchfiles",
"sqlalchemy[asyncio]>=2.0.0",
"sqlalchemy_utils>=0.40.0",
"alembic>=1.10.2",
"apscheduler<4",
"aiosqlite",
"docker>=6.0.0",
"python-dxf==12.1.0",
"sentry-sdk[fastapi]",
"alembic-postgresql-enum",
"asyncpg",
"python-json-logger>=3.1.0",
"prometheus-client",
"grpcio>=1.50",
"backports.entry-points-selectable",
]
aws = ["boto3>=1.38.13", "botocore", "dstack[server]"]
azure = [
"azure-identity>=1.12.0",
"azure-mgmt-subscription>=3.1.1",
"azure-mgmt-compute>=29.1.0",
"azure-mgmt-network>=23.0.0,<28.0.0",
"azure-mgmt-resource>=22.0.0",
"azure-mgmt-authorization>=3.0.0",
"azure-mgmt-msi>=7.0.0",
"dstack[server]",
]
gcp = [
"google-auth>=2.3.0",
"google-cloud-storage>=2.0.0",
"google-cloud-compute>=1.5.0",
"google-cloud-logging>=2.0.0",
"google-api-python-client>=2.80.0",
"google-cloud-billing>=1.11.0",
"google-cloud-tpu>=1.18.3",
"dstack[server]",
]
datacrunch = ["datacrunch", "dstack[server]"]
kubernetes = ["kubernetes", "dstack[server]"]
lambda = ["boto3>=1.38.13", "botocore", "dstack[server]"]
oci = [
"oci>=2.150.0",
"cryptography>=44.0.3",
# pyopenssl is indirect to avoid uv falling back to the old version
# due to an upper limit from oci
"pyopenssl>=23.2.0",
"dstack[server]",
]
nebius = ["nebius>=0.2.19,<0.3; python_version >= '3.10'", "dstack[server]"]
all = [
"dstack[gateway,server,aws,azure,gcp,datacrunch,kubernetes,lambda,nebius,oci]",
]
6 changes: 6 additions & 0 deletions src/dstack/_internal/core/models/repos/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
from typing_extensions import Literal

from dstack._internal.core.models.repos.base import BaseRepoInfo, Repo
from dstack._internal.utils.common import sizeof_fmt
from dstack._internal.utils.hash import get_sha256, slugify
from dstack._internal.utils.ignore import GitIgnore
from dstack._internal.utils.logging import get_logger
from dstack._internal.utils.path import PathLike

logger = get_logger(__name__)


class LocalRepoInfo(BaseRepoInfo):
repo_type: Literal["local"] = "local"
Expand Down Expand Up @@ -75,6 +79,8 @@ def write_code_file(self, fp: BinaryIO) -> str:
arcname="",
filter=TarIgnore(self.run_repo_data.repo_dir, globs=[".git"]),
)

logger.debug(f"Code file size: {sizeof_fmt(fp.tell())} bytes")
return get_sha256(fp)

def get_repo_info(self) -> LocalRepoInfo:
Expand Down
139 changes: 74 additions & 65 deletions src/dstack/_internal/utils/ignore.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import fnmatch
from itertools import zip_longest
import os
from pathlib import Path
from typing import Dict, List, Optional
from typing import List

from gitignore_parser import parse_gitignore_str

from dstack._internal.utils.path import PathLike

Expand All @@ -16,75 +17,83 @@ def __init__(
if ignore_files is not None
else [".gitignore", ".git/info/exclude", ".dstackignore"]
)
self.ignore_globs: Dict[str, List[str]] = {".": globs or []}
self.load_recursive()
self.parser = None
self._create_combined_parser(globs or [])

def load_ignore_file(self, path: str, ignore_file: Path):
    """
    Read the glob patterns from one ignore file and register them under `path`.

    Args:
        path: key of `self.ignore_globs` the patterns are appended to
            (the caller passes the directory of the ignore file, relative
            to the repo root).
        ignore_file: ignore file to read, one pattern per line.

    Each line loses its newline, its unescaped trailing whitespace
    (via `self.rstrip`) and any trailing "/"; an escaped space ("\\ ")
    becomes a plain space. Empty lines and lines starting with "#" are skipped.
    """
    globs = self.ignore_globs.setdefault(path, [])
    # Decode explicitly as UTF-8 instead of the platform default so the same
    # ignore file yields the same patterns on every OS/locale. Undecodable
    # bytes are replaced rather than aborting the whole load.
    with ignore_file.open("r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = self.rstrip(line.rstrip("\n")).rstrip("/")
            line = line.replace("\\ ", " ")
            if line.startswith("#") or not line:
                continue
            globs.append(line)
def _create_combined_parser(self, additional_globs: List[str]):
"""Create a single parser from all ignore files and additional globs."""
all_patterns = []

def load_recursive(self, path: Optional[Path] = None):
path = path or self.root_dir
for ignore_file in self.ignore_files:
ignore_file = path / ignore_file
if ignore_file.exists():
self.load_ignore_file(str(path.relative_to(self.root_dir)), ignore_file)
# Collect patterns from all ignore files recursively
self._collect_patterns_recursive(self.root_dir, all_patterns)

for subdir in path.iterdir():
if not subdir.is_dir() or self.ignore(subdir.relative_to(self.root_dir)):
continue
self.load_recursive(subdir)
# Add additional glob patterns
all_patterns.extend(additional_globs)

@staticmethod
def rstrip(value: str) -> str:
    """
    Strip trailing whitespace from `value`, keeping backslash-escaped whitespace.

    A trailing whitespace character is kept only when it is preceded by an
    odd number of backslashes, i.e. when the backslash directly before it is
    not itself escaped. So "foo\\ " (one backslash) keeps its space, while
    "foo\\\\ " (an escaped backslash followed by a space) loses it, which is
    how git trims ignore patterns.

    Returns the stripped string; "" if `value` is empty or all whitespace.
    """
    end = len(value)
    while end > 0 and value[end - 1].isspace():
        # Count the run of backslashes immediately before this whitespace char.
        backslashes = 0
        idx = end - 2
        while idx >= 0 and value[idx] == "\\":
            backslashes += 1
            idx -= 1
        if backslashes % 2 == 1:
            break  # escaped whitespace: it and everything before it stays
        end -= 1
    return value[:end]
self.parser = parse_gitignore_str("\n".join(all_patterns), self.root_dir)

@staticmethod
def fnmatch(name: str, pattern: str, sep="/") -> bool:
    """
    Check whether the trailing components of `name` match the glob `pattern`.

    Both strings are split on `sep` and compared component by component,
    starting from the last one. The pattern may be shorter than the name
    (it then matches a suffix of it) but never longer. A pattern starting
    with `sep` is anchored: it must match `name` from its first component.

    A "**" component matches zero or more whole components. As the last
    component of the pattern ("foo/**") it must match at least one, so the
    directory itself is not matched, only what is inside it.

    Args:
        name: path to test, relative to the directory owning the pattern.
        pattern: a single ignore glob.
        sep: path separator used by both `name` and `pattern`.

    Returns:
        True if `name` matches `pattern`, False otherwise.
    """
    # Imported locally under an alias: this method shares its name with the
    # stdlib module, so the alias keeps the lookup unambiguous.
    from fnmatch import fnmatch as match_component

    if pattern.startswith(sep):
        # Gives both sides a leading empty component, forcing equal depth.
        name = sep + name
    name_parts = name.split(sep)[::-1]
    pattern_parts = pattern.split(sep)[::-1]

    def match_from(ni: int, pi: int) -> bool:
        if pi == len(pattern_parts):
            return True  # pattern exhausted: it matched a suffix of name
        part = pattern_parts[pi]
        if part == "**":
            # A trailing "**" (pi == 0) has to consume at least one component.
            first = ni + 1 if pi == 0 else ni
            return any(match_from(k, pi + 1) for k in range(first, len(name_parts) + 1))
        if ni >= len(name_parts):
            return False  # pattern is deeper than name
        return match_component(name_parts[ni], part) and match_from(ni + 1, pi + 1)

    return match_from(0, 0)
def _collect_patterns_recursive(self, path: Path, patterns: List[str]):
"""
Recursively collect patterns from all ignore files and combine them into a single gitignore,
with the root directory as the base path.
"""
for ignore_file_name in self.ignore_files:
ignore_file = path / ignore_file_name
if ignore_file.exists():
try:
# Get relative path from root to this directory
if path == self.root_dir:
prefix = ""
else:
prefix = path.relative_to(self.root_dir)

def ignore(self, path: PathLike, sep="/") -> bool:
if not path:
# Read patterns and prefix them with directory path
with ignore_file.open("r", encoding="utf-8", errors="ignore") as f:
for line in f:
line = line.strip()
if line and not line.startswith("#"):
if prefix:
# Prefix patterns with directory path for subdirectories
if line.startswith("/"):
# Absolute pattern within subdirectory
patterns.append(os.path.join(prefix, line[1:]))
else:
# Relative pattern within subdirectory
# Add pattern that matches files directly in the subdirectory
patterns.append(os.path.join(prefix, line))
# Add pattern that matches files in deeper subdirectories
patterns.append(os.path.join(prefix, "**", line))
else:
# Root directory patterns
patterns.append(line)
except (OSError, UnicodeDecodeError):
# Skip files we can't read
continue

# Recursively process subdirectories
# Note: ideally ignored directories would be skipped here, but self.ignore()
# cannot be used yet because the parser is still being built.
# So all directories are processed and gitignore_parser applies the rules later.
try:
for subdir in path.iterdir():
if subdir.is_dir():
self._collect_patterns_recursive(subdir, patterns)
except (OSError, PermissionError):
# Skip directories we can't read
pass

def ignore(self, path: PathLike) -> bool:
"""Check if a path should be ignored."""
if not path or not self.parser:
return False

path = Path(path)
if path.is_absolute():
path = path.relative_to(self.root_dir)
try:
path = path.relative_to(self.root_dir)
except ValueError:
return False

tokens = ("." + sep + str(path)).split(sep)
for i in range(1, len(tokens)):
parent = sep.join(tokens[:-i])
globs = self.ignore_globs.get(parent)
if not globs:
continue
name = sep.join(tokens[-i:])
for glob in globs:
if self.fnmatch(name, glob, sep=sep):
return True
return False
# Convert to absolute path for gitignore_parser
abs_path = str(self.root_dir / path)
return self.parser(abs_path)
Loading