diff --git a/.github/workflows/check_code_quality.yml b/.github/workflows/check_code_quality.yml index fec93652..1ca2450b 100644 --- a/.github/workflows/check_code_quality.yml +++ b/.github/workflows/check_code_quality.yml @@ -45,10 +45,6 @@ jobs: source venv/bin/activate pip install --upgrade pip pip install .[quality] - - name: Check style with black - run: | - source venv/bin/activate - black --check . - name: Check style with ruff run: | source venv/bin/activate diff --git a/Makefile b/Makefile index 5a0e0843..82a4acea 100644 --- a/Makefile +++ b/Makefile @@ -51,12 +51,10 @@ tpu-tgi: # Run code quality checks style_check: - black --check . ruff . style: - black . - ruff . --fix + ruff check . --fix # Utilities to release to PyPi build_dist_install_tools: @@ -70,7 +68,7 @@ pypi_upload: ${PACKAGE_DIST} ${PACKAGE_WHEEL} # Tests test_installs: - python -m pip install .[tpu,tests] + python -m pip install .[tests] tests: test_installs python -m pytest -sv tests diff --git a/optimum/tpu/__init__.py b/optimum/tpu/__init__.py index adb37a59..df32aeb3 100644 --- a/optimum/tpu/__init__.py +++ b/optimum/tpu/__init__.py @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .version import __version__, VERSION # noqa: F401 +from .version import __version__, VERSION # noqa: F401 +from .modeling import AutoModelForCausalLM # noqa: F401 diff --git a/optimum/tpu/modeling.py b/optimum/tpu/modeling.py index 5d6e7ac6..86b1945f 100644 --- a/optimum/tpu/modeling.py +++ b/optimum/tpu/modeling.py @@ -19,13 +19,13 @@ from typing import Any from loguru import logger -from transformers import AutoModelForCausalLM +from transformers import AutoModelForCausalLM as BaseAutoModelForCausalLM from transformers.utils import is_accelerate_available # TODO: For now TpuModelForCausalLM is just a shallow wrapper of # AutoModelForCausalLM, later this could be replaced by a custom class. -class TpuModelForCausalLM(AutoModelForCausalLM): +class AutoModelForCausalLM(BaseAutoModelForCausalLM): @classmethod def from_pretrained( @@ -46,11 +46,11 @@ def from_pretrained( else: device = "xla" if is_accelerate_available(): - model = AutoModelForCausalLM.from_pretrained( + model = BaseAutoModelForCausalLM.from_pretrained( pretrained_model_name_or_path, device_map=device, *model_args, **kwargs ) else: - model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) + model = BaseAutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs) model.to(device) # Update config with specific data) if task is not None or getattr(model.config, "task", None) is None: diff --git a/pyproject.toml b/pyproject.toml index f5957a18..90675c21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,10 @@ dependencies = [ "loguru == 0.6.0" ] +[build-system] +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" + [project.optional-dependencies] tests = ["pytest", "safetensors"] quality = ["black", "ruff", "isort",] @@ -58,8 +62,8 @@ Documentation = "https://hf.co/docs/optimum/tpu" Repository = "https://github.com/huggingface/optimum-tpu" Issues = "https://github.com/huggingface/optimum-tpu/issues" -[tool.setuptools.dynamic] -version = {attr = "optimum.tpu.__version__"} +[tool.setuptools_scm] + [tool.setuptools.packages.find] include = ["optimum.tpu"] diff --git a/text-generation-inference/server/text_generation_server/generator.py b/text-generation-inference/server/text_generation_server/generator.py index 17fa439f..5df4bd4e 100644 --- a/text-generation-inference/server/text_generation_server/generator.py +++ b/text-generation-inference/server/text_generation_server/generator.py @@ -11,7 +11,7 @@ from loguru import logger from transformers import AutoTokenizer, PreTrainedTokenizerBase, StaticCache from transformers.generation import GenerationConfig -from optimum.tpu.modeling import TpuModelForCausalLM +from optimum.tpu import AutoModelForCausalLM from optimum.tpu.generation import TokenSelector from .pb.generate_pb2 import ( @@ -301,7 +301,7 @@ class TpuGenerator(Generator): def __init__( self, - model: TpuModelForCausalLM, + model, tokenizer: PreTrainedTokenizerBase, ): self.model = model @@ -633,7 +633,7 @@ def from_pretrained( """ logger.info("Loading model (this can take a few minutes).") start = time.time() - model = TpuModelForCausalLM.from_pretrained(model_path) + model = AutoModelForCausalLM.from_pretrained(model_path) end = time.time() logger.info(f"Model successfully loaded in {end - start:.2f} s.") tokenizer = AutoTokenizer.from_pretrained(model_path)