Skip to content

Commit

Permalink
Merge pull request #92 from aws/add-inf2-optimum
Browse files Browse the repository at this point in the history
Add `optimum-neuron` pipeline support for zero code deployment.
  • Loading branch information
philschmid authored Jul 14, 2023
2 parents 2237804 + d00cf8e commit ba3b7b6
Show file tree
Hide file tree
Showing 4 changed files with 222 additions and 0 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,22 @@ The `HF_API_TOKEN` environment variable defines the your Hugging Face authorizat
HF_API_TOKEN="api_XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
```

#### `HF_OPTIMUM_BATCH_SIZE`

The `HF_OPTIMUM_BATCH_SIZE` environment variable defines the batch size used when compiling the model to Neuron. The default value is `1`. It is not required when the model is already converted.

```bash
HF_OPTIMUM_BATCH_SIZE="1"
```

#### `HF_OPTIMUM_SEQUENCE_LENGTH`

The `HF_OPTIMUM_SEQUENCE_LENGTH` environment variable defines the sequence length used when compiling the model to Neuron. There is no default value, so it must be set to a positive integer when the model is not yet converted. It is not required when the model is already converted.

```bash
HF_OPTIMUM_SEQUENCE_LENGTH="128"
```

---

## 🧑🏻‍💻 User defined code/modules
Expand Down
99 changes: 99 additions & 0 deletions src/sagemaker_huggingface_inference_toolkit/optimum_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright 2023 The HuggingFace Team, Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import importlib.util
import logging
import os


# Evaluated once at import time. The parent package is probed first because
# find_spec() on a dotted name imports the parent and fails if it is missing;
# the short-circuit `and` keeps the second lookup from running in that case.
_optimum_neuron = (
    importlib.util.find_spec("optimum") is not None
    and importlib.util.find_spec("optimum.neuron") is not None
)

logger = logging.getLogger(__name__)


def is_optimum_neuron_available():
    """Return ``True`` if the ``optimum.neuron`` package was found when this module was imported."""
    return _optimum_neuron


def _positive_int_from_env(name, default=None):
    """Read environment variable ``name`` as a positive integer, raising ``ValueError`` otherwise."""
    raw_value = os.environ.get(name, default)
    try:
        value = int(raw_value)
    except (TypeError, ValueError):
        # TypeError: variable unset and no default (int(None)).
        # ValueError: variable set to something that is not an integer.
        value = 0
    if value <= 0:
        raise ValueError(f"{name} must be set to a positive integer. Current value is {raw_value}")
    return value


def get_input_shapes(model_dir):
    """Resolve the static input shapes (batch size and sequence length) for a model.

    The shapes are looked up in this order:

    1. The config loaded with ``AutoConfig.from_pretrained(model_dir)``. It is used when it
       defines both ``neuron_batch_size`` and ``neuron_sequence_length``. In that case the
       ``HF_OPTIMUM_BATCH_SIZE`` / ``HF_OPTIMUM_SEQUENCE_LENGTH`` environment variables are
       ignored and a warning is logged for each one that is set.
    2. The environment variables ``HF_OPTIMUM_SEQUENCE_LENGTH`` (required, no default) and
       ``HF_OPTIMUM_BATCH_SIZE`` (defaults to ``1``).

    Args:
        model_dir: Value forwarded to ``AutoConfig.from_pretrained``.

    Returns:
        A dict with the keys ``"batch_size"`` and ``"sequence_length"``. Values taken from
        the environment are ``int``; values taken from the config are returned as stored.

    Raises:
        ValueError: If the config does not provide the shapes and
            ``HF_OPTIMUM_SEQUENCE_LENGTH`` is unset or not a positive integer, or
            ``HF_OPTIMUM_BATCH_SIZE`` is set to something that is not a positive integer.
    """
    from transformers import AutoConfig

    # try to get input shapes from config file
    input_shapes = None
    try:
        config = AutoConfig.from_pretrained(model_dir)
        if hasattr(config, "neuron_batch_size") and hasattr(config, "neuron_sequence_length"):
            input_shapes = {
                "batch_size": config.neuron_batch_size,
                "sequence_length": config.neuron_sequence_length,
            }
    except Exception:
        # Deliberately best-effort: a missing or unreadable config is not an error here,
        # the environment variables below are the fallback. Keep a trace for debugging.
        logger.debug(
            "Could not read input shapes from the config in %s. Falling back to environment variables.",
            model_dir,
            exc_info=True,
        )
        input_shapes = None

    # return input shapes if available
    if input_shapes is not None:
        logger.info(
            f"Input shapes found in config file. Using input shapes from config with batch size {input_shapes['batch_size']} and sequence length {input_shapes['sequence_length']}"
        )
        for env_name in ("HF_OPTIMUM_BATCH_SIZE", "HF_OPTIMUM_SEQUENCE_LENGTH"):
            if os.environ.get(env_name, None) is not None:
                logger.warning(
                    f"{env_name} environment variable is set. Environment variable will be ignored and input shapes from config file will be used."
                )
        return input_shapes

    # extract input shapes from environment variables; the sequence length has no default,
    # so an unset variable must surface as a clear ValueError rather than int(None)'s TypeError
    sequence_length = _positive_int_from_env("HF_OPTIMUM_SEQUENCE_LENGTH")
    batch_size = _positive_int_from_env("HF_OPTIMUM_BATCH_SIZE", 1)
    logger.info(
        f"Using input shapes from environment variables with batch size {batch_size} and sequence length {sequence_length}"
    )
    return {"batch_size": batch_size, "sequence_length": sequence_length}


def get_optimum_neuron_pipeline(task, model_dir):
    """Build an optimum-neuron pipeline for ``task`` from the model stored in ``model_dir``.

    The task must be one of ``NEURONX_SUPPORTED_TASKS``. If ``model_dir`` does not contain
    a file named ``NEURON_FILE_NAME``, the model is treated as not yet converted and the
    pipeline is created with ``export=True``. The input shapes are always resolved through
    ``get_input_shapes`` and passed to the pipeline.

    Args:
        task: Name of the pipeline task.
        model_dir: Directory containing the model files.

    Returns:
        The object returned by ``optimum.neuron.pipelines.pipeline``.

    Raises:
        ValueError: If ``task`` is not in ``NEURONX_SUPPORTED_TASKS``.
    """
    from optimum.neuron.pipelines import NEURONX_SUPPORTED_TASKS, pipeline
    from optimum.neuron.utils import NEURON_FILE_NAME

    # Reject unsupported tasks up front, before touching the model directory.
    task_is_supported = task in NEURONX_SUPPORTED_TASKS
    if not task_is_supported:
        raise ValueError(
            f"Task {task} is not supported by optimum neuron and inf2. Supported tasks are: {list(NEURONX_SUPPORTED_TASKS.keys())}"
        )

    # A model counts as converted when the Neuron file is already in the directory.
    needs_export = NEURON_FILE_NAME not in os.listdir(model_dir)
    if needs_export:
        logger.info("Model is not converted. Checking if required environment variables are set and converting model.")

    # Static input shapes are resolved first, then handed to the pipeline factory.
    static_shapes = get_input_shapes(model_dir)
    return pipeline(task, model=model_dir, export=needs_export, input_shapes=static_shapes)
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from transformers.file_utils import is_tf_available, is_torch_available
from transformers.pipelines import Conversation, Pipeline

from sagemaker_huggingface_inference_toolkit.optimum_utils import is_optimum_neuron_available


if is_tf_available():
import tensorflow as tf
Expand Down Expand Up @@ -73,6 +75,9 @@ def is_aws_neuron_available():
"pooling_config.json",
]

# When optimum-neuron is installed, also list "model.neuron"
# (presumably the compiled Neuron model file — confirm against the export output).
if is_optimum_neuron_available():
    FILE_LIST_NAMES.extend(["model.neuron"])

REPO_ID_SEPARATOR = "__"

ARCHITECTURES_2_TASK = {
Expand Down
102 changes: 102 additions & 0 deletions tests/unit/test_optimum_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright 2021 The HuggingFace Team, Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile

import pytest
from transformers.testing_utils import require_torch

from sagemaker_huggingface_inference_toolkit.optimum_utils import (
get_input_shapes,
get_optimum_neuron_pipeline,
is_optimum_neuron_available,
)
from sagemaker_huggingface_inference_toolkit.transformers_utils import _load_model_from_hub


# Skip marker for tests that need optimum-neuron; applied when
# is_optimum_neuron_available() returns False.
# NOTE(review): the condition only checks that the package can be found; it does not
# itself probe for inf2 hardware, although the reason text mentions it.
require_inferentia = pytest.mark.skipif(
not is_optimum_neuron_available(),
reason="Skipping tests, since optimum neuron is not available or not running on inf2 instances.",
)


# Hub model id used by the tests that take input shapes from environment variables
# (presumably a checkpoint without Neuron export — confirm).
REMOTE_NOT_CONVERTED_MODEL = "hf-internal-testing/tiny-random-BertModel"
# Hub model id used by the test that reads input shapes from the model config
# (expected there: batch size 1, sequence length 16).
REMOTE_CONVERTED_MODEL = "optimum/tiny_random_bert_neuron"
# Pipeline task passed to get_optimum_neuron_pipeline in the pipeline tests.
TASK = "text-classification"


@require_torch
@require_inferentia
def test_not_supported_task():
    """An unsupported task name must be rejected with a ``ValueError``.

    The unsupported task is passed directly to ``get_optimum_neuron_pipeline``: the function
    takes the task as an argument and does not read ``HF_TASK``, so setting that environment
    variable would neither exercise the check nor be cleaned up afterwards.
    """
    with pytest.raises(ValueError, match="is not supported"):
        get_optimum_neuron_pipeline(task="not-supported-task", model_dir=os.getcwd())


@require_torch
@require_inferentia
def test_get_input_shapes_from_file():
    """Input shapes of an already converted model are read from its config file."""
    with tempfile.TemporaryDirectory() as download_dir:
        model_path = _load_model_from_hub(model_id=REMOTE_CONVERTED_MODEL, model_dir=download_dir)
        shapes = get_input_shapes(model_dir=model_path)
        # Values expected from the converted test model's config.
        assert shapes["batch_size"] == 1
        assert shapes["sequence_length"] == 16


@require_torch
@require_inferentia
def test_get_input_shapes_from_env():
    """Input shapes are taken from the HF_OPTIMUM_* environment variables when the config has none."""
    # NOTE(review): these variables are set process-wide and are not restored afterwards,
    # so they remain visible to tests that run later in the same session.
    os.environ.update({"HF_OPTIMUM_BATCH_SIZE": "4", "HF_OPTIMUM_SEQUENCE_LENGTH": "32"})
    with tempfile.TemporaryDirectory() as download_dir:
        model_path = _load_model_from_hub(model_id=REMOTE_NOT_CONVERTED_MODEL, model_dir=download_dir)
        shapes = get_input_shapes(model_dir=model_path)
        # The string values from the environment come back as integers.
        assert shapes["batch_size"] == 4
        assert shapes["sequence_length"] == 32


@require_torch
@require_inferentia
def test_get_optimum_neuron_pipeline_from_converted_model():
    """A model exported with ``optimum-cli`` can be loaded and used as a pipeline."""
    import subprocess

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Argument list instead of a shell string, and check=True so that a failed
        # export stops the test here instead of surfacing later as a confusing
        # pipeline loading error.
        subprocess.run(
            [
                "optimum-cli",
                "export",
                "neuron",
                "--model",
                "philschmid/tiny-distilbert-classification",
                "--sequence_length",
                "32",
                "--batch_size",
                "1",
                tmpdirname,
            ],
            check=True,
        )
        pipe = get_optimum_neuron_pipeline(task=TASK, model_dir=tmpdirname)
        r = pipe("This is a test")

        assert r[0]["score"] > 0.0
        assert isinstance(r[0]["label"], str)


@require_torch
@require_inferentia
def test_get_optimum_neuron_pipeline_from_non_converted_model():
    """A model that is not yet converted is exported on the fly using the env-provided shapes."""
    from unittest import mock

    # The sequence length is read from HF_OPTIMUM_SEQUENCE_LENGTH, so that is the
    # variable that has to be set here. patch.dict restores the previous environment
    # on exit, keeping this test independent of the order in which tests run.
    with mock.patch.dict(os.environ, {"HF_OPTIMUM_SEQUENCE_LENGTH": "32"}):
        with tempfile.TemporaryDirectory() as tmpdirname:
            storage_folder = _load_model_from_hub(
                model_id=REMOTE_NOT_CONVERTED_MODEL,
                model_dir=tmpdirname,
            )
            pipe = get_optimum_neuron_pipeline(task=TASK, model_dir=storage_folder)
            r = pipe("This is a test")

            assert r[0]["score"] > 0.0
            assert isinstance(r[0]["label"], str)

0 comments on commit ba3b7b6

Please sign in to comment.