Merge branch 'huggingface:main' into ipex_infer

jiqing-feng · Aug 13, 2024 · 207c3b8 · 207c3b8
2 parents e1ee02e + 0c577d1
commit 207c3b8
Show file tree

Hide file tree

Showing 35 changed files with 843 additions and 683 deletions.
diff --git a/.github/workflows/dockerfile_sanity.yml b/.github/workflows/dockerfile_sanity.yml
@@ -0,0 +1,44 @@
+name: Build and Test Docker Image  # builds docker/Dockerfile.intel and checks that a container starts from it
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docker/Dockerfile.intel'  # only triggered when the Dockerfile itself changes
+
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'docker/Dockerfile.intel'
+
+jobs:
+  build_and_run:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Build and Run Docker Image
+      run: |
+        IMAGE_NAME="intel_image:latest"
+        # The step runs under `bash -e`: test the build inline, a separate `$?` check would never be reached.
+        if ! docker build -f docker/Dockerfile.intel -t "$IMAGE_NAME" .; then
+          echo "Docker image build failed."
+          exit 1
+        fi
+        CONTAINER_ID=$(docker run -d "$IMAGE_NAME" tail -f /dev/null)
+        if docker inspect -f '{{.State.Running}}' "$CONTAINER_ID" 2>/dev/null | grep -q 'true'; then
+          echo "Container is running."
+        else
+          echo "Container failed to start."
+          docker logs "$CONTAINER_ID" 2>/dev/null || echo "No container ID found."
+          exit 1
+        fi
+        docker stop "$CONTAINER_ID"
+        docker rm "$CONTAINER_ID"
diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml
@@ -22,27 +22,27 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.9]
-        transformers-version: [4.39.0, 4.42.3]
-        ipex-version: [2.2.0, 2.3.*]
+        transformers-version: ["4.39.0", "4.43.*"]
+        ipex-version: ["2.2.0", "2.3.*"]
         include:
           - python-version: 3.8
             transformers-version: 4.39.0
             ipex-version: 2.2.0
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install torch==${{ matrix.ipex-version }} --extra-index-url https://download.pytorch.org/whl/cpu
-        pip install intel_extension_for_pytorch==${{ matrix.ipex-version }}
-        pip install Pillow parameterized
-        pip install transformers[testing]==${{ matrix.transformers-version }}
-        pip install .[ipex]
-    - name: Test with Pytest
-      run: |
-        pytest tests/ipex/
+      - uses: actions/checkout@v4  # same action versions as the OpenVINO workflows
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies  # specifiers are quoted: "2.3.*" and "[ipex]" are glob patterns to the shell
+        run: |
+          python -m pip install --upgrade pip
+          pip install "torch==${{ matrix.ipex-version }}" --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install "intel_extension_for_pytorch==${{ matrix.ipex-version }}"
+          pip install Pillow parameterized
+          pip install "transformers[testing]==${{ matrix.transformers-version }}"
+          pip install ".[ipex]"
+      - name: Test with Pytest  # runs the IPEX test directory only
+        run: |
+          pytest tests/ipex/
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -21,36 +21,37 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.12"]
-        transformers-version: ["4.36.0", "4.42.*"]
+        transformers-version: ["4.36.0", "4.43.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v4
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install transformers==${{ matrix.transformers-version }}
-        pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
-    - name: Test with Pytest
-      env:
-        HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-      run: |
-        pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
-    - name: Test basic
-      run: |
-        pip uninstall -y nncf
-        pytest tests/openvino/test_modeling_basic.py
-    - name: Test openvino-nightly
-      run: |
-        pip uninstall -y openvino
-        pip install openvino-nightly
-        python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
-        optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
 
+      - name: Install dependencies  # transformers is pinned after the project extras, so the matrix version is the last one pip installs
+        run: |
+          python -m pip install --upgrade pip
+          # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
+          pip install transformers==${{ matrix.transformers-version }}
+
+      - name: Test with Pytest  # OpenVINO tests except test_modeling_basic.py, which the next step runs
+        env:
+          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+        run: |
+          pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
+      - name: Test basic  # runs the basic test file after uninstalling nncf
+        run: |
+          pip uninstall -y nncf
+          pytest tests/openvino/test_modeling_basic.py
+      - name: Test openvino-nightly  # replaces openvino with openvino-nightly, then exports a tiny model via the Python API and the CLI
+        run: |
+          pip uninstall -y openvino
+          pip install openvino-nightly
+          python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
+          optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
diff --git a/.github/workflows/test_openvino_basic.yml b/.github/workflows/test_openvino_basic.yml
@@ -3,7 +3,7 @@ name: OpenVINO - Basic Test
 on:
   workflow_dispatch:
   schedule:
-    - cron:  '41 1 * * *'  # run every day at 1:41
+    - cron: "41 1 * * *" # run every day at 1:41
   push:
     branches:
       - v*-release
@@ -24,40 +24,41 @@ jobs:
         # This also ensures that the test fails if dependencies break for Python 3.7
         python-version: ["3.8", "3.12"]
         os: ["ubuntu-22.04", "windows-latest"]
-        transformers-version: ["4.42.*"]
+        transformers-version: ["4.43.*"]
         include:
-          - transformers-version: "4.36.0"
-            python-version: "3.12"
+          - python-version: "3.12"
             os: "ubuntu-22.04"
+            transformers-version: "4.36.0"
 
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v4
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Install dependencies
-      run: |
-        # Install openvino manually to prevent dependency conflicts when .[openvino] pins
-        # optimum or transformers to a specific version
-        # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install transformers==${{ matrix.transformers-version }}
-        pip install .[tests] openvino
-
-    - name: Pip freeze
-      run: pip freeze
-
-    - name: Test with Pytest
-      run: |
-        pytest tests/openvino/test_modeling_basic.py
-
-    - name: Slow tests
-      run: |
-        pip install nncf
-        pytest tests/openvino -s -m "run_slow" --durations=0
-      env:
-        RUN_SLOW: 1
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies  # transformers is pinned after the project install, so the matrix version is the last one pip installs
+        run: |
+          python -m pip install --upgrade pip
+          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          # Install openvino manually to prevent dependency conflicts when .[openvino] pins
+          # optimum or transformers to a specific version
+          pip install .[tests] openvino
+          pip install transformers==${{ matrix.transformers-version }}
+
+      - name: Pip freeze  # prints the resolved package versions into the job log
+        run: pip freeze
+
+      - name: Test with Pytest  # basic test file only
+        run: |
+          pytest tests/openvino/test_modeling_basic.py
+
+      - name: Slow tests  # installs nncf, then runs the tests marked run_slow with RUN_SLOW=1 set
+        run: |
+          pip install nncf
+          pytest tests/openvino -s -m "run_slow" --durations=0
+        env:
+          RUN_SLOW: 1
diff --git a/.github/workflows/test_openvino_notebooks.yml b/.github/workflows/test_openvino_notebooks.yml
@@ -40,8 +40,8 @@ jobs:
         # ffmpeg, torchaudio and pillow are required for image classification and audio classification pipelines
         sudo apt-get install ffmpeg
         pip install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
-        pip install ".[tests, openvino]" nbval
         pip install -r notebooks/openvino/requirements.txt
+        pip install .[tests,openvino] nbval
 
     - run: free -h
     - run: lscpu

diff --git a/README.md b/README.md
@@ -72,7 +72,7 @@ Below are examples of how to use OpenVINO and its [NNCF](https://docs.openvino.a
 
 #### Export:
 
-It is possible to export your model to the [OpenVINO IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) format with the CLI :
+It is also possible to export your model to the [OpenVINO IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) format with the CLI :
 
 ```plain
 optimum-cli export openvino --model gpt2 ov_model

diff --git a/docker/Dockerfile.intel b/docker/Dockerfile.intel
@@ -37,6 +37,7 @@ ARG TORCHVISION_VERSION=0.18.1+cpu
 ARG TORCHAUDIO_VERSION=2.3.1+cpu
 
 RUN python3 -m pip install --no-cache-dir \
+    intel-openmp \
     torch==${PYTORCH_VERSION}+cpu \
     torchvision==${TORCHVISION_VERSION} \
     torchaudio==${TORCHAUDIO_VERSION} \

diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx
@@ -9,6 +9,8 @@ specific language governing permissions and limitations under the License.
 
 # Export your model
 
+To export a [model](https://huggingface.co/docs/optimum/main/en/intel/openvino/models) hosted on the [Hub](https://huggingface.co/models), you can use our [space](https://huggingface.co/spaces/echarlaix/openvino-export). After conversion, a repository will be pushed under your namespace; this repository can be either public or private.
+
 ## Using the CLI
 
 To export your model to the [OpenVINO IR](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) format with the CLI :

diff --git a/docs/source/openvino/optimization.mdx b/docs/source/openvino/optimization.mdx
@@ -64,7 +64,7 @@ model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantiz
 You can tune quantization parameters to achieve a better performance accuracy trade-off as follows:
 
 ```python
-quantization_config = OVWeightQuantizationConfig(bits=4, sym=False, ratio=0.8, dataset="ptb")
+quantization_config = OVWeightQuantizationConfig(bits=4, sym=False, ratio=0.8, dataset="wikitext2")
 ```
 
 By default the quantization scheme will be [asymmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization), to make it [symmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) you can add `sym=True`.

diff --git a/examples/openvino/audio-classification/requirements.txt b/examples/openvino/audio-classification/requirements.txt
@@ -1,5 +1,5 @@
-datasets>=1.14.0
+datasets>=1.14.0,<2.20.0
 evaluate
 librosa
 torchaudio
-accelerate
+accelerate
diff --git a/examples/openvino/image-classification/requirements.txt b/examples/openvino/image-classification/requirements.txt
@@ -1,4 +1,4 @@
-datasets >= 1.8.0
+datasets>=1.14.0,<2.20.0
 torch >= 1.9.0
 torchvision>=0.6.0
 evaluate

diff --git a/examples/openvino/question-answering/requirements.txt b/examples/openvino/question-answering/requirements.txt
@@ -1,4 +1,4 @@
-datasets >= 1.8.0
+datasets>=1.14.0,<2.20.0
 torch >= 1.9.0
 evaluate
 accelerate
diff --git a/examples/openvino/text-classification/requirements.txt b/examples/openvino/text-classification/requirements.txt
@@ -1,4 +1,4 @@
-datasets >= 1.8.0
+datasets>=1.14.0,<2.20.0
 sentencepiece != 0.1.92
 scipy
 scikit-learn

diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 """Defines the command line for the export with OpenVINO."""
 
-import json
 import logging
 import sys
 from pathlib import Path
@@ -213,19 +212,7 @@ def parse_args(parser: "ArgumentParser"):
 
     def run(self):
         from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
-        from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIG, _DEFAULT_4BIT_CONFIGS, OVConfig
-
-        def _get_default_int4_config(model_id_or_path, library_name):
-            if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
-                return _DEFAULT_4BIT_CONFIGS[model_id_or_path]
-            if "transformers" in library_name and (Path(model_id_or_path) / "config.json").exists():
-                with (Path(model_id_or_path) / "config.json").open("r") as config_f:
-                    config = json.load(config_f)
-                    original_model_name = config.get("_name_or_path", "")
-                if original_model_name in _DEFAULT_4BIT_CONFIGS:
-                    return _DEFAULT_4BIT_CONFIGS[original_model_name]
-
-            return _DEFAULT_4BIT_CONFIG
+        from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIG, OVConfig, get_default_int4_config
 
         if self.args.library is None:
             # TODO: add revision, subfolder and token to args
@@ -260,7 +247,7 @@ def _get_default_int4_config(model_id_or_path, library_name):
                 and self.args.awq is None
                 and self.args.sensitivity_metric is None
             ):
-                quantization_config = _get_default_int4_config(self.args.model, library_name)
+                quantization_config = get_default_int4_config(self.args.model)
             else:
                 quantization_config = {
                     "bits": 8 if is_int8 else 4,
@@ -275,6 +262,9 @@ def _get_default_int4_config(model_id_or_path, library_name):
                     "scale_estimation": self.args.scale_estimation,
                 }
 
+            if quantization_config.get("dataset", None) is not None:
+                quantization_config["trust_remote_code"] = self.args.trust_remote_code
+
             if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
                 logger.warning(
                     f"--weight-format {self.args.weight_format} is deprecated, possible choices are fp32, fp16, int8, int4"

diff --git a/optimum/exporters/ipex/model_patcher.py b/optimum/exporters/ipex/model_patcher.py
@@ -34,7 +34,7 @@
 
 # Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version
 _TRANSFORMERS_MIN_VERSION = "4.39.0"
-_TRANSFORMERS_MAX_VERSION = "4.42.3"
+_TRANSFORMERS_MAX_VERSION = "4.43.99"
 
 _IPEX_EXPORTED_GENERATION_TASKS = ("text-generation",)