From da00c67bbdab0dbe5f6316d4b39a38732b0398cd Mon Sep 17 00:00:00 2001 From: Alexander Suvorov Date: Wed, 10 Jul 2024 10:21:20 +0200 Subject: [PATCH] Add CB CI tests (#572) --- .github/workflows/causal_lm_cpp.yml | 117 ++++++++++++++++++ .github/workflows/genai_python_lib.yml | 87 +++++++++++++ .gitignore | 1 + .../CMakeLists.txt | 1 - .../continuous_batching_benchmark.cpp | 13 +- tests/cpp/generate_config.cpp | 12 +- tests/python_tests/test_preemption.py | 13 +- tests/python_tests/test_sampling.py | 32 ++--- 8 files changed, 246 insertions(+), 30 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index ed5cbeaeef..c10708e869 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -14,6 +14,7 @@ concurrency: env: l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240708_x86_64.tgz + m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240708_x86_64.tgz w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/w_openvino_toolkit_windows_2024.3.0.dev20240708_x86_64.zip jobs: cpp-multinomial-greedy_causal_lm-ubuntu: @@ -584,3 +585,119 @@ jobs: timeout 30s ./samples/python/chat_sample/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt diff pred2.txt ref.txt echo "Chat sample python" passed + + cpp-continuous-batching-ubuntu: + runs-on: ubuntu-20.04-8-cores + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Download, convert 
and build + run: | + source ./ov/setupvars.sh + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - name: Run gtests + run: | + source ./ov/setupvars.sh + ./build/tests/cpp/tests_continuous_batching + - name: Run accuracy_sample + run: | + source ./ov/setupvars.sh + timeout 50s ./build/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5 + - name: Run throughput_benchmark + run: | + wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + source ./ov/setupvars.sh + timeout 200s ./build/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark -n 10 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 + + + cpp-continuous-batching-windows: + runs-on: windows-latest + defaults: + run: + shell: cmd + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + curl --output ov.zip ${{ env.w_ov_link }} + unzip -d ov ov.zip + dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}" + shell: bash + - name: Install dependencies and build + run: | + call .\ov\setupvars.bat + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install 
./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - name: Run gtests + run: | + set PATH=.\build\openvino_genai\;%PATH% + call .\ov\setupvars.bat + .\build\tests\cpp\Release\tests_continuous_batching.exe + - name: Run accuracy_sample + run: | + set PATH=.\build\openvino_genai\;%PATH% + call .\ov\setupvars.bat + .\build\samples\cpp\continuous_batching_accuracy\Release\continuous_batching_accuracy.exe -m .\TinyLlama-1.1B-Chat-v1.0\ -n 5 + - name: Run throughput_benchmark + run: | + curl -o .\ShareGPT_V3_unfiltered_cleaned_split.json -s -L "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json" + set PATH=.\build\openvino_genai\;%PATH% + call .\ov\setupvars.bat + .\build\samples\cpp\continuous_batching_benchmark\Release\continuous_batching_benchmark.exe -n 2 --dynamic_split_fuse -m .\TinyLlama-1.1B-Chat-v1.0\ --dataset .\ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 + + cpp-continuous-batching-macos: + runs-on: macos-12 + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz + brew install coreutils scons + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url
https://storage.openvinotoolkit.org/simple/wheels/nightly + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - name: Run gtests + run: | + source ./ov/setupvars.sh + ./build/tests/cpp/tests_continuous_batching + - name: Run accuracy_sample + run: | + source ./ov/setupvars.sh + timeout 120s ./build/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5 + - name: Run throughput_benchmark + run: | + wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + source ./ov/setupvars.sh + ./build/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark -n 5 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml index 423ad0dc6e..640a293fa4 100644 --- a/.github/workflows/genai_python_lib.yml +++ b/.github/workflows/genai_python_lib.yml @@ -84,3 +84,90 @@ jobs: - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_generate_api.py -m precommit - run: call ./ov/setupvars.bat && python -m pip install . --verbose - run: python -m pytest ./tests/python_tests/test_generate_api.py -m precommit + + continuous_batching_python_lib_ubuntu: + # A tokenizers' dependency fails to compile on ubuntu-20 in CentOS7 env. + runs-on: ubuntu-22.04 + env: + # A tokenizers' dependency fails to compile with Ninja in CentOS7 env.
+ CMAKE_GENERATOR: Unix Makefiles + CMAKE_BUILD_PARALLEL_LEVEL: null + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + # Install CentOS7 instead of Ubuntu to match PyPI distribution ABI. + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl ${{ env.l_ov_centos_link }} | tar --directory ./ov/ --strip-components 1 -xz + sudo ./ov/install_dependencies/install_openvino_dependencies.sh + - name: Install dependencies and build + run: | + source ./ov/setupvars.sh + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_sampling.py -m precommit + - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_preemption.py -m precommit + - run: source ./ov/setupvars.sh && python -m pip install . 
+ - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit + + continuous_batching_python_lib_windows: + runs-on: windows-latest + defaults: + run: + shell: cmd + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + + - name: Install OpenVINO + run: | + curl --output ov.zip ${{ env.w_ov_link }} + unzip -d ov ov.zip + dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}" + shell: bash + - name: Install dependencies and build + run: | + call .\ov\setupvars.bat + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_sampling.py -m precommit + - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_preemption.py -m precommit + - run: call ./ov/setupvars.bat && python -m pip install . 
--verbose + - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit + + + continuous_batching_python_lib_macos: + runs-on: macos-12 + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Install OpenVINO + run: | + mkdir ./ov/ + curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz + brew install coreutils scons + - name: Download, convert and build + run: | + source ./ov/setupvars.sh + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release -j + - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_sampling.py -m precommit + - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_preemption.py -m precommit + - run: source ./ov/setupvars.sh && python -m pip install . 
+ - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit diff --git a/.gitignore b/.gitignore index 10035877da..83f354d57a 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ CMakeUserPresets.json *.?env* *.pyc __pycache__ +.py-build-cmake_cache diff --git a/samples/cpp/continuous_batching_benchmark/CMakeLists.txt b/samples/cpp/continuous_batching_benchmark/CMakeLists.txt index 52f1066a11..fea5f3e7e1 100644 --- a/samples/cpp/continuous_batching_benchmark/CMakeLists.txt +++ b/samples/cpp/continuous_batching_benchmark/CMakeLists.txt @@ -24,4 +24,3 @@ find_package(Threads REQUIRED) set(TARGET_NAME continuous_batching_benchmark) add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp) target_link_libraries(${TARGET_NAME} PRIVATE openvino::genai nlohmann_json::nlohmann_json cxxopts::cxxopts Threads::Threads) -target_compile_features(${TARGET_NAME} PRIVATE cxx_std_20) diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp index 11a4953bc2..123f218eb4 100644 --- a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp +++ b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp @@ -466,13 +466,12 @@ int main(int argc, char* argv[]) try { Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len); // Perform the first inference - ov::genai::SchedulerConfig scheduler_config { - .max_num_batched_tokens = max_batch_size, - .cache_size = cache_size, - .block_size = 32, - .dynamic_split_fuse = dynamic_split_fuse, - .max_num_seqs = 256, // not used if dynamic_split_fuse=True - }; + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.max_num_batched_tokens = max_batch_size; + scheduler_config.cache_size = cache_size; + scheduler_config.block_size = 32; + scheduler_config.dynamic_split_fuse = dynamic_split_fuse; + scheduler_config.max_num_seqs = 256;
// not used if dynamic_split_fuse=True std::cout << "Benchmarking parameters: " << std::endl; std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl; diff --git a/tests/cpp/generate_config.cpp b/tests/cpp/generate_config.cpp index 3bd53a4ca6..05180fb1a4 100644 --- a/tests/cpp/generate_config.cpp +++ b/tests/cpp/generate_config.cpp @@ -7,6 +7,7 @@ TEST(GenerationConfigTest, invalid_temperature) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.temperature = -0.1; config.do_sample = true; EXPECT_THROW(config.validate(), ov::Exception); @@ -14,6 +15,7 @@ TEST(GenerationConfigTest, invalid_temperature) { TEST(GenerationConfigTest, valid_temperature) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.temperature = 0.1; EXPECT_NO_THROW(config.validate()); @@ -21,6 +23,7 @@ TEST(GenerationConfigTest, valid_temperature) { TEST(GenerationConfigTest, invalid_top_p) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.top_p = -0.5; EXPECT_THROW(config.validate(), ov::Exception); @@ -30,6 +33,7 @@ TEST(GenerationConfigTest, invalid_top_p) { TEST(GenerationConfigTest, valid_top_p) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.top_p = 0.1; EXPECT_NO_THROW(config.validate()); @@ -37,6 +41,7 @@ TEST(GenerationConfigTest, valid_top_p) { TEST(GenerationConfigTest, invalid_repeatition_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.repetition_penalty = -3.0; EXPECT_THROW(config.validate(), ov::Exception); @@ -46,15 +51,17 @@ TEST(GenerationConfigTest, invalid_repeatition_penalty) { TEST(GenerationConfigTest, valid_repeatition_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.repetition_penalty = 1.8; EXPECT_NO_THROW(config.validate()); - 
config.repetition_penalty = 0.0; + config.repetition_penalty = 0.1; EXPECT_NO_THROW(config.validate()); } TEST(GenerationConfigTest, invalid_presence_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.presence_penalty = 3.0; EXPECT_THROW(config.validate(), ov::Exception); @@ -64,6 +71,7 @@ TEST(GenerationConfigTest, invalid_presence_penalty) { TEST(GenerationConfigTest, valid_presence_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.presence_penalty = 1.8; EXPECT_NO_THROW(config.validate()); @@ -73,6 +81,7 @@ TEST(GenerationConfigTest, valid_presence_penalty) { TEST(GenerationConfigTest, invalid_frequency_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.frequency_penalty = 3.0; EXPECT_THROW(config.validate(), ov::Exception); @@ -82,6 +91,7 @@ TEST(GenerationConfigTest, invalid_frequency_penalty) { TEST(GenerationConfigTest, valid_frequency_penalty) { ov::genai::GenerationConfig config; + config.max_new_tokens = 20; config.do_sample = true; config.frequency_penalty = 1.8; EXPECT_NO_THROW(config.validate()); diff --git a/tests/python_tests/test_preemption.py b/tests/python_tests/test_preemption.py index 6f9e6ad254..3b856e7111 100644 --- a/tests/python_tests/test_preemption.py +++ b/tests/python_tests/test_preemption.py @@ -1,11 +1,10 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import sys import pytest -from dataclasses import dataclass -from typing import List -from openvino_genai.py_continuous_batching import GenerationConfig +from openvino_genai import GenerationConfig from common import get_model_and_tokenizer, save_ov_model_from_optimum, generate_and_compare_with_reference_text, \ DEFAULT_SCHEDULER_CONFIG, get_scheduler_config, run_test_pipeline, get_models_list, get_beam_search, get_greedy, \ get_multinomial_all_parameters, 
get_multinomial_temperature_and_num_return_sequence, \ @@ -20,11 +19,11 @@ def get_greedy_seq_len_300() -> GenerationConfig: def get_beam_search_seq_len_300() -> GenerationConfig: generation_config = GenerationConfig() - generation_config.num_groups = 3 - generation_config.group_size = 2 + generation_config.num_beam_groups = 3 + generation_config.num_beams = 6 generation_config.max_new_tokens = 300 generation_config.num_return_sequences = 3 - generation_config.num_return_sequences = generation_config.num_groups * generation_config.group_size + generation_config.num_return_sequences = generation_config.num_beams return generation_config scheduler_params_list = [({"num_kv_blocks": 2, "block_size": 32, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, get_greedy()), @@ -56,6 +55,7 @@ def test_preemption(tmp_path, params): # todo: Anastasiia Pnevskaya: fix the test because it is hanging according max_new_tokens = std::numeric_limits::max() @pytest.mark.parametrize("dynamic_split_fuse", [True, False]) @pytest.mark.precommit +@pytest.mark.xfail(raises=AssertionError, reason="assert ref_text == ov_text fails in CI.", condition=sys.platform in ["win32", "darwin"], strict=True) def test_preemption_with_multinomial(tmp_path, dynamic_split_fuse): generation_configs = multinomial_params.generation_config for config in generation_configs: @@ -99,6 +99,7 @@ def test_preemption_with_multinomial(tmp_path, dynamic_split_fuse): @pytest.mark.parametrize("dynamic_split_fuse", [True, False]) @pytest.mark.precommit +@pytest.mark.xfail(reason="assert ref_text == ov_text fails", condition=sys.platform in ["win32", "darwin"]) def test_preemption_with_multinomial_n_seq(tmp_path, dynamic_split_fuse): generation_configs = multinomial_params_n_seq.generation_config for config in generation_configs: diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py index fb059ec3e4..f4f35deace 100644 --- a/tests/python_tests/test_sampling.py 
+++ b/tests/python_tests/test_sampling.py @@ -1,6 +1,7 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os +import sys import pytest import shutil import sys @@ -20,10 +21,13 @@ get_multinomial_temperature_and_frequence_penalty, get_multinomial_temperature_and_presence_penalty, \ generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty, get_scheduler_config - @pytest.mark.precommit @pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit"))) -@pytest.mark.xfail(reason='CPU: head size must be multiple of 16, current: 8. Ticket 145986.', raises=RuntimeError, strict=True) +@pytest.mark.xfail( + raises=RuntimeError, + reason="Test fails with error: CPU: head size must be multiple of 16, current: X. CVS-145986.", + strict=True, +) def test_sampling_precommit(tmp_path, model_id): run_test_pipeline(tmp_path, model_id) @@ -99,19 +103,21 @@ class RandomSamplingTestStruct: RandomSamplingTestStruct(generation_config=get_multinomial_temperature(), prompts=["What is OpenVINO?"], ref_texts=[ ["\n\nOpenVINO is a software development platform developed by OpenVINO, a set of technology companies and startups that enables developers to use the most"] ]), - RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_top_p(), + pytest.param(RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_top_p(), prompts=["What is OpenVINO?"], ref_texts=[ ["\nOpenVINO is an online application that allows users to create, test, and analyze their own software using a collection of software packages. 
The application"] ]), + marks=[pytest.mark.xfail(reason="assert ref_text == ov_text fails in CI.", strict=True, condition=sys.platform in ["darwin", "win32"])]), RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_top_k(), prompts=["What is OpenVINO?"], ref_texts=[ ["\n\nOpenVINO is a software that allows users to create a virtual machine with the ability to create a virtual machine in a virtual environment. Open"] ]), - RandomSamplingTestStruct(generation_config=get_multinomial_temperature_top_p_and_top_k(), + pytest.param(RandomSamplingTestStruct(generation_config=get_multinomial_temperature_top_p_and_top_k(), prompts=["What is OpenVINO?"], ref_texts=[ ["\nOpenVINO is an open source software that allows developers to create, manage, and distribute software. It is an open source project that allows developers"] ]), + marks=[pytest.mark.xfail(reason="assert ref_text == ov_text fails in CI.", strict=True, condition=sys.platform in ["darwin", "win32"])]), RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_repetition_penalty(), prompts=["What is OpenVINO?"], ref_texts=[ ["\nOpen Vino's are a new and improved way to find cheap, fast-investment frozen vegetables that have no waste or calories. They're"] ]), - RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_num_return_sequence(), + pytest.param(RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_num_return_sequence(), prompts=["What is location of"], ref_texts=[ [ @@ -119,8 +125,9 @@ class RandomSamplingTestStruct: ' map and where does the game player base base? 
I tend to like to do all draws on a specific spot (sometimes wide area,', ' them?\nJust the Mario Maker App, the location is they' ] - ]), - RandomSamplingTestStruct(generation_config=get_multinomial_all_parameters(), + ]), + marks=[pytest.mark.xfail(reason="assert ref_text == ov_text fails in CI.", strict=True)]), + pytest.param(RandomSamplingTestStruct(generation_config=get_multinomial_all_parameters(), prompts=["Tell me something about UAE"], ref_texts=[ [ @@ -130,6 +137,7 @@ class RandomSamplingTestStruct: '? I think that is a bit of an anomaly, but you might want to ask yourself this question: Where can some young people from Dubai or Bahrain' ] ]), + marks=[pytest.mark.xfail(reason="assert ref_text == ov_text fails in CI.", strict=True, condition=sys.platform in ["darwin", "win32"])]), RandomSamplingTestStruct(generation_config=get_multinomial_temperature_and_presence_penalty(), prompts=["What is OpenVINO?"], ref_texts=[ ["\n\nOpenVINO is a software development platform developed by OpenVINO, Inc., which uses a RESTful API for server-side web applications"] ]), @@ -139,7 +147,7 @@ class RandomSamplingTestStruct: RandomSamplingTestStruct(generation_config=get_greedy_with_penalties(), prompts=["What is OpenVINO?"], ref_texts=[ ["\nOpenVINO is a software that allows users to create and manage their own virtual machines. It's designed for use with Windows, Mac OS X"] ]), - RandomSamplingTestStruct(generation_config=get_multinomial_max_and_min_token(), + pytest.param(RandomSamplingTestStruct(generation_config=get_multinomial_max_and_min_token(), prompts=["What is OpenVINO?"], ref_texts=[ [ @@ -148,6 +156,7 @@ class RandomSamplingTestStruct: '\n\nOpenVINO is a social networking tool. OpenVINO is a free virtualization service that works at scale. 
The tool provides the ability' ] ]), + marks=[pytest.mark.xfail(reason="assert ref_text == ov_text fails in CI.", strict=True, condition=sys.platform in ["darwin", "win32"])]), ] @@ -165,13 +174,6 @@ class RandomSamplingTestStruct: "greedy_with_penalties", "multinomial_max_and_min_token"]) def test_individual_generation_configs_random(tmp_path, test_struct: RandomSamplingTestStruct): - if test_struct in ( - RANDOM_SAMPLING_TEST_CASES[1], - RANDOM_SAMPLING_TEST_CASES[3], - RANDOM_SAMPLING_TEST_CASES[6], - RANDOM_SAMPLING_TEST_CASES[10], - ) and sys.platform.startswith("win"): - pytest.xfail("assert ref_text == ov_text fails") generation_config = test_struct.generation_config prompts = test_struct.prompts