From 86acac5f4dd083bb1a0df5ea54420ff8cab52e67 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Fri, 3 Jan 2025 19:36:25 +0100
Subject: [PATCH] Fixed static LLM pipeline tests

---
 .../python_tests/test_llm_pipeline_static.py | 40 +++++++++----------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/tests/python_tests/test_llm_pipeline_static.py b/tests/python_tests/test_llm_pipeline_static.py
index c3500d15ac..6ef6162043 100644
--- a/tests/python_tests/test_llm_pipeline_static.py
+++ b/tests/python_tests/test_llm_pipeline_static.py
@@ -2,14 +2,18 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import openvino_genai as ov_genai
-from openvino.runtime import Core
 import pytest
+import platform
 import sys
 from ov_genai_test_utils import (
     get_models_list,
     get_chat_models_list,
+    read_model
 )
+from common import get_default_properties
 
+if sys.platform == 'darwin' or platform.machine() in ["aarch64", "arm64", "ARM64"]:
+    pytest.skip("NPU plugin is available only on Linux and Windows x86_64", allow_module_level=True)
 
 # This test suite is designed specifically to validate the functionality and robustness of the StaticLLMPipeline on NPUW:CPU.
 common_config = {
@@ -24,19 +28,18 @@
 def generate_chat_history(model_path, device, pipeline_config, questions):
     pipe = ov_genai.LLMPipeline(model_path, device, **pipeline_config)
     pipe.start_chat()
-    chat_history = [ pipe.generate(question, max_new_tokens=50) for question in questions ]
+    chat_history = [ pipe.generate(question, max_new_tokens=50, do_sample=False) for question in questions ]
     pipe.finish_chat()
     return chat_history
 
 
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_generation_compare_with_stateful():
     prompt = 'The Sun is yellow because'
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
 
-    stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU")
+    stateful_pipe = ov_genai.LLMPipeline(model_path, "CPU", **get_default_properties())
     ref_out = stateful_pipe.generate(prompt, max_new_tokens=100)
 
     static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config)
@@ -48,11 +51,10 @@
     assert ref_out == actual_out
 
 
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_length_properties_set_no_exception():
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     # NB: Check it doesn't throw any exception
     pipeline_config = { "MAX_PROMPT_LEN": 128, "MIN_RESPONSE_LEN": 64 }
     pipeline_config |= common_config
@@ -65,22 +67,20 @@
     { "MIN_RESPONSE_LEN": -1 },
     { "MIN_RESPONSE_LEN": "1" }
 ]
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.parametrize("pipeline_config", pipeline_configs)
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_invalid_length_properties_raise_error(pipeline_config):
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     pipeline_config |= common_config
     with pytest.raises(RuntimeError):
         pipe = ov_genai.LLMPipeline(model_path, "NPU", **pipeline_config)
 
 
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_batch_one_no_exception():
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     prompt = 'The Sun is yellow because'
     static_pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config)
     # Check it doesn't throw any exception when batch of size 1 is provided
@@ -88,11 +88,10 @@
 
 
 # TODO: For the further batch support
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_batch_raise_error():
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     prompt = 'The Sun is yellow because'
     pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config)
     with pytest.raises(RuntimeError):
@@ -101,26 +100,24 @@
 
 # TODO: For the further sampling support
 generation_configs = [
-    dict(num_beam_groups=3),
+    dict(num_beams=3),
     dict(do_sample=True)
 ]
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.parametrize("generation_config", generation_configs)
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_unsupported_sampling_raise_error(generation_config):
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     prompt = 'The Sun is yellow because'
     pipe = ov_genai.LLMPipeline(model_path, "NPU", **common_config)
     with pytest.raises(RuntimeError):
         pipe.generate(prompt, **generation_config)
 
 
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_max_number_of_tokens():
-    model_path = get_models_list()[0][1]
+    model_path = read_model(get_models_list()[0])[1]
     prompt = 'The Sun is yellow because'
     num_tokens = 128
 
@@ -133,11 +130,10 @@
 
 
 # FIXME: Known problem, output differs from stateful pipeline starting from 3rd prompt!
-@pytest.mark.skipif(sys.platform in ["darwin", "linux"], reason="Not supposed to work on mac. Segfault on linux CI")
 @pytest.mark.skip(reason="JIRA-144780: Output differs from stateful pipeline")
 @pytest.mark.precommit
 @pytest.mark.nightly
-def test_chat_generation(model_descr):
+def test_chat_generation():
     questions = [
         '1+1=',
         'What is the previous answer?',
@@ -145,9 +141,9 @@
         'What was my first question?'
     ]
 
-    model_path = get_chat_models_list()[0][1]
+    model_path = read_model(get_chat_models_list()[0])[1]
 
-    chat_history_stateful = generate_chat_history(model_path, "CPU", { }, questions)
+    chat_history_stateful = generate_chat_history(model_path, "CPU", get_default_properties(), questions)
     chat_history_static = generate_chat_history(model_path, "NPU", common_config, questions)
 
     print('npu chat: \n{chat_history_static}\n')