diff --git a/.github/actions/install_python_deps/action.yml b/.github/actions/install_python_deps/action.yml
index 064153bbcf..8ab2ac68c8 100644
--- a/.github/actions/install_python_deps/action.yml
+++ b/.github/actions/install_python_deps/action.yml
@@ -11,5 +11,4 @@ runs:
       shell: bash
       run: |
         source ${{ inputs.ov_dir }}/setupvars.sh
-        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
-        python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt
+        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --upgrade-strategy eager -r ./samples/requirements.txt
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index c507df7e87..ca45a45b10 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -670,8 +670,7 @@ jobs:
       - name: Download and convert and model
         run: |
           source ./ov/setupvars.sh
-          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
           optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
       - name: Compare
         env:
@@ -679,7 +678,7 @@
         run: |
           source ./ov/setupvars.sh
           printf 'What is 2 + 2?\nWhat is the previous answer?\nAdd 1 to it.\nSubtract 5 from it.\nWhy is the sun yellow?\nWhat was my first question?\n' > ./input.txt
-          timeout 30s ./build/samples/cpp/chat_sample/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt
+          timeout 60s ./build/samples/cpp/chat_sample/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt
           python -c "
           from transformers import LlamaTokenizer, AutoModelForCausalLM
           model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
@@ -698,7 +697,7 @@
               chat_history.append(gen_prompt(prompt))
               chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
               tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False)
-              answer = model.generate(**tokenized, max_length=1000, do_sample=False)
+              answer = model.generate(**tokenized, max_new_tokens=100, do_sample=False)
               answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
               chat_history.append(gen_answer(answer_str))
               output.write(answer_str)
@@ -708,7 +707,7 @@
           "
           diff pred.txt ref.txt
           echo "Chat sample cpp" passed
-          timeout 30s ./samples/python/chat_sample/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
+          timeout 60s ./samples/python/chat_sample/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
           diff pred2.txt ref.txt
           echo "Chat sample python" passed
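Note on the max_length -> max_new_tokens switch above: max_length caps prompt plus completion together, so as the accumulated chat_history grows, the budget left for each reference answer shrinks, while the C++ sample under test caps only new tokens (config.max_new_tokens = 100). Using max_new_tokens=100 makes the reference script generate under the same limit, so `diff pred.txt ref.txt` compares like with like. A minimal sketch of the difference, assuming only that transformers and a PyTorch backend are installed:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)

    tokenized = tokenizer('Why is the sun yellow?', return_tensors='pt')
    # max_length bounds prompt + completion: a long chat history silently
    # eats into the room left for the answer.
    by_total = model.generate(**tokenized, max_length=1000, do_sample=False)
    # max_new_tokens bounds only the completion, independent of prompt length.
    by_new = model.generate(**tokenized, max_new_tokens=100, do_sample=False)
    print(tokenizer.decode(by_new[0, tokenized['input_ids'].numel():], skip_special_tokens=True))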
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 5958dafe33..b3834dfd68 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -436,8 +436,7 @@ jobs:
       - name: Build and Install dependencies
         run: |
           source ${OV_INSTALL_DIR}/setupvars.sh
-          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --find-links ${OV_INSTALL_DIR}/wheels
-          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --find-links ${OV_INSTALL_DIR}/wheels --upgrade-strategy eager -r ./samples/requirements.txt --find-links ${OV_INSTALL_DIR}/wheels --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
           optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
           optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny whisper-tiny
diff --git a/.github/workflows/llm_bench-python.yml b/.github/workflows/llm_bench-python.yml
index 94c73d513c..d1ec8ea64d 100644
--- a/.github/workflows/llm_bench-python.yml
+++ b/.github/workflows/llm_bench-python.yml
@@ -43,8 +43,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install flake8 pytest black
-          GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt
-          python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+          GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
@@ -115,16 +114,8 @@ jobs:
           python-version: "3.10"
       - name: Test stateful
         run: |
-          GIT_CLONE_PROTECTION_ACTIVE=false python -m pip install -r tools/llm_bench/requirements.txt
-          python -m pip uninstall --yes openvino
-          python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+          GIT_CLONE_PROTECTION_ACTIVE=false python -m pip install -r tools/llm_bench/requirements.txt -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release git+https://github.com/huggingface/optimum.git tools/who_what_benchmark/ pytest -r tools/who_what_benchmark/requirements.txt
           python tools/llm_bench/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir . --stateful
           grep beam_idx pytorch/dldt/FP32/openvino_model.xml
       - name: WWB Tests
-        run: |
-          GIT_CLONE_PROTECTION_ACTIVE=false pip install -r tools/who_what_benchmark/requirements.txt
-          pip install git+https://github.com/huggingface/optimum.git
-          GIT_CLONE_PROTECTION_ACTIVE=false pip install tools/who_what_benchmark/
-          pip install pytest
-          python -m pip install -U --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --force-reinstall
-          python -m pytest -v tools/who_what_benchmark/tests
+        run: python -m pytest -v tools/who_what_benchmark/tests
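The same consolidation runs through every workflow above: separate pip install invocations are folded into one, so pip resolves the requirements files, local packages, and pre-release wheels in a single pass instead of a later invocation downgrading or re-pinning what an earlier one installed; the removed `python -m pip uninstall --yes openvino` step existed precisely to work around that two-pass problem. A rough Python equivalent of the merged call, where the pip_install helper is hypothetical and shown only to make the single resolver pass explicit:

    import subprocess
    import sys

    def pip_install(*args: str) -> None:
        # Hypothetical helper: one `python -m pip install` invocation carrying
        # every argument, so the resolver sees all requirements at once.
        subprocess.run([sys.executable, '-m', 'pip', 'install', *args], check=True)

    # Mirrors the merged line in install_python_deps/action.yml:
    pip_install(
        './thirdparty/openvino_tokenizers/[transformers]',
        '--pre', '--extra-index-url',
        'https://storage.openvinotoolkit.org/simple/wheels/pre-release',
        '--upgrade-strategy', 'eager', '-r', './samples/requirements.txt',
    )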
diff --git a/samples/cpp/chat_sample/chat_sample.cpp b/samples/cpp/chat_sample/chat_sample.cpp
index 41d63fc0f1..41615c71a9 100644
--- a/samples/cpp/chat_sample/chat_sample.cpp
+++ b/samples/cpp/chat_sample/chat_sample.cpp
@@ -13,7 +13,7 @@ int main(int argc, char* argv[]) try {
     std::string device = "CPU";  // GPU, NPU can be used as well
     ov::genai::LLMPipeline pipe(models_path, device);

-    ov::genai::GenerationConfig config;
+    ov::genai::GenerationConfig config = pipe.get_generation_config();
     config.max_new_tokens = 100;
     std::function<bool(std::string)> streamer = [](std::string word) {
         std::cout << word << std::flush;
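On this last hunk: a default-constructed ov::genai::GenerationConfig starts from library defaults, whereas pipe.get_generation_config() starts from the configuration shipped with the exported model (fields such as eos_token_id), so the sample now overrides only max_new_tokens and keeps everything else. A minimal sketch of the same pattern through the Python binding, assuming openvino_genai is installed and the model directory was exported as in the workflows above:

    import openvino_genai

    pipe = openvino_genai.LLMPipeline('./TinyLlama-1.1B-Chat-v1.0/', 'CPU')
    config = pipe.get_generation_config()  # keep model-provided defaults
    config.max_new_tokens = 100            # override only the token budget
    print(pipe.generate('What is 2 + 2?', config))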