diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 21f35151bc..7c34ca7f66 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -347,7 +347,7 @@ jobs:
       - name: run and compare
         run: |
           source ./ov/setupvars.sh
-          ./build/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt
+          ./build/text_generation/causal_lm/cpp/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt
           ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
           python -c "
           with open('predictions_greedy.txt', 'r') as f:
@@ -393,7 +393,7 @@ jobs:
           Question: Can you please add 2 and 3
           A:' > ./prompt.txt
 
-          ./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
+          ./build/text_generation/causal_lm/cpp/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
           ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
           python -c "
           with open('predictions_greedy.txt', 'r') as f:
diff --git a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
index 056c923224..3b40529f38 100644
--- a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
+++ b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp
@@ -14,7 +14,7 @@ int main(int argc, char* argv[]) try {
 
     ov::LLMPipeline pipe(model_path, device);
     ov::GenerationConfig config = pipe.get_generation_config();
-    config.max_new_tokens = 20;
+    config.max_new_tokens = 100;
     config.num_beam_groups = 3;
     config.num_beams = 15;
     config.num_return_sequences = config.num_beams * prompts.size();
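
For context, the beam_search_causal_lm.cpp hunk above corresponds roughly to the following standalone sketch. Only the pipeline construction and the config lines are verbatim from the diff; the header path, the CLI parsing, and the generate() call are assumptions and may not match the sample exactly.

// Minimal sketch of the beam-search sample after this change.
// Assumptions (not shown in the diff): the include path, the argument
// parsing, and the generate() overload used at the end.
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include "openvino/genai/llm_pipeline.hpp"  // assumed header location

int main(int argc, char* argv[]) try {
    // Assumed CLI: beam_search_causal_lm <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]
    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " <MODEL_DIR> '<PROMPT>'...\n";
        return EXIT_FAILURE;
    }
    std::string model_path = argv[1];
    std::vector<std::string> prompts(argv + 2, argv + argc);
    std::string device = "CPU";  // assumption: device choice, as in other samples

    ov::LLMPipeline pipe(model_path, device);
    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 100;  // raised from 20 by this patch
    config.num_beam_groups = 3;   // diverse beam search: 3 groups of 5 beams
    config.num_beams = 15;
    config.num_return_sequences = config.num_beams * prompts.size();

    // Assumed API: a generate() overload that accepts the prompt list and config.
    std::cout << pipe.generate(prompts, config) << '\n';
} catch (const std::exception& error) {
    std::cerr << error.what() << '\n';
    return EXIT_FAILURE;
}

With num_beam_groups = 3 and num_beams = 15, each prompt is decoded with three groups of five beams, and num_return_sequences keeps every beam for every prompt, which is why it scales with prompts.size().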