diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 23d9006d07..d1110fb05b 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -194,14 +194,18 @@ jobs:
       shell: cmd
       run: |
         call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat
-
-        .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
+        .\build\text_generation\causal_lm\cpp\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
+        echo "sample outputs"
+        type .\pred.txt
+        echo "huggingface outputs"
+
        echo import transformers > ref.py
        echo predictions = open('pred.txt', 'r').read() >> ref.py
        echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
        echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
        echo for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): >> ref.py
        echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py
+        echo     print(ref) >> ref.py
        echo     idx = predictions.find(ref) >> ref.py
        echo     if -1 == idx: >> ref.py
        echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
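For readability, here is a sketch of the ref.py that the `echo` redirections above assemble line by line (reconstructed from the hunk itself; the workflow never checks in this file). It reads the sample's predictions, regenerates the 15 diverse beams with Hugging Face transformers using the same decoding parameters, and fails the step if any reference continuation is missing from pred.txt:

```python
# ref.py as assembled by the cmd echo redirections in the workflow step above.
import transformers

# Output captured from beam_search_causal_lm.exe.
predictions = open('pred.txt', 'r').read()

tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
tokenized = tokenizer('69', return_tensors='pt')

# Same diverse beam search configuration as the C++ sample:
# 3 beam groups of 5 beams each, all 15 sequences returned.
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(
        **tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15,
        diversity_penalty=1.0, max_new_tokens=20, early_stopping=False,
        length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
    # Decode only the newly generated tokens (skip the prompt) and prefix
    # with ': ' to match the sample's output format.
    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
    print(ref)
    idx = predictions.find(ref)
    if -1 == idx:
        raise RuntimeError(f'Missing "{ref=}" from predictions')
```

Note that `no_repeat_ngram_size=9**9` is so large that the n-gram repetition constraint can never trigger, presumably to keep transformers' decoding aligned with the sample, which applies no such constraint.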