diff --git a/tests/pytorch/nightly/llama2-model.libsonnet b/tests/pytorch/nightly/llama2-model.libsonnet
index 8b8efcf82..04cdb21f3 100644
--- a/tests/pytorch/nightly/llama2-model.libsonnet
+++ b/tests/pytorch/nightly/llama2-model.libsonnet
@@ -45,23 +45,15 @@ local utils = import 'templates/utils.libsonnet';
     },
     command: self.paramsOverride.trainCommand,
   },
+  local pjrt = self.pjrt,
+  pjrt:: common.PyTorchTpuVmMixin {
+    modelName: 'llama2-pjrt',
+  },
   local infer = self.infer,
-  infer:: common.PyTorchTpuVmMixin {
+  infer:: common.PyTorchTpuVmMixin + pjrt {
     modelName+: '-infer',
     tpuSettings+: {
       tpuVmExtraSetup: |||
-        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
-        sudo apt-get update -y
-        sudo apt-get install libomp5 -y
-        pip3 install mkl mkl-include
-        pip3 install tf-nightly tb-nightly tbp-nightly
-        pip3 install numpy
-        sudo apt-get install numactl -y
-        sudo apt-get install libopenblas-dev -y
-        pip3 install --user --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 install torch_xla[tpuvm]
-
         # install tokenizer model
         wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model

@@ -93,7 +85,7 @@ local utils = import 'templates/utils.libsonnet';
     },
   },
   local spmd = self.spmd,
-  spmd:: common.PyTorchTpuVmMixin {
+  spmd:: common.PyTorchTpuVmMixin + pjrt {
     modelName+: '-train-spmd',
     tpuSettings+: {
       tpuVmExports+: |||
@@ -110,19 +102,6 @@ local utils = import 'templates/utils.libsonnet';
         export TPU_MEGACORE=megacore_dense
       |||,
       tpuVmExtraSetup: |||
-        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
-        sudo apt update -y
-        sudo apt-get update -y
-        pip install accelerate -U
-        sudo apt-get install libomp5 -y
-        pip3 install mkl mkl-include
-        pip3 install numpy
-        sudo apt-get install numactl -y
-        sudo apt-get install libopenblas-dev -y
-        pip3 install --user --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 install torch_xla[tpuvm]
-
         # install tokenizer model
         wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model

@@ -144,10 +123,23 @@ local utils = import 'templates/utils.libsonnet';
         wget https://storage.googleapis.com/manfei_public_experimental/2B.json

         # save llama2 training
-        echo -e 'python transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 32 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/7B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none' >> llama2training.sh
+        echo -e 'XLA_USE_BF16=1 python3 transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name hf-internal-testing/llama-tokenizer --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 256 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/7B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none --optim adafactor > output.txt' >> llama2training.sh
+        echo -e 'import numpy as np' >> getvalue.py
+        echo -e 'file = open("output.txt")' >> getvalue.py
+        echo -e 'content = file.readlines()' >> getvalue.py
+        echo -e 'value_line = content[-1]' >> getvalue.py
+        echo -e 'value_value = float((value_line.split())[2])' >> getvalue.py
+        echo -e 'value_value = np.reciprocal(value_value)' >> getvalue.py
+        echo -e 'if value_value > 14.000 or value_value < 12.667:' >> getvalue.py
+        echo -e ' raise ValueError("train_steps_per_second exceeded threshold 13.333 +- 5%")' >> getvalue.py
+        echo -e 'else:' >> getvalue.py
+        echo -e ' print("Finished llama2 test; train_steps_per_second within expected threshold 13.333 +- 5%")' >> getvalue.py
+        echo -e 'cat output.txt' >> llama2training.sh
+        echo -e 'python3 transformers/getvalue.py' >> llama2training.sh
         cat llama2training.sh
         pwd
         ls
+
       |||,
     },
   },
diff --git a/tests/pytorch/r2.1/llama2-model.libsonnet b/tests/pytorch/r2.1/llama2-model.libsonnet
index 4e8d55182..a2b653d3d 100644
--- a/tests/pytorch/r2.1/llama2-model.libsonnet
+++ b/tests/pytorch/r2.1/llama2-model.libsonnet
@@ -222,17 +222,17 @@ local utils = import 'templates/utils.libsonnet';

         # save llama2 training
         cd ..
-        echo -e 'python3 transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 256 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/2B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none > output.txt' >> llama2training.sh
+        echo -e 'XLA_USE_BF16=1 python3 transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name hf-internal-testing/llama-tokenizer --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 256 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/2B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none --optim adafactor > output.txt' >> llama2training.sh
         echo -e 'import numpy as np' >> getvalue.py
         echo -e 'file = open("output.txt")' >> getvalue.py
         echo -e 'content = file.readlines()' >> getvalue.py
         echo -e 'value_line = content[-1]' >> getvalue.py
         echo -e 'value_value = float((value_line.split())[2])' >> getvalue.py
         echo -e 'value_value = np.reciprocal(value_value)' >> getvalue.py
-        echo -e 'if value_value > 6.863 or value_value < 6.209 :' >> getvalue.py
-        echo -e ' raise ValueError("expose to train_steps_per_second exceeded throuhold 6.536 +- 5%")' >> getvalue.py
+        echo -e 'if value_value > 14.000 or value_value < 12.667:' >> getvalue.py
+        echo -e ' raise ValueError("train_steps_per_second exceeded threshold 13.333 +- 5%")' >> getvalue.py
         echo -e 'else:' >> getvalue.py
-        echo -e ' print("Finished llama2 test and warm latency/token within expected throuhold 6.536 +- 5%")' >> getvalue.py
+        echo -e ' print("Finished llama2 test; train_steps_per_second within expected threshold 13.333 +- 5%")' >> getvalue.py
         echo -e 'cat output.txt' >> llama2training.sh
         echo -e 'python3 transformers/getvalue.py' >> llama2training.sh
         cat llama2training.sh
@@ -383,7 +383,7 @@ local utils = import 'templates/utils.libsonnet';
     llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + infer7B,
     llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + infer70B,
     llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + spmd2B,
-    llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + spmd2B128,
+    llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + spmd2B256,
     llama2_training + convergence + v4_8 + common.Functional + timeouts.Hours(3) + spmd2Bconv,
   ],
 }
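For reference, the echoed `getvalue.py` lines in both hunks assemble the same throughput check. Below is a minimal standalone sketch of that script, assuming (as the diff does) that the run_clm.py log is captured in output.txt, that the metric sits in the third whitespace-separated field of the log's last line, and that the target is 13.333 train_steps_per_second +- 5%; the variable names and messages here are illustrative, not the exact strings in the diff.

```python
import numpy as np

# Target taken from the diff: 13.333 train_steps_per_second +- 5%.
EXPECTED = 13.333
LOW, HIGH = EXPECTED * 0.95, EXPECTED * 1.05  # ~12.667 and ~14.000

# output.txt is the run_clm.py log captured by llama2training.sh;
# the metric is assumed to be the third field of the last line.
with open("output.txt") as f:
    last_line = f.readlines()[-1]

raw = float(last_line.split()[2])
steps_per_second = np.reciprocal(raw)  # the diff inverts the parsed value

if steps_per_second > HIGH or steps_per_second < LOW:
    raise ValueError(
        f"train_steps_per_second {steps_per_second:.3f} outside threshold {EXPECTED} +- 5%"
    )
print(f"Finished llama2 test: {steps_per_second:.3f} within threshold {EXPECTED} +- 5%")
```

The hard-coded bounds in the diff follow directly from the tolerance: 13.333 * 0.95 ~= 12.667 and 13.333 * 1.05 ~= 14.000.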