
Commit 262ba97

add training workloads back (#2567)
* add training workloads back
* Revert "clean underperforming models (#2563)". This reverts commit aee3d8f.
1 parent e3a751c commit 262ba97


41 files changed: +4329 −37 lines

README.md (+2)

@@ -70,12 +70,14 @@ For best performance on Intel® Data Center GPU Flex and Max Series, please chec
  | [BERT large](https://arxiv.org/pdf/1810.04805.pdf) [Sapphire Rapids](https://www.intel.com/content/www/us/en/newsroom/opinion/updates-next-gen-data-center-platform-sapphire-rapids.html#gs.blowcx) | Tensorflow | Training | [FP32 BFloat16 BFloat32](/quickstart/language_modeling/tensorflow/bert_large/training/cpu/README.md) | [SQuAD](https://github.com/IntelAI/models/tree/master/datasets/bert_data/README.md#inference) |
  | [BERT large (Hugging Face)](https://arxiv.org/pdf/1810.04805.pdf) | TensorFlow | Inference | [FP32 FP16 BFloat16 BFloat32](/benchmarks/language_modeling/tensorflow/bert_large_hf/inference/README.md) | [SQuAD](https://github.com/IntelAI/models/tree/master/datasets/bert_data/README.md#inference) |
  | [BERT large](https://arxiv.org/pdf/1810.04805.pdf) | PyTorch | Inference | [FP32 Int8 BFloat16 BFloat32](/models_v2/pytorch/bert_large/inference/cpu/README.md) | BERT Large SQuAD1.1 |
+ | [BERT large](https://arxiv.org/pdf/1810.04805.pdf) | PyTorch | Training | [FP32 BFloat16 BFloat32](/models_v2/pytorch/bert_large/training/cpu/README.md) | [preprocessed text dataset](https://drive.google.com/drive/folders/1cywmDnAsrP5-2vsr8GDc6QUc7VWe-M3v) |
  | [DistilBERT base](https://arxiv.org/abs/1910.01108) | PyTorch | Inference | [FP32 BF32 BF16Int8-FP32 Int8-BFloat16 BFloat32](/models_v2/pytorch/distilbert/inference/cpu/README.md) | [ DistilBERT Base SQuAD1.1](https://huggingface.co/distilbert-base-uncased-distilled-squad) |
  | [RNN-T](https://arxiv.org/abs/2007.15188) | PyTorch | Inference | [FP32 BFloat16 BFloat32](/models_v2/pytorch/rnnt/inference/cpu/README.md) | [RNN-T dataset](/models_v2/pytorch/rnnt/inference/cpu/download_dataset.sh) |
  | [RNN-T](https://arxiv.org/abs/2007.15188) | PyTorch | Training | [FP32 BFloat16 BFloat32](/models_v2/pytorch/rnnt/training/cpu/README.md) | [RNN-T dataset](/models_v2/pytorch/rnnt/training/cpu/download_dataset.sh) |
  | [GPTJ 6B](https://huggingface.co/EleutherAI/gpt-j-6b) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/gptj/inference/cpu/README.md) | |
  | [GPTJ 6B MLPerf](https://github.com/mlcommons/inference/tree/master/language/gpt-j#datasets--models) | PyTorch | Inference | [INT4](/models_v2/pytorch/gpt-j_mlperf/inference/cpu/README.md) | [CNN-Daily Mail dataset](https://huggingface.co/datasets/cnn_dailymail)|
  | [LLAMA2 7B](https://huggingface.co/meta-llama/Llama-2-7b-hf) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/llama/inference/cpu/README.md) | |
+ | [LLAMA2 7B](https://huggingface.co/meta-llama/Llama-2-7b-hf) | PyTorch | Training | [FP32 FP16 BFloat16 BF32](/models_v2/pytorch/llama/training/cpu/README.md) | |
  | [LLAMA2 13B](https://huggingface.co/meta-llama/Llama-2-13b-hf) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/llama/inference/cpu/README.md) | |
  | [ChatGLMv3 6B](https://huggingface.co/THUDM/chatglm3-6b) | PyTorch | Inference | [FP32 FP16 BFloat16 BF32 INT8](/models_v2/pytorch/chatglm/inference/cpu/README.md) | |

docker/pytorch/docker-compose.yml (+18 −18)

@@ -32,15 +32,15 @@ services:
       dockerfile: docker/pytorch/bert_large/inference/cpu/pytorch-bert-large-inference.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
     command: >
       bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
-  # bert_large-training-cpu:
-  #   image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-language-modeling-bert-large-training
-  #   pull_policy: always
-  #   build:
-  #     context: ../../
-  #     dockerfile: docker/pytorch/bert_large/training/cpu/pytorch-bert-large-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
-  #   extends: bert_large-inference-cpu
-  #   command: >
-  #     bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
+  bert_large-training-cpu:
+    image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-language-modeling-bert-large-training
+    pull_policy: always
+    build:
+      context: ../../
+      dockerfile: docker/pytorch/bert_large/training/cpu/pytorch-bert-large-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
+    extends: bert_large-inference-cpu
+    command: >
+      bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
   maskrcnn-inference-cpu:
     image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-object-detection-maskrcnn-inference
     pull_policy: always

@@ -185,15 +185,15 @@ services:
     extends: bert_large-inference-cpu
     command: >
       bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
-  # llama-training-cpu:
-  #   image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-generative-ai-llama-training
-  #   pull_policy: always
-  #   build:
-  #     context: ../../
-  #     dockerfile: docker/pytorch/llama/training/cpu/pytorch-llama-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
-  #   extends: bert_large-inference-cpu
-  #   command: >
-  #     bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
+  llama-training-cpu:
+    image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-generative-ai-llama-training
+    pull_policy: always
+    build:
+      context: ../../
+      dockerfile: docker/pytorch/llama/training/cpu/pytorch-llama-training.Dockerfile-${BASE_IMAGE_NAME:-ubuntu}
+    extends: bert_large-inference-cpu
+    command: >
+      bash -c "python -c 'import torch; import intel_extension_for_pytorch as ipex; print(\"torch:\", torch.__version__, \" ipex:\",ipex.__version__)'"
   vit-inference-cpu:
     image: ${REGISTRY}/aiops/mlops-ci:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-image-recognition-vit-inference
     pull_policy: always
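With these services uncommented again, they can be exercised like any other entry in the compose file. A minimal sketch, assuming you run from the repository root and that `REGISTRY` is set (the value below is a placeholder, not part of the commit):

```bash
# Placeholder registry; CI supplies its own. GITHUB_RUN_NUMBER, BASE_IMAGE_NAME
# and BASE_IMAGE_TAG fall back to the defaults declared in the compose file.
export REGISTRY=localhost:5000

# Build the restored BERT Large training image and run its import smoke test.
# Note that pull_policy: always makes `run` try to pull the tag first.
docker compose -f docker/pytorch/docker-compose.yml build bert_large-training-cpu
docker compose -f docker/pytorch/docker-compose.yml run --rm bert_large-training-cpu
```

The same two commands work for the restored `llama-training-cpu` service.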

docs/general/CPU_DEVCATALOG.md (+2)

@@ -13,6 +13,7 @@ The tables below link to documentation on how to run each use case using docker
  | --------| ------------------------------------------------------ | ---------- | ------| --------------------- |
  | PyTorch | [GPT-J](../../models_v2/pytorch/gptj/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA |
  | PyTorch | [Llama 2](../../models_v2/pytorch/llama/inference/cpu/CONTAINER.md) 7B,13B | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA |
+ | PyTorch | [Llama 2](../../models_v2/pytorch/llama/training/cpu/CONTAINER.md) 7B | FP32,BF32,BF16,FP16 | Training | LAMBADA |
  | PyTorch | [ChatGLM](../../models_v2/pytorch/chatglm/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32 | Inference | LAMBADA |
  | PyTorch | [LCM](../../models_v2/pytorch/LCM/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32,INT8-BF16 | Inference | COCO 2017 |
  | PyTorch | [Stable Diffusion](../../models_v2/pytorch/stable_diffusion/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16,INT8-FP32,INT8-BF16 | Inference | COCO 2017 |

@@ -39,6 +40,7 @@ The tables below link to documentation on how to run each use case using docker
  | Framework | Model | Precisions | Mode | Dataset |
  | --------| ------------------------------------------------------ | ---------- | ------| --------------------- |
+ | PyTorch | [BERT large](../../models_v2/pytorch/bert_large/training/cpu/CONTAINER.md) | FP32,BF32,BF16,FP16 | Training | Preprocessed Text dataset |
  | PyTorch |[BERT large](../../models_v2/pytorch/bert_large/inference/cpu/CONTAINER.md) | FP32,BF32,BF16,INT8 | Inference | SQuAD1.0 |
  | PyTorch | [RNN-T](../../models_v2/pytorch/rnnt/training/cpu/CONTAINER.md) | FP32,BF32,BF16,INT8 | Inference | LibriSpeech |
  | PyTorch |[RNN-T](../../models_v2/pytorch/rnnt/inference/cpu/CONTAINER.md) | FP32,BF32,FP16 | Training | LibriSpeech |

models_v2/pytorch/bert_large/inference/cpu/CONTAINER.md (+1 −1)

@@ -45,7 +45,7 @@ To run the BERT Large inference scripts, set environment variables to specify th
  ```bash
  export EVAL_DATA_FILE=<path to the eval data>
  export OUTPUT_DIR=<directory where log files will be written>
- export PRECISION=<provide bf16, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy and realtime>
+ export PRECISION=<specify the precision>
  export FINETUNED_MODELL=<path to pre-trained model>
  export TEST_MODE=<provide either REALTIME,THROUGHPUT OR ACCURACY mode>
  export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX_FP16 (for FP16 precision)
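With the precision list moved out of the snippet, a concrete FP16 setup would combine the now-generic `PRECISION` variable with the ISA override on the snippet's last line. A small sketch using only values shown in this hunk:

```bash
export PRECISION=fp16
export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX_FP16  # only needed for FP16 precision
```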

models_v2/pytorch/bert_large/inference/cpu/README.md (+1 −1)

@@ -95,7 +95,7 @@ export FINETUNED_MODEL=$(pwd)/bert_squad_model
  | **TEST_MODE** (THROUGHPUT, ACCURACY, REALTIME) | `export TEST_MODE=THROUGHPUT (THROUGHPUT, ACCURACY, REALTIME)` |
  | **EVAL_DATA_FILE** | `export EVAL_DATA_FILE=<path to dev-v1.1.json file>` |
  | **OUTPUT_DIR** | `export OUTPUT_DIR=<path to an output directory>` |
- | **PRECISION** | `export PRECISION=bf16` (bf16, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy and realtime) |
+ | **PRECISION** | `export PRECISION=bf16` (bf16, bf32, fp32, fp16, int8, avx-int8, avx-fp32 for throughput and bf16, bf32, fp32, fp16, int8, avx-fp32, avx-int8, fp8 for accuracy) |
  | **FINETUNED_MODEL** | `export FINETUNED_MODEL=<path to the fine tuned model>` |
  | **MODEL_DIR** | `export MODEL_DIR=$(pwd)` |
  | **BATCH_SIZE** (optional) | `export BATCH_SIZE=<set a value for batch size, else it will run with default batch size>` |
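The corrected row makes the per-mode precision split explicit. For example, using only values from the table:

```bash
# bf32 is now listed for THROUGHPUT as well:
export TEST_MODE=THROUGHPUT
export PRECISION=bf32

# fp8 remains valid only for ACCURACY:
export TEST_MODE=ACCURACY
export PRECISION=fp8
```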
models_v2/pytorch/bert_large/training/cpu/CONTAINER.md (new file, +96)

# PyTorch BERT Large training

## Description
This document has instructions for running BERT Large training using Intel® Extension for PyTorch.

## Pull Command

```bash
docker pull intel/language-modeling:pytorch-cpu-bert-large-training
```

> [!NOTE]
> The `avx-fp32` precision runs the same scripts as `fp32`, except that the `DNNL_MAX_CPU_ISA` environment variable is unset. The environment variable is otherwise set to `DNNL_MAX_CPU_ISA=AVX512_CORE_AMX`.
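As an illustration of that note, the assumed toggle between the two precisions looks like this (a sketch, not taken verbatim from the container scripts):

```bash
# fp32 (default): cap oneDNN at the AMX instruction set
export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX

# avx-fp32: same scripts, with the ISA cap removed
unset DNNL_MAX_CPU_ISA
```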
## Datasets
Follow the instructions to [download and preprocess](./README.md#download-the-preprocessed-text-dataset) the text dataset, and set `DATASET_DIR` to point to the preprocessed dataset.

## BERT Config File
BERT training happens in two phases. Download the BERT config file from [here](https://drive.google.com/drive/folders/1oQF4diVHNPCclykwdvQJw8n_VIWwV0PT) and export the `BERT_MODEL_CONFIG` variable to point to this file path.

## Checkpoint Directory
The checkpoint directory is created as a result of Phase 1 training. Set `PRETRAINED_MODEL` to point to the pre-trained model path and volume-mount it for Phase 2 training.
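Putting the two phases together, a rough sketch of the flow (the paths are placeholders; where Phase 1 writes its checkpoints is determined by the training script, not by this document):

```bash
# Phase 1: pretrain from the downloaded BERT config file
export TRAINING_PHASE=1
export BERT_MODEL_CONFIG=/data/bert_config.json       # placeholder path
# ...run the container as shown under "Docker Run" below...

# Phase 2: continue from the checkpoint directory Phase 1 produced
export TRAINING_PHASE=2
export PRETRAINED_MODEL=/data/bert_phase1_checkpoint  # placeholder path
# ...run the container again, volume-mounting ${PRETRAINED_MODEL}...
```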
## Docker Run
(Optional) Export the relevant proxy settings into the Docker environment:

```bash
export DOCKER_RUN_ENVS="-e ftp_proxy=${ftp_proxy} \
  -e FTP_PROXY=${FTP_PROXY} -e http_proxy=${http_proxy} \
  -e HTTP_PROXY=${HTTP_PROXY} -e https_proxy=${https_proxy} \
  -e HTTPS_PROXY=${HTTPS_PROXY} -e no_proxy=${no_proxy} \
  -e NO_PROXY=${NO_PROXY} -e socks_proxy=${socks_proxy} \
  -e SOCKS_PROXY=${SOCKS_PROXY}"
```

To run the BERT Large training scripts, set environment variables to specify the dataset directory, precision, and an output directory.

```bash
export DATASET_DIR=<path to the dataset>
export OUTPUT_DIR=<directory where log files will be written>
export PRECISION=<specify the precision to run>
export BERT_MODEL_CONFIG=<path to bert configuration file>
export PRETRAINED_MODEL=<path to checkpoint directory>
export TRAINING_PHASE=<set either 1 or 2>
export DNNL_MAX_CPU_ISA=<provide AVX512_CORE_AMX_FP16 for fp16 precision>
export TRAIN_SCRIPT=/workspace/pytorch-bert-large-training/run_pretrain_mlperf.py
export DDP=false
export TORCH_INDUCTOR=0

DOCKER_ARGS="--rm -it"
IMAGE_NAME=intel/language-modeling:pytorch-cpu-bert-large-training

docker run \
  --cap-add SYS_NICE \
  --shm-size 16G \
  --env PRECISION=${PRECISION} \
  --env OUTPUT_DIR=${OUTPUT_DIR} \
  --env TRAIN_SCRIPT=${TRAIN_SCRIPT} \
  --env DATASET_DIR=${DATASET_DIR} \
  --env TRAINING_PHASE=${TRAINING_PHASE} \
  --env DDP=${DDP} \
  --env TORCH_INDUCTOR=${TORCH_INDUCTOR} \
  --env BERT_MODEL_CONFIG=${BERT_MODEL_CONFIG} \
  --env PRETRAINED_MODEL=${PRETRAINED_MODEL} \
  --env DNNL_MAX_CPU_ISA=${DNNL_MAX_CPU_ISA} \
  --volume ${OUTPUT_DIR}:${OUTPUT_DIR} \
  --volume ${DATASET_DIR}:${DATASET_DIR} \
  --volume ${BERT_MODEL_CONFIG}:${BERT_MODEL_CONFIG} \
  --volume ${PRETRAINED_MODEL}:${PRETRAINED_MODEL} \
  ${DOCKER_RUN_ENVS} \
  ${DOCKER_ARGS} \
  $IMAGE_NAME \
  /bin/bash run_model.sh
```
> [!NOTE]
> The workload container was validated on a single node (`DDP=false`) with `TORCH_INDUCTOR=0`.
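After the container exits, the mounted output directory should contain the run's logs. A quick sanity check (exact log file names depend on `run_model.sh` and are not documented here):

```bash
ls -lt ${OUTPUT_DIR}
```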
## Documentation and Sources

#### Get Started
[Docker* Repository](https://hub.docker.com/r/intel/language-modeling)

[Main GitHub*](https://github.com/IntelAI/models)

[Release Notes](https://github.com/IntelAI/models/releases)

[Get Started Guide](https://github.com/IntelAI/models/blob/master/models_v2/pytorch/bert_large/training/cpu/CONTAINER.md)

#### Code Sources
[Dockerfile](https://github.com/IntelAI/models/tree/master/docker/pytorch)

[Report Issue](https://community.intel.com/t5/Intel-Optimized-AI-Frameworks/bd-p/optimized-ai-frameworks)

## License Agreement
LEGAL NOTICE: By accessing, downloading or using this software and any required dependent software (the “Software Package”), you agree to the terms and conditions of the software license agreements for the Software Package, which may also include notices, disclaimers, or license terms for third party software included with the Software Package. Please refer to the [license](https://github.com/IntelAI/models/tree/master/third_party) file for additional details.

[View All Containers and Solutions 🡢](https://www.intel.com/content/www/us/en/developer/tools/software-catalog/containers.html?s=Newest)
