From 0ccde011aef60cc236ee61520a689e3fb1c7c617 Mon Sep 17 00:00:00 2001 From: miss_k <86659575+daredevil3435@users.noreply.github.com> Date: Fri, 7 Jun 2024 15:49:40 +0530 Subject: [PATCH 1/2] fixed broken link in sagemaker/03_distributed_training_data_parallelism --- .../sagemaker-notebook.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sagemaker/03_distributed_training_data_parallelism/sagemaker-notebook.ipynb b/sagemaker/03_distributed_training_data_parallelism/sagemaker-notebook.ipynb index d34bbe22..bdb86487 100644 --- a/sagemaker/03_distributed_training_data_parallelism/sagemaker-notebook.ipynb +++ b/sagemaker/03_distributed_training_data_parallelism/sagemaker-notebook.ipynb @@ -36,7 +36,7 @@ "source": [ "# Introduction\n", "\n", - "Welcome to our end-to-end `distributed` Question-Answering example. In this demo, we will use the Hugging Face `transformers` and `datasets` library together with a custom Amazon sagemaker-sdk extension to fine-tune a pre-trained transformer for question-answering on multiple-gpus. In particular, the pre-trained model will be fine-tuned using the `squad` dataset. The demo will use the new `smdistributed` library to run training on multiple gpus as training scripting we are going to use one of the `transformers` [example scripts from the repository](https://github.com/huggingface/transformers/blob/master/examples/question-answering/run_qa.py).\n", + "Welcome to our end-to-end `distributed` Question-Answering example. In this demo, we will use the Hugging Face `transformers` and `datasets` library together with a custom Amazon sagemaker-sdk extension to fine-tune a pre-trained transformer for question-answering on multiple-gpus. In particular, the pre-trained model will be fine-tuned using the `squad` dataset. The demo will use the new `smdistributed` library to run training on multiple gpus as training scripting we are going to use one of the `transformers` [example scripts from the repository](https://github.com/huggingface/transformers/tree/main/examples/pytorch/question-answering).\n", "\n", "To get started, we need to set up the environment with a few prerequisite steps, for permissions, configurations, and so on. \n", "\n", From 2179f8583e109a377d0da787c78220b82c85ebcc Mon Sep 17 00:00:00 2001 From: miss_k <86659575+daredevil3435@users.noreply.github.com> Date: Fri, 7 Jun 2024 17:31:44 +0530 Subject: [PATCH 2/2] updated code of preprocessing.ipynb,quicktour.ipynb,training.ipynb with respect to en directory --- transformers_doc/en/training.ipynb | 98 ++----- transformers_doc/preprocessing.ipynb | 337 ++++++++++------------ transformers_doc/quicktour.ipynb | 401 +++++++++++++++++++++------ transformers_doc/training.ipynb | 270 ++++++++++-------- 4 files changed, 647 insertions(+), 459 deletions(-) diff --git a/transformers_doc/en/training.ipynb b/transformers_doc/en/training.ipynb index a0872dba..704ab573 100644 --- a/transformers_doc/en/training.ipynb +++ b/transformers_doc/en/training.ipynb @@ -46,21 +46,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -81,19 +67,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'label': 0,\n", - " 'text': 'My expectations for McDonalds are t rarely high. 
But for one to still fail so spectacularly...that takes something special!\\\\nThe cashier took my friends\\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\\'s meal. After watching two people who ordered after me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\\\\"serving off their orders\\\\\" when they didn\\'t have their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\\\nThe manager was rude when giving me my order. She didn\\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that I felt I was getting poor service.\\\\nI\\'ve eaten at various McDonalds restaurants for over 30 years. I\\'ve worked at more than one location. I expect bad days, bad moods, and the occasional mistake. But I have yet to have a decent experience at this store. It will remain a place I avoid unless someone in my party needs to avoid illness from low blood sugar. Perhaps I should go back to the racially biased service of Steak n Shake instead!'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", @@ -173,21 +147,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -383,21 +343,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -619,21 +565,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -922,7 +854,25 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, "nbformat": 4, "nbformat_minor": 4 } diff --git a/transformers_doc/preprocessing.ipynb b/transformers_doc/preprocessing.ipynb index 70fc0fcd..7ee4050b 100644 --- a/transformers_doc/preprocessing.ipynb +++ b/transformers_doc/preprocessing.ipynb @@ -23,18 +23,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Before you can use your data in a model, the data needs to be processed into an acceptable format for the model. A model does not understand raw text, images or audio. These inputs need to be converted into numbers and assembled into tensors. In this tutorial, you will:\n", + "Before you can train a model on a dataset, it needs to be preprocessed into the expected model input format. 
Whether your data is text, images, or audio, they need to be converted and assembled into batches of tensors. 🤗 Transformers provides a set of preprocessing classes to help prepare your data for the model. In this tutorial, you'll learn that for:\n", "\n", - "* Preprocess textual data with a tokenizer.\n", - "* Preprocess image or audio data with a feature extractor.\n", - "* Preprocess data for a multimodal task with a processor." + "* Text, use a [Tokenizer](https://huggingface.co/docs/transformers/main/en/./main_classes/tokenizer) to convert text into a sequence of tokens, create a numerical representation of the tokens, and assemble them into tensors.\n", + "* Speech and audio, use a [Feature extractor](https://huggingface.co/docs/transformers/main/en/./main_classes/feature_extractor) to extract sequential features from audio waveforms and convert them into tensors.\n", + "* Image inputs use a [ImageProcessor](https://huggingface.co/docs/transformers/main/en/./main_classes/image) to convert images into tensors.\n", + "* Multimodal inputs, use a [Processor](https://huggingface.co/docs/transformers/main/en/./main_classes/processors) to combine a tokenizer and a feature extractor or image processor.\n", + "\n", + "\n", + "\n", + "`AutoProcessor` **always** works and automatically chooses the correct class for the model you're using, whether you're using a tokenizer, image processor, feature extractor or processor.\n", + "\n", + "\n", + "\n", + "Before you begin, install 🤗 Datasets so you can load some datasets to experiment with:\n", + "\n", + "```bash\n", + "pip install datasets\n", + "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## NLP" + "## Natural Language Processing" ] }, { @@ -70,29 +83,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The main tool for processing textual data is a [tokenizer](https://huggingface.co/docs/transformers/main/en/main_classes/tokenizer). A tokenizer starts by splitting text into *tokens* according to a set of rules. The tokens are converted into numbers, which are used to build tensors as input to a model. Any additional inputs required by a model are also added by the tokenizer.\n", + "The main tool for preprocessing textual data is a [tokenizer](https://huggingface.co/docs/transformers/main/en/main_classes/tokenizer). A tokenizer splits text into *tokens* according to a set of rules. The tokens are converted into numbers and then tensors, which become the model inputs. Any additional inputs required by the model are added by the tokenizer.\n", "\n", "\n", "\n", - "If you plan on using a pretrained model, it's important to use the associated pretrained tokenizer. This ensures the text is split the same way as the pretraining corpus, and uses the same corresponding tokens-to-index (usually referrred to as the *vocab*) during pretraining.\n", + "If you plan on using a pretrained model, it's important to use the associated pretrained tokenizer. This ensures the text is split the same way as the pretraining corpus, and uses the same corresponding tokens-to-index (usually referred to as the *vocab*) during pretraining.\n", "\n", "\n", "\n", - "Get started quickly by loading a pretrained tokenizer with the [AutoTokenizer](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoTokenizer) class. This downloads the *vocab* used when a model is pretrained." 
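,
    "\n",
    "\n",
    "As a minimal sketch (reusing the same `bert-base-cased` checkpoint the next cell loads), you can confirm that the pretrained *vocab* comes along with the tokenizer:\n",
    "\n",
    "```py\n",
    "from transformers import AutoTokenizer\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")\n",
    "print(tokenizer.vocab_size)  # number of entries in the pretrained vocab\n",
    "```"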
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tokenize" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load a pretrained tokenizer with [AutoTokenizer.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained):" + "Get started by loading a pretrained tokenizer with the [AutoTokenizer.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoTokenizer.from_pretrained) method. This downloads the *vocab* a model was pretrained with:" ] }, { @@ -110,7 +109,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then pass your sentence to the tokenizer:" + "Then pass your text to the tokenizer:" ] }, { @@ -146,7 +145,7 @@ "* [attention_mask](https://huggingface.co/docs/transformers/main/en/glossary#attention-mask) indicates whether a token should be attended to or not.\n", "* [token_type_ids](https://huggingface.co/docs/transformers/main/en/glossary#token-type-ids) identifies which sequence a token belongs to when there is more than one sequence.\n", "\n", - "You can decode the `input_ids` to return the original input:" + "Return your input be decoding the `input_ids` :" ] }, { @@ -176,7 +175,7 @@ "As you can see, the tokenizer added two special tokens - `CLS` and `SEP` (classifier and separator) - to the sentence. Not all models need\n", "special tokens, but if they do, the tokenizer will automatically add them for you.\n", "\n", - "If there are several sentences you want to process, pass the sentences as a list to the tokenizer:" + "If there are several sentences you want to preprocess, pass them as a list as a list to the tokenizer:" ] }, { @@ -224,7 +223,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This brings us to an important topic. When you process a batch of sentences, they aren't always the same length. This is a problem because tensors, the input to the model, need to have a uniform shape. Padding is a strategy for ensuring tensors are rectangular by adding a special *padding token* to sentences with fewer tokens.\n", + "Sentences aren't always the same length which can be an issue because tensors, the model inputs, need to have a uniform shape. Padding is a strategy for ensuring tensors are rectangular by adding a special *padding token* to shorter sentences.\n", "\n", "Set the `padding` parameter to `True` to pad the shorter sequences in the batch to match the longest sequence:" ] @@ -267,7 +266,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the tokenizer padded the first and third sentences with a `0` because they are shorter!" + "The first and third sentences are now padded with `0`'s because they are shorter." 
] }, { @@ -320,6 +319,17 @@ "print(encoded_input)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "Check out the [Padding and truncation](https://huggingface.co/docs/transformers/main/en/./pad_truncation) concept guide to learn more different padding and truncation arguments.\n", + "\n", + "" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -363,7 +373,7 @@ " \"Don't think he knows about second breakfast, Pip.\",\n", " \"What about elevensies?\",\n", "]\n", - "encoded_input = tokenizer(batch, padding=True, truncation=True, return_tensors=\"pt\")\n", + "encoded_input = tokenizer(batch_sentences, padding=True, truncation=True, return_tensors=\"pt\")\n", "print(encoded_input)" ] }, @@ -413,13 +423,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Audio inputs are preprocessed differently than textual inputs, but the end goal remains the same: create numerical sequences the model can understand. A [feature extractor](https://huggingface.co/docs/transformers/main/en/main_classes/feature_extractor) is designed for the express purpose of extracting features from raw image or audio data and converting them into tensors. Before you begin, install 🤗 Datasets to load an audio dataset to experiment with:\n", - "\n", - "```bash\n", - "pip install datasets\n", - "```\n", + "For audio tasks, you'll need a [feature extractor](https://huggingface.co/docs/transformers/main/en/main_classes/feature_extractor) to prepare your dataset for the model. The feature extractor is designed to extract features from raw audio data, and convert them into tensors.\n", "\n", - "Load the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) dataset (see the 🤗 [Datasets tutorial](https://huggingface.co/docs/datasets/load_hub.html) for more details on how to load a dataset):" + "Load the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) dataset (see the 🤗 [Datasets tutorial](https://huggingface.co/docs/datasets/load_hub.html) for more details on how to load a dataset) to see how you can use a feature extractor with audio datasets:" ] }, { @@ -478,40 +484,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Resample" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this tutorial, you will use the [Wav2Vec2](https://huggingface.co/facebook/wav2vec2-base) model. As you can see from the model card, the Wav2Vec2 model is pretrained on 16kHz sampled speech audio. It is important your audio data's sampling rate matches the sampling rate of the dataset used to pretrain the model. If your data's sampling rate isn't the same, then you need to resample your audio data. \n", - "\n", - "For example, the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) dataset has a sampling rate of 8000kHz. In order to use the Wav2Vec2 model with this dataset, upsample the sampling rate to 16kHz:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'array': array([ 0. , 0.00024414, -0.00024414, ..., -0.00024414,\n", - " 0. , 0. 
], dtype=float32),\n", - " 'path': '/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav',\n", - " 'sampling_rate': 8000}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset = load_dataset(\"PolyAI/minds14\", name=\"en-US\", split=\"train\")\n", - "dataset[0][\"audio\"]" + "For this tutorial, you will use the [Wav2Vec2](https://huggingface.co/facebook/wav2vec2-base) model. As you can see from the model card, the Wav2Vec2 model is pretrained on 16kHz sampled speech audio. It is important your audio data's sampling rate matches the sampling rate of the dataset used to pretrain the model. If your data's sampling rate isn't the same, then you need to resample your audio data. " ] }, { @@ -534,7 +507,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "2. Load the audio file:" + "2. Call the `audio` column again to resample the audio file:" ] }, { @@ -564,21 +537,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you can see, the `sampling_rate` is now 16kHz!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Feature extractor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The next step is to load a feature extractor to normalize and pad the input. When padding textual data, a `0` is added for shorter sequences. The same idea applies to audio data, and the audio feature extractor will add a `0` - interpreted as silence - to `array`.\n", + "Next, load a feature extractor to normalize and pad the input. When padding textual data, a `0` is added for shorter sequences. The same idea applies to audio data. The feature extractor adds a `0` - interpreted as silence - to `array`.\n", "\n", "Load the feature extractor with [AutoFeatureExtractor.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoFeatureExtractor.from_pretrained):" ] @@ -623,13 +582,6 @@ "feature_extractor(audio_input, sampling_rate=16000)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pad and truncate" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -681,7 +633,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you can see, the first sample has a longer sequence than the second sample. Let's create a function that will preprocess the dataset. Specify a maximum sample length, and the feature extractor will either pad or truncate the sequences to match it:" + "Create a function to preprocess the dataset so the audio samples are the same lengths. Specify a maximum sample length, and the feature extractor will either pad or truncate the sequences to match it:" ] }, { @@ -706,7 +658,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Apply the function to the the first few examples in the dataset:" + "Apply the `preprocess_function` to the the first few examples in the dataset:" ] }, { @@ -722,7 +674,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now take another look at the processed sample lengths:" + "The sample lengths are now the same and match the specified maximum length. You can pass your processed dataset to the model now!" 
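,
    "\n",
    "\n",
    "If you want to verify this, a small sketch using the `processed_dataset` from the previous cell prints the lengths of the first two samples, which should now be identical:\n",
    "\n",
    "```py\n",
    "# both samples were padded or truncated to the same length\n",
    "print(len(processed_dataset[\"input_values\"][0]))\n",
    "print(len(processed_dataset[\"input_values\"][1]))\n",
    "```"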
] }, { @@ -776,16 +728,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Vision" + "## Computer Vision" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "A feature extractor is also used to process images for vision tasks. Once again, the goal is to convert the raw image into a batch of tensors as input.\n", + "For computer vision tasks, you'll need an [image processor](https://huggingface.co/docs/transformers/main/en/main_classes/image_processor) to prepare your dataset for the model.\n", + "Image preprocessing consists of several steps that convert images into the input expected by the model. These steps\n", + "include but are not limited to resizing, normalizing, color channel correction, and converting images to tensors.\n", + "\n", + "\n", + "\n", + "Image preprocessing often follows some form of image augmentation. Both image preprocessing and image augmentation\n", + "transform image data, but they serve different purposes:\n", + "\n", + "* Image augmentation alters images in a way that can help prevent overfitting and increase the robustness of the model. You can get creative in how you augment your data - adjust brightness and colors, crop, rotate, resize, zoom, etc. However, be mindful not to change the meaning of the images with your augmentations.\n", + "* Image preprocessing guarantees that the images match the model’s expected input format. When fine-tuning a computer vision model, images must be preprocessed exactly as when the model was initially trained.\n", + "\n", + "You can use any library you like for image augmentation. For image preprocessing, use the `ImageProcessor` associated with the model.\n", + "\n", + "\n", "\n", - "Let's load the [food101](https://huggingface.co/datasets/food101) dataset for this tutorial. 
Use 🤗 Datasets `split` parameter to only load a small sample from the training split since the dataset is quite large:" + "Load the [food101](https://huggingface.co/datasets/food101) dataset (see the 🤗 [Datasets tutorial](https://huggingface.co/docs/datasets/load_hub.html) for more details on how to load a dataset) to see how you can use an image processor with computer vision datasets:\n", + "\n", + "\n", + "\n", + "Use 🤗 Datasets `split` parameter to only load a small sample from the training split since the dataset is quite large!\n", + "\n", + "" ] }, { @@ -819,21 +791,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "![vision-preprocess-tutorial.png](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Feature extractor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load the feature extractor with [AutoFeatureExtractor.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoFeatureExtractor.from_pretrained):" + "![vision-preprocess-tutorial.png](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png)\n", + "\n", + "Load the image processor with [AutoImageProcessor.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoImageProcessor.from_pretrained):" ] }, { @@ -851,16 +811,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Data augmentation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For vision tasks, it is common to add some type of data augmentation to the images as a part of preprocessing. You can add augmentations with any library you'd like, but in this tutorial, you will use torchvision's [`transforms`](https://pytorch.org/vision/stable/transforms.html) module.\n", + "First, let's add some image augmentation. You can use any library you prefer, but in this tutorial, we'll use torchvision's [`transforms`](https://pytorch.org/vision/stable/transforms.html) module. If you're interested in using another data augmentation library, learn how in the [Albumentations](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb) or [Kornia notebooks](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb).\n", "\n", - "1. Normalize the image and use [`Compose`](https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html) to chain some transforms - [`RandomResizedCrop`](https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html) and [`ColorJitter`](https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html) - together:" + "1. Here we use [`Compose`](https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html) to chain together a couple of\n", + "transforms - [`RandomResizedCrop`](https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html) and [`ColorJitter`](https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html).\n", + "Note that for resizing, we can get the image size requirements from the `image_processor`. For some models, an exact height and\n", + "width are expected, for others only the `shortest_edge` is defined." 
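,
    "\n",
    "\n",
    "A minimal sketch of such a transform chain might look like the following (the `brightness` and `hue` values are only illustrative, and the size lookup assumes the `size` dict exposed by the `image_processor`):\n",
    "\n",
    "```py\n",
    "from torchvision.transforms import ColorJitter, Compose, RandomResizedCrop\n",
    "\n",
    "size = (\n",
    "    image_processor.size[\"shortest_edge\"]\n",
    "    if \"shortest_edge\" in image_processor.size\n",
    "    else (image_processor.size[\"height\"], image_processor.size[\"width\"])\n",
    ")\n",
    "_transforms = Compose([RandomResizedCrop(size), ColorJitter(brightness=0.5, hue=0.5)])\n",
    "```"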
] }, { @@ -881,7 +837,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "2. The model accepts [`pixel_values`](https://huggingface.co/docs/transformers/main/en/model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values) as it's input. This value is generated by the feature extractor. Create a function that generates `pixel_values` from the transforms:" + "2. The model accepts [`pixel_values`](https://huggingface.co/docs/transformers/main/en/model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values)\n", + "as its input. `ImageProcessor` can take care of normalizing the images, and generating appropriate tensors.\n", + "Create a function that combines image augmentation and image preprocessing for a batch of images and generates `pixel_values`:" ] }, { @@ -891,7 +849,8 @@ "outputs": [], "source": [ "def transforms(examples):\n", - " examples[\"pixel_values\"] = [_transforms(image.convert(\"RGB\")) for image in examples[\"image\"]]\n", + " images = [_transforms(img.convert(\"RGB\")) for img in examples[\"image\"]]\n", + " examples[\"pixel_values\"] = image_processor(images, do_resize=False, return_tensors=\"pt\")[\"pixel_values\"]\n", " return examples" ] }, @@ -899,7 +858,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "3. Then use 🤗 Datasets [`set_transform`](https://huggingface.co/docs/datasets/process.html#format-transform) to apply the transforms on-the-fly:" + "\n", + "\n", + "In the example above we set `do_resize=False` because we have already resized the images in the image augmentation transformation,\n", + "and leveraged the `size` attribute from the appropriate `image_processor`. If you do not resize images during image augmentation,\n", + "leave this parameter out. By default, `ImageProcessor` will handle the resizing.\n", + "\n", + "If you wish to normalize images as a part of the augmentation transformation, use the `image_processor.image_mean`,\n", + "and `image_processor.image_std` values.\n", + "\n", + "\n", + "3. Then use 🤗 Datasets [`set_transform`](https://huggingface.co/docs/datasets/process.html#format-transform) to apply the transforms on the fly:" ] }, { @@ -915,58 +884,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "4. Now when you access the image, you will notice the feature extractor has added the model input `pixel_values`:" + "4. Now when you access the image, you'll notice the image processor has added `pixel_values`. You can pass your processed dataset to the model now!" 
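,
    "\n",
    "\n",
    "For example, a quick sketch (the exact dimensions depend on the crop size and the checkpoint's image processor):\n",
    "\n",
    "```py\n",
    "# the on-the-fly transform returns a (channels, height, width) tensor\n",
    "print(dataset[0][\"pixel_values\"].shape)\n",
    "```"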
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'image': ,\n", - " 'label': 6,\n", - " 'pixel_values': tensor([[[ 0.0353, 0.0745, 0.1216, ..., -0.9922, -0.9922, -0.9922],\n", - " [-0.0196, 0.0667, 0.1294, ..., -0.9765, -0.9843, -0.9922],\n", - " [ 0.0196, 0.0824, 0.1137, ..., -0.9765, -0.9686, -0.8667],\n", - " ...,\n", - " [ 0.0275, 0.0745, 0.0510, ..., -0.1137, -0.1216, -0.0824],\n", - " [ 0.0667, 0.0824, 0.0667, ..., -0.0588, -0.0745, -0.0980],\n", - " [ 0.0353, 0.0353, 0.0431, ..., -0.0039, -0.0039, -0.0588]],\n", - " \n", - " [[ 0.2078, 0.2471, 0.2863, ..., -0.9451, -0.9373, -0.9451],\n", - " [ 0.1608, 0.2471, 0.3098, ..., -0.9373, -0.9451, -0.9373],\n", - " [ 0.2078, 0.2706, 0.3020, ..., -0.9608, -0.9373, -0.8275],\n", - " ...,\n", - " [-0.0353, 0.0118, -0.0039, ..., -0.2392, -0.2471, -0.2078],\n", - " [ 0.0196, 0.0353, 0.0196, ..., -0.1843, -0.2000, -0.2235],\n", - " [-0.0118, -0.0039, -0.0039, ..., -0.0980, -0.0980, -0.1529]],\n", - " \n", - " [[ 0.3961, 0.4431, 0.4980, ..., -0.9216, -0.9137, -0.9216],\n", - " [ 0.3569, 0.4510, 0.5216, ..., -0.9059, -0.9137, -0.9137],\n", - " [ 0.4118, 0.4745, 0.5216, ..., -0.9137, -0.8902, -0.7804],\n", - " ...,\n", - " [-0.2314, -0.1922, -0.2078, ..., -0.4196, -0.4275, -0.3882],\n", - " [-0.1843, -0.1686, -0.2000, ..., -0.3647, -0.3804, -0.4039],\n", - " [-0.1922, -0.1922, -0.1922, ..., -0.2941, -0.2863, -0.3412]]])}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "dataset[0][\"image\"]" + "dataset[0].keys()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here is what the image looks like after you preprocess it. Just as you'd expect from the applied transforms, the image has been randomly cropped and it's color properties are different." + "Here is what the image looks like after the transforms are applied. The image has been randomly cropped and it's color properties are different." ] }, { @@ -986,7 +920,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "![preprocessed_image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png)" + "![preprocessed_image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png)\n", + "\n", + "\n", + "\n", + "For tasks like object detection, semantic segmentation, instance segmentation, and panoptic segmentation, `ImageProcessor`\n", + "offers post processing methods. These methods convert model's raw outputs into meaningful predictions such as bounding boxes,\n", + "or segmentation maps.\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pad" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In some cases, for instance, when fine-tuning [DETR](https://huggingface.co/docs/transformers/main/en/./model_doc/detr), the model applies scale augmentation at training\n", + "time. This may cause images to be different sizes in a batch. You can use [DetrImageProcessor.pad_and_create_pixel_mask()](https://huggingface.co/docs/transformers/main/en/model_doc/detr#transformers.DetrFeatureExtractor.pad_and_create_pixel_mask)\n", + "from [DetrImageProcessor](https://huggingface.co/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor) and define a custom `collate_fn` to batch images together." 
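,
    "\n",
    "\n",
    "Once the `collate_fn` from the next cell is defined, it can be plugged into a PyTorch `DataLoader` - a rough sketch, where `detection_dataset` stands in for a dataset that already provides `pixel_values` and `labels`:\n",
    "\n",
    "```py\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "# each batch is padded on the fly by the custom collate_fn\n",
    "dataloader = DataLoader(detection_dataset, batch_size=4, collate_fn=collate_fn)\n",
    "```"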
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def collate_fn(batch):\n", + " pixel_values = [item[\"pixel_values\"] for item in batch]\n", + " encoding = image_processor.pad_and_create_pixel_mask(pixel_values, return_tensors=\"pt\")\n", + " labels = [item[\"labels\"] for item in batch]\n", + " batch = {}\n", + " batch[\"pixel_values\"] = encoding[\"pixel_values\"]\n", + " batch[\"pixel_mask\"] = encoding[\"pixel_mask\"]\n", + " batch[\"labels\"] = labels\n", + " return batch" ] }, { @@ -1000,12 +975,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For multimodal tasks. you will use a combination of everything you've learned so far and apply your skills to a automatic speech recognition (ASR) task. This means you will need a:\n", - "\n", - "* Feature extractor to preprocess the audio data.\n", - "* Tokenizer to process the text.\n", + "For tasks involving multimodal inputs, you'll need a [processor](https://huggingface.co/docs/transformers/main/en/main_classes/processors) to prepare your dataset for the model. A processor couples together two processing objects such as as tokenizer and feature extractor.\n", "\n", - "Let's return to the [LJ Speech](https://huggingface.co/datasets/lj_speech) dataset:" + "Load the [LJ Speech](https://huggingface.co/datasets/lj_speech) dataset (see the 🤗 [Datasets tutorial](https://huggingface.co/docs/datasets/load_hub.html) for more details on how to load a dataset) to see how you can use a processor for automatic speech recognition (ASR):" ] }, { @@ -1023,7 +995,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Since you are mainly interested in the `audio` and `text` column, remove the other columns:" + "For ASR, you're mainly focused on `audio` and `text` so you can remove the other columns:" ] }, { @@ -1105,14 +1077,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Processor" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A processor combines a feature extractor and tokenizer. Load a processor with [`AutoProcessor.from_pretrained]:" + "Load a processor with [AutoProcessor.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoProcessor.from_pretrained):" ] }, { @@ -1130,7 +1095,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "1. Create a function to process the audio data to `input_values`, and tokenizes the text to `labels`. These are your inputs to the model:" + "1. Create a function to process the audio data contained in `array` to `input_values`, and tokenizes the `text` to `labels`. These are your inputs to the model:" ] }, { @@ -1142,10 +1107,8 @@ "def prepare_dataset(example):\n", " audio = example[\"audio\"]\n", "\n", - " example[\"input_values\"] = processor(audio[\"array\"], sampling_rate=16000)\n", + " example.update(processor(audio=audio[\"array\"], text=example[\"text\"], sampling_rate=16000))\n", "\n", - " with processor.as_target_processor():\n", - " example[\"labels\"] = processor(example[\"text\"]).input_ids\n", " return example" ] }, @@ -1169,13 +1132,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the processor has added `input_values` and `labels`. The sampling rate has also been correctly downsampled to 16kHz.\n", - "\n", - "Awesome, you should now be able to preprocess data for any modality and even combine different modalities! In the next tutorial, learn how to fine-tune a model on your newly preprocessed data." 
+ "The processor has now added `input_values` and `labels`, and the sampling rate has also been correctly downsampled to 16kHz. You can pass your processed dataset to the model now!" ] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 4 } diff --git a/transformers_doc/quicktour.ipynb b/transformers_doc/quicktour.ipynb index f43d7dbc..2516094b 100644 --- a/transformers_doc/quicktour.ipynb +++ b/transformers_doc/quicktour.ipynb @@ -23,14 +23,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Get up and running with 🤗 Transformers! Start using the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) for rapid inference, and quickly load a pretrained model and tokenizer with an [AutoClass](https://huggingface.co/docs/transformers/main/en/./model_doc/auto) to solve your text, vision or audio task.\n", + "Get up and running with 🤗 Transformers! Whether you're a developer or an everyday user, this quick tour will help you get started and show you how to use the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) for inference, load a pretrained model and preprocessor with an [AutoClass](https://huggingface.co/docs/transformers/main/en/./model_doc/auto), and quickly train a model with PyTorch or TensorFlow. If you're a beginner, we recommend checking out our tutorials or [course](https://huggingface.co/course/chapter1/1) next for more in-depth explanations of the concepts introduced here.\n", "\n", - "\n", + "Before you begin, make sure you have all the necessary libraries installed:\n", "\n", - "All code examples presented in the documentation have a toggle on the top left for PyTorch and TensorFlow. If\n", - "not, the code is expected to work for both backends without any change.\n", + "```bash\n", + "!pip install transformers datasets\n", + "```\n", "\n", - "" + "You'll also need to install your preferred machine learning framework:\n", + "\n", + "```bash\n", + "pip install torch\n", + "```\n", + "```bash\n", + "pip install tensorflow\n", + "```" ] }, { @@ -40,13 +48,6 @@ "## Pipeline" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) is the easiest way to use a pretrained model for a given task." 
- ] - }, { "cell_type": "code", "execution_count": null, @@ -80,57 +81,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) supports many common tasks out-of-the-box:\n", - "\n", - "**Text**:\n", - "* Sentiment analysis: classify the polarity of a given text.\n", - "* Text generation (in English): generate text from a given input.\n", - "* Name entity recognition (NER): label each word with the entity it represents (person, date, location, etc.).\n", - "* Question answering: extract the answer from the context, given some context and a question.\n", - "* Fill-mask: fill in the blank given a text with masked words.\n", - "* Summarization: generate a summary of a long sequence of text or document.\n", - "* Translation: translate text into another language.\n", - "* Feature extraction: create a tensor representation of the text.\n", - "\n", - "**Image**:\n", - "* Image classification: classify an image.\n", - "* Image segmentation: classify every pixel in an image.\n", - "* Object detection: detect objects within an image.\n", - "\n", - "**Audio**:\n", - "* Audio classification: assign a label to a given segment of audio.\n", - "* Automatic speech recognition (ASR): transcribe audio data into text.\n", + "The [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) is the easiest and fastest way to use a pretrained model for inference. You can use the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) out-of-the-box for many tasks across different modalities, some of which are shown in the table below:\n", "\n", "\n", "\n", - "For more details about the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) and associated tasks, refer to the documentation [here](https://huggingface.co/docs/transformers/main/en/./main_classes/pipelines).\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pipeline usage" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the following example, you will use the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) for sentiment analysis.\n", + "For a complete list of available tasks, check out the [pipeline API reference](https://huggingface.co/docs/transformers/main/en/./main_classes/pipelines).\n", "\n", - "Install the following dependencies if you haven't already:\n", + "\n", "\n", - "```bash\n", - "pip install torch\n", - "```\n", - "```bash\n", - "pip install tensorflow\n", - "```\n", + "| **Task** | **Description** | **Modality** | **Pipeline identifier** |\n", + "|------------------------------|--------------------------------------------------------------------------------------------------------------|-----------------|-----------------------------------------------|\n", + "| Text classification | assign a label to a given sequence of text | NLP | pipeline(task=“sentiment-analysis”) |\n", + "| Text generation | generate text given a prompt | NLP | pipeline(task=“text-generation”) |\n", + "| Summarization | generate a summary of a sequence of text or document | NLP | pipeline(task=“summarization”) |\n", + "| Image classification | assign a label to an image | Computer vision | pipeline(task=“image-classification”) |\n", + "| Image segmentation | assign a 
label to each individual pixel of an image (supports semantic, panoptic, and instance segmentation) | Computer vision | pipeline(task=“image-segmentation”) |\n", + "| Object detection | predict the bounding boxes and classes of objects in an image | Computer vision | pipeline(task=“object-detection”) |\n", + "| Audio classification | assign a label to some audio data | Audio | pipeline(task=“audio-classification”) |\n", + "| Automatic speech recognition | transcribe speech into text | Audio | pipeline(task=“automatic-speech-recognition”) |\n", + "| Visual question answering | answer a question about the image, given an image and a question | Multimodal | pipeline(task=“vqa”) |\n", + "| Document question answering | answer a question about a document, given an image and a question | Multimodal | pipeline(task=\"document-question-answering\") |\n", + "| Image captioning | generate a caption for a given image | Multimodal | pipeline(task=\"image-to-text\") |\n", "\n", - "Import [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) and specify the task you want to complete:" + "Start by creating an instance of [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) and specifying a task you want to use it for. In this guide, you'll use the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) for sentiment analysis as an example:" ] }, { @@ -148,7 +121,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The pipeline downloads and caches a default [pretrained model](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) and tokenizer for sentiment analysis. Now you can use the `classifier` on your target text:" + "The [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) downloads and caches a default [pretrained model](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) and tokenizer for sentiment analysis. Now you can use the `classifier` on your target text:" ] }, { @@ -175,7 +148,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For more than one sentence, pass a list of sentences to the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) which returns a list of dictionaries:" + "If you have more than one input, pass your inputs as a list to the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) to return a list of dictionaries:" ] }, { @@ -201,6 +174,13 @@ " print(f\"label: {result['label']}, with score: {round(result['score'], 4)}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline) can also iterate over an entire dataset for any task you like. For this example, let's choose automatic speech recognition as our task:" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -230,7 +210,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, load a dataset (see the 🤗 Datasets [Quick Start](https://huggingface.co/docs/datasets/quickstart.html) for more details) you'd like to iterate over. 
For example, let's load the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) dataset:" + "Load an audio dataset (see the 🤗 Datasets [Quick Start](https://huggingface.co/docs/datasets/quickstart.html) for more details) you'd like to iterate over. For example, load the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) dataset:" ] }, { @@ -248,7 +228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We need to make sure that the sampling rate of the dataset matches the sampling \n", + "You need to make sure that the sampling rate of the dataset matches the sampling \n", "rate `facebook/wav2vec2-base-960h` was trained on." ] }, @@ -265,7 +245,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Audio files are automatically loaded and resampled when calling the `\"audio\"` column.\n", + "The audio files are automatically loaded and resampled when calling the `\"audio\"` column.\n", "Let's extract the raw waveform arrays of the first 4 samples and pass it as a list to the pipeline:" ] }, @@ -294,7 +274,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For a larger dataset where the inputs are big (like in speech or vision), you will want to pass along a generator instead of a list that loads all the inputs in memory. See the [pipeline documentation](https://huggingface.co/docs/transformers/main/en/./main_classes/pipelines) for more information." + "For a larger dataset where the inputs are big (like in speech or vision), you will want to pass along a generator instead of a list that loads all the inputs in memory. Take a look at the [pipeline API reference](https://huggingface.co/docs/transformers/main/en/./main_classes/pipelines) for more information." ] }, { @@ -362,7 +342,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then you can specify the model and tokenizer in the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline), and apply the `classifier` on your target text:" + "Specify the model and tokenizer in the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline), and now you can apply the `classifier` on French text:" ] }, { @@ -470,7 +450,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, the tokenizer converts the tokens into numbers in order to construct a tensor as input to the model. This is known as the model's *vocabulary*.\n", "\n", "Pass your text to the tokenizer:" ] @@ -507,7 +486,7 @@ "* [input_ids](https://huggingface.co/docs/transformers/main/en/./glossary#input-ids): numerical representions of your tokens.\n", "* [atttention_mask](https://huggingface.co/docs/transformers/main/en/.glossary#attention-mask): indicates which tokens should be attended to.\n", "\n", - "Just like the [pipeline()](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines#transformers.pipeline), the tokenizer will accept a list of inputs. In addition, the tokenizer can also pad and truncate the text to return a batch with uniform length:" + "A tokenizer can also accept a list of inputs, pad and truncate the text to return a batch with uniform length:" ] }, { @@ -544,7 +523,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Read the [preprocessing](https://huggingface.co/docs/transformers/main/en/./preprocessing) tutorial for more details about tokenization." 
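,
    "\n",
    "\n",
    "The other preprocessing classes follow the same `from_pretrained` pattern. For instance, a sketch that loads an image processor (`google/vit-base-patch16-224` is just an example checkpoint):\n",
    "\n",
    "```py\n",
    "from transformers import AutoImageProcessor\n",
    "\n",
    "image_processor = AutoImageProcessor.from_pretrained(\"google/vit-base-patch16-224\")\n",
    "```"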
+ "\n", + "\n", + "Check out the [preprocess](https://huggingface.co/docs/transformers/main/en/./preprocessing) tutorial for more details about tokenization, and how to use an [AutoImageProcessor](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoImageProcessor), [AutoFeatureExtractor](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoFeatureExtractor) and [AutoProcessor](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoProcessor) to preprocess image, audio, and multimodal inputs.\n", + "\n", + " " ] }, { @@ -692,17 +675,8 @@ "source": [ "\n", "\n", - "All 🤗 Transformers models (PyTorch or TensorFlow) outputs the tensors *before* the final activation\n", - "function (like softmax) because the final activation function is often fused with the loss.\n", - "\n", - "\n", - "\n", - "Models are a standard [`torch.nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) so you can use them in your usual training loop. However, to make things easier, 🤗 Transformers provides a [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) class for PyTorch that adds functionality for distributed training, mixed precision, and more. For TensorFlow, you can use the `fit` method from [Keras](https://keras.io/). Refer to the [training tutorial](https://huggingface.co/docs/transformers/main/en/./training) for more details.\n", - "\n", - "\n", - "\n", - "🤗 Transformers model outputs are special dataclasses so their attributes are autocompleted in an IDE.\n", - "The model outputs also behave like a tuple or a dictionary (e.g., you can index with an integer, a slice or a string) in which case the attributes that are `None` are ignored.\n", + "All 🤗 Transformers models (PyTorch or TensorFlow) output the tensors *before* the final activation\n", + "function (like softmax) because the final activation function is often fused with the loss. Model outputs are special dataclasses so their attributes are autocompleted in an IDE. The model outputs behave like a tuple or a dictionary (you can index with an integer, a slice or a string) in which case, attributes that are None are ignored.\n", "\n", "" ] @@ -812,9 +786,278 @@ "tokenizer = AutoTokenizer.from_pretrained(pt_save_directory)\n", "tf_model = TFAutoModelForSequenceClassification.from_pretrained(pt_save_directory, from_pt=True)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Custom model builds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can modify the model's configuration class to change how a model is built. The configuration specifies a model's attributes, such as the number of hidden layers or attention heads. You start from scratch when you initialize a model from a custom configuration class. The model attributes are randomly initialized, and you'll need to train the model before you can use it to get meaningful results.\n", + "\n", + "Start by importing [AutoConfig](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoConfig), and then load the pretrained model you want to modify. 
Within [AutoConfig.from_pretrained()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoConfig.from_pretrained), you can specify the attribute you want to change, such as the number of attention heads:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoConfig\n", + "\n", + "my_config = AutoConfig.from_pretrained(\"distilbert-base-uncased\", n_heads=12)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a model from your custom configuration with [AutoModel.from_config()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.FlaxAutoModelForVision2Seq.from_config):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModel\n", + "\n", + "my_model = AutoModel.from_config(my_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a model from your custom configuration with [TFAutoModel.from_config()](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.FlaxAutoModelForVision2Seq.from_config):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import TFAutoModel\n", + "\n", + "my_model = TFAutoModel.from_config(my_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Take a look at the [Create a custom architecture](https://huggingface.co/docs/transformers/main/en/./create_a_model) guide for more information about building custom configurations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Trainer - a PyTorch optimized training loop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All models are a standard [`torch.nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) so you can use them in any typical training loop. While you can write your own training loop, 🤗 Transformers provides a [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) class for PyTorch, which contains the basic training loop and adds additional functionality for features like distributed training, mixed precision, and more.\n", + "\n", + "Depending on your task, you'll typically pass the following parameters to [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer):\n", + "\n", + "1. A [PreTrainedModel](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel) or a [`torch.nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module):\n", + "\n", + " ```py\n", + " >>> from transformers import AutoModelForSequenceClassification\n", + "\n", + " >>> model = AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased\")\n", + " ```\n", + "\n", + "2. [TrainingArguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments) contains the model hyperparameters you can change like learning rate, batch size, and the number of epochs to train for. The default values are used if you don't specify any training arguments:\n", + "\n", + " ```py\n", + " >>> from transformers import TrainingArguments\n", + "\n", + " >>> training_args = TrainingArguments(\n", + " ... output_dir=\"path/to/save/folder/\",\n", + " ... 
learning_rate=2e-5,\n", + " ... per_device_train_batch_size=8,\n", + " ... per_device_eval_batch_size=8,\n", + " ... num_train_epochs=2,\n", + " ... )\n", + " ```\n", + "\n", + "3. A preprocessing class like a tokenizer, image processor, feature extractor, or processor:\n", + "\n", + " ```py\n", + " >>> from transformers import AutoTokenizer\n", + "\n", + " >>> tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n", + " ```\n", + "\n", + "4. Load a dataset:\n", + "\n", + " ```py\n", + " >>> from datasets import load_dataset\n", + "\n", + " >>> dataset = load_dataset(\"rotten_tomatoes\") # doctest: +IGNORE_RESULT\n", + " ```\n", + "\n", + "5. Create a function to tokenize the dataset:\n", + "\n", + " ```py\n", + " >>> def tokenize_dataset(dataset):\n", + " ... return tokenizer(dataset[\"text\"])\n", + " ```\n", + "\n", + " Then apply it over the entire dataset with [map](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.map):\n", + "\n", + " ```py\n", + " >>> dataset = dataset.map(tokenize_dataset, batched=True)\n", + " ```\n", + "\n", + "6. A [DataCollatorWithPadding](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DataCollatorWithPadding) to create a batch of examples from your dataset:\n", + "\n", + " ```py\n", + " >>> from transformers import DataCollatorWithPadding\n", + "\n", + " >>> data_collator = DataCollatorWithPadding(tokenizer=tokenizer)\n", + " ```\n", + "\n", + "Now gather all these classes in [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import Trainer\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=dataset[\"train\"],\n", + " eval_dataset=dataset[\"test\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=data_collator,\n", + ") # doctest: +SKIP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you're ready, call [train()](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.train) to start training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "For tasks - like translation or summarization - that use a sequence-to-sequence model, use the [Seq2SeqTrainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Seq2SeqTrainer) and [Seq2SeqTrainingArguments](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Seq2SeqTrainingArguments) classes instead.\n", + "\n", + "\n", + "\n", + "You can customize the training loop behavior by subclassing the methods inside [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer). This allows you to customize features such as the loss function, optimizer, and scheduler. Take a look at the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) reference for which methods can be subclassed. \n", + "\n", + "The other way to customize the training loop is by using [Callbacks](https://huggingface.co/docs/transformers/main/en/./main_classes/callbacks). 
You can use callbacks to integrate with other libraries and inspect the training loop to report on progress or stop the training early. Callbacks do not modify anything in the training loop itself. To customize something like the loss function, you need to subclass the [Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) instead." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train with TensorFlow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All models are a standard [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) so they can be trained in TensorFlow with the [Keras](https://keras.io/) API. 🤗 Transformers provides the [prepare_tf_dataset()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset) method to easily load your dataset as a `tf.data.Dataset` so you can start training right away with Keras' [`compile`](https://keras.io/api/models/model_training_apis/#compile-method) and [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) methods.\n", + "\n", + "1. You'll start with a [TFPreTrainedModel](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel) or a [`tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model):\n", + "\n", + " ```py\n", + " >>> from transformers import TFAutoModelForSequenceClassification\n", + "\n", + " >>> model = TFAutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased\")\n", + " ```\n", + "\n", + "2. A preprocessing class like a tokenizer, image processor, feature extractor, or processor:\n", + "\n", + " ```py\n", + " >>> from transformers import AutoTokenizer\n", + "\n", + " >>> tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n", + " ```\n", + "\n", + "3. Create a function to tokenize the dataset:\n", + "\n", + " ```py\n", + " >>> def tokenize_dataset(dataset):\n", + " ... return tokenizer(dataset[\"text\"]) # doctest: +SKIP\n", + " ```\n", + "\n", + "4. Apply the tokenizer over the entire dataset with [map](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.map) and then pass the dataset and tokenizer to [prepare_tf_dataset()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset). You can also change the batch size and shuffle the dataset here if you'd like:\n", + "\n", + " ```py\n", + " >>> dataset = dataset.map(tokenize_dataset) # doctest: +SKIP\n", + " >>> tf_dataset = model.prepare_tf_dataset(\n", + " ... dataset[\"train\"], batch_size=16, shuffle=True, tokenizer=tokenizer\n", + " ... ) # doctest: +SKIP\n", + " ```\n", + "\n", + "5. When you're ready, you can call `compile` and `fit` to start training. Note that Transformers models all have a default task-relevant loss function, so you don't need to specify one unless you want to:\n", + "\n", + " ```py\n", + " >>> from tensorflow.keras.optimizers import Adam\n", + "\n", + " >>> model.compile(optimizer=Adam(3e-5)) # No loss argument!\n", + " >>> model.fit(tf_dataset) # doctest: +SKIP\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's next?" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you've completed the 🤗 Transformers quick tour, check out our guides and learn how to do more specific things like writing a custom model, fine-tuning a model for a task, and how to train a model with a script. If you're interested in learning more about 🤗 Transformers core concepts, grab a cup of coffee and take a look at our Conceptual Guides!" + ] } ], - "metadata": {}, + "metadata": { + "language_info": { + "name": "python" + } + }, "nbformat": 4, "nbformat_minor": 4 } diff --git a/transformers_doc/training.ipynb b/transformers_doc/training.ipynb index 7420ba94..5a70035b 100644 --- a/transformers_doc/training.ipynb +++ b/transformers_doc/training.ipynb @@ -46,21 +46,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -81,19 +67,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'label': 0,\n", - " 'text': 'My expectations for McDonalds are t rarely high. But for one to still fail so spectacularly...that takes something special!\\\\nThe cashier took my friends\\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\\'s meal. After watching two people who ordered after me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\\\\"serving off their orders\\\\\" when they didn\\'t have their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\\\nThe manager was rude when giving me my order. She didn\\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that I felt I was getting poor service.\\\\nI\\'ve eaten at various McDonalds restaurants for over 30 years. I\\'ve worked at more than one location. I expect bad days, bad moods, and the occasional mistake. But I have yet to have a decent experience at this store. It will remain a place I avoid unless someone in my party needs to avoid illness from low blood sugar. Perhaps I should go back to the racially biased service of Steak n Shake instead!'}" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", @@ -157,6 +131,15 @@ "## Train" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, you should follow the section corresponding to the framework you want to use. You can use the links\n", + "in the right sidebar to jump to the one you want - and if you want to hide all of the content for a given framework,\n", + "just use the button at the top-right of that framework's block!" 
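The tokenization step that feeds the Trainer sits in unchanged context lines, so it does not appear in the hunks above. For reference, here is a minimal sketch of how the `small_train_dataset` and `small_eval_dataset` splits used later in this notebook are typically prepared; the `bert-base-cased` checkpoint, the padding settings, and the subset size are assumptions, not something this patch changes:

```py
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the Yelp reviews dataset used throughout the notebook
dataset = load_dataset("yelp_review_full")

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_function(examples):
    # Pad/truncate to a fixed length so examples batch cleanly
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Small subsets keep the demo quick; the full splits work the same way
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
```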
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -164,21 +147,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -186,6 +155,13 @@ "HTML('')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train with PyTorch Trainer" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -249,7 +225,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Metrics" + "### Evaluate" ] }, { @@ -266,9 +242,9 @@ "outputs": [], "source": [ "import numpy as np\n", - "from datasets import load_metric\n", + "import evaluate\n", "\n", - "metric = load_metric(\"accuracy\")" + "metric = evaluate.load(\"accuracy\")" ] }, { @@ -367,21 +343,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -393,21 +355,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "🤗 Transformers models also supports training in TensorFlow with the Keras API." + "## Train a TensorFlow model with Keras" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Convert dataset to TensorFlow format" + "You can also train 🤗 Transformers models in TensorFlow with the Keras API!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The [DefaultDataCollator](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DefaultDataCollator) assembles tensors into a batch for the model to train on. Make sure you specify `return_tensors` to return TensorFlow tensors:" + "### Loading data for Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When you want to train a 🤗 Transformers model with the Keras API, you need to convert your dataset to a format that\n", + "Keras understands. If your dataset is small, you can just convert the whole thing to NumPy arrays and pass it to Keras.\n", + "Let's try that first before we do anything more complicated.\n", + "\n", + "First, load a dataset. We'll use the CoLA dataset from the [GLUE benchmark](https://huggingface.co/datasets/glue),\n", + "since it's a simple binary text classification task, and just take the training split for now." 
   ]
  },
  {
   "cell_type": "code",
@@ -416,22 +390,33 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from transformers import DefaultDataCollator\n",
+    "from datasets import load_dataset\n",
     "\n",
-    "data_collator = DefaultDataCollator(return_tensors=\"tf\")"
+    "dataset = load_dataset(\"glue\", \"cola\")\n",
+    "dataset = dataset[\"train\"] #Just take the training split for now"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "\n",
-    "\n",
-    "[Trainer](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer) uses [DataCollatorWithPadding](https://huggingface.co/docs/transformers/main/en/main_classes/data_collator#transformers.DataCollatorWithPadding) by default so you don't need to explicitly specify a data collator.\n",
+    "from transformers import AutoTokenizer\n",
     "\n",
-    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-cased\")\n",
+    "tokenized_data = tokenizer(dataset[\"sentence\"], return_tensors=\"np\", padding=True)\n",
+    "#Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras\n",
+    "tokenized_data = dict(tokenized_data)\n",
     "\n",
-    "Next, convert the tokenized datasets to TensorFlow datasets with the [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset) method. Specify your inputs in `columns`, and your label in `label_cols`:"
+    "labels = np.array(dataset[\"label\"]) #Label is already an array of 0 and 1"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "Finally, load, [`compile`](https://keras.io/api/models/model_training_apis/#compile-method), and [`fit`](https://keras.io/api/models/model_training_apis/#fit-method) the model. Note that Transformers models all have a default task-relevant loss function, so you don't need to specify one unless you want to:"
    ]
   },
   {
@@ -440,35 +425,57 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tf_train_dataset = small_train_dataset.to_tf_dataset(\n",
-    "    columns=[\"attention_mask\", \"input_ids\", \"token_type_ids\"],\n",
-    "    label_cols=[\"labels\"],\n",
-    "    shuffle=True,\n",
-    "    collate_fn=data_collator,\n",
-    "    batch_size=8,\n",
-    ")\n",
-    "\n",
-    "tf_validation_dataset = small_eval_dataset.to_tf_dataset(\n",
-    "    columns=[\"attention_mask\", \"input_ids\", \"token_type_ids\"],\n",
-    "    label_cols=[\"labels\"],\n",
-    "    shuffle=False,\n",
-    "    collate_fn=data_collator,\n",
-    "    batch_size=8,\n",
-    ")"
+    "from transformers import TFAutoModelForSequenceClassification\n",
+    "from tensorflow.keras.optimizers import Adam\n",
+    "\n",
+    "# Load and compile our model\n",
+    "model = TFAutoModelForSequenceClassification.from_pretrained(\"bert-base-cased\")\n",
+    "# Lower learning rates are often better for fine-tuning transformers\n",
+    "model.compile(optimizer=Adam(3e-5)) # No loss argument!\n",
+    "\n",
+    "model.fit(tokenized_data, labels)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "\n",
+    "\n",
+    "You don't have to pass a loss argument to your models when you `compile()` them! Hugging Face models automatically\n",
+    "choose a loss that is appropriate for their task and model architecture if this argument is left blank. You can always\n",
+    "override this by specifying a loss yourself if you want to!\n",
+    "\n",
+    "\n",
+    "\n",
+    "This approach works great for smaller datasets, but for larger datasets, you might find it starts to become a problem.
Why?\n", + "Because the tokenized array and labels would have to be fully loaded into memory, and because NumPy doesn’t handle\n", + "“jagged” arrays, so every tokenized sample would have to be padded to the length of the longest sample in the whole\n", + "dataset. That’s going to make your array even bigger, and all those padding tokens will slow down training too!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Compile and fit" + "### Loading data as a tf.data.Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Load a TensorFlow model with the expected number of labels:" + "If you want to avoid slowing down training, you can load your data as a `tf.data.Dataset` instead. Although you can write your own\n", + "`tf.data` pipeline if you want, we have two convenience methods for doing this:\n", + "\n", + "- [prepare_tf_dataset()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset): This is the method we recommend in most cases. Because it is a method\n", + "on your model, it can inspect the model to automatically figure out which columns are usable as model inputs, and\n", + "discard the others to make a simpler, more performant dataset.\n", + "- [to_tf_dataset](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.to_tf_dataset): This method is more low-level, and is useful when you want to exactly control how\n", + "your dataset is created, by specifying exactly which `columns` and `label_cols` to include.\n", + "\n", + "Before you can use [prepare_tf_dataset()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.prepare_tf_dataset), you will need to add the tokenizer outputs to your dataset as columns, as shown in\n", + "the following code sample:" ] }, { @@ -477,17 +484,21 @@ "metadata": {}, "outputs": [], "source": [ - "import tensorflow as tf\n", - "from transformers import TFAutoModelForSequenceClassification\n", + "def tokenize_dataset(data):\n", + " # Keys of the returned dictionary will be added to the dataset as columns\n", + " return tokenizer(data[\"text\"])\n", + "\n", "\n", - "model = TFAutoModelForSequenceClassification.from_pretrained(\"bert-base-cased\", num_labels=5)" + "dataset = dataset.map(tokenize_dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Then compile and fine-tune your model with [`fit`](https://keras.io/api/models/model_training_apis/) as you would with any other Keras model:" + "Remember that Hugging Face datasets are stored on disk by default, so this will not inflate your memory usage! Once the\n", + "columns have been added, you can stream batches from the dataset and add padding to each batch, which greatly\n", + "reduces the number of padding tokens compared to padding the entire dataset." 
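For comparison with the `prepare_tf_dataset()` cell that follows, here is a rough sketch of the lower-level `to_tf_dataset` route described above. It assumes the tokenized CoLA `dataset` and the `tokenizer` from the earlier cells; the collator, column names, and batch size are illustrative assumptions rather than part of this patch:

```py
from transformers import DataCollatorWithPadding

# Pad each batch dynamically and return TensorFlow tensors
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")

tf_dataset = dataset.to_tf_dataset(
    columns=["attention_mask", "input_ids", "token_type_ids"],  # model inputs
    label_cols=["label"],  # CoLA stores its target in the "label" column
    shuffle=True,
    collate_fn=data_collator,
    batch_size=16,
)
```

Reaching for `to_tf_dataset` instead of `prepare_tf_dataset()` mainly makes sense when you need explicit control over which columns end up in each batch.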
] }, { @@ -496,20 +507,33 @@ "metadata": {}, "outputs": [], "source": [ - "model.compile(\n", - " optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " metrics=tf.metrics.SparseCategoricalAccuracy(),\n", - ")\n", - "\n", - "model.fit(tf_train_dataset, validation_data=tf_validation_dataset, epochs=3)" + "tf_dataset = model.prepare_tf_dataset(dataset[\"train\"], batch_size=16, shuffle=True, tokenizer=tokenizer)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "" + "Note that in the code sample above, you need to pass the tokenizer to `prepare_tf_dataset` so it can correctly pad batches as they're loaded.\n", + "If all the samples in your dataset are the same length and no padding is necessary, you can skip this argument.\n", + "If you need to do something more complex than just padding samples (e.g. corrupting tokens for masked language\n", + "modelling), you can use the `collate_fn` argument instead to pass a function that will be called to transform the\n", + "list of samples into a batch and apply any preprocessing you want. See our\n", + "[examples](https://github.com/huggingface/transformers/tree/main/examples) or\n", + "[notebooks](https://huggingface.co/docs/transformers/notebooks) to see this approach in action.\n", + "\n", + "Once you've created a `tf.data.Dataset`, you can compile and fit the model as before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(optimizer=Adam(3e-6)) #No loss argument!\n", + "\n", + "model.fit(tf_dataset)" ] }, { @@ -526,21 +550,7 @@ "cellView": "form", "hide_input": true }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#@title\n", "from IPython.display import HTML\n", @@ -564,7 +574,7 @@ "outputs": [], "source": [ "del model\n", - "del pytorch_model\n", + "# del pytorch_model\n", "del trainer\n", "torch.cuda.empty_cache()" ] @@ -771,7 +781,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Metrics" + "### Evaluate" ] }, { @@ -787,7 +797,9 @@ "metadata": {}, "outputs": [], "source": [ - "metric = load_metric(\"accuracy\")\n", + "import evaluate\n", + "\n", + "metric = evaluate.load(\"accuracy\")\n", "model.eval()\n", "for batch in eval_dataloader:\n", " batch = {k: v.to(device) for k, v in batch.items()}\n", @@ -828,7 +840,25 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, "nbformat": 4, "nbformat_minor": 4 }
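Both of the renamed "### Evaluate" sections rely on a `compute_metrics` helper that lives in unchanged context lines of training.ipynb. A minimal sketch consistent with the `evaluate.load("accuracy")` cells above; the function name and the argmax step follow the usual convention and are not added by this patch:

```py
import numpy as np
import evaluate

# Accuracy metric from the evaluate library (successor to datasets.load_metric)
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    # Trainer hands over (logits, labels); turn logits into class ids before scoring
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
```

Passed to `Trainer(..., compute_metrics=compute_metrics)`, this reports accuracy at each evaluation step.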