instructlab · anastasds · Jun 10, 2025 · Jun 10, 2025 · alimaredia · Jun 11, 2025
@@ -260,7 +260,9 @@
    "source": [
     "### View the Chunks\n",
     "\n",
-    "To view the chunks, run through the following cell. As you can see the document is broken into small pieces with metadata about the chunk based on the document's format"
+    "To view a random sample of the generated chunks, run the following cell. Re-run the cell to see another random sample. If you notice chunking issues, try a different conversion and/or chunking configuration.\n",
+    "\n",
+    "As you can see, the document is broken into small pieces with metadata about the chunk based on the document's format."
    ]
   },
   {
@@ -270,8 +272,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#print(all_chunks)\n",
-    "print(chunks[0])"
+    "import random\n",
+    "import json\n",
+    "\n",
+    "sample = random.sample(all_chunks, min(len(all_chunks), 5))\n",
+    "for chunk in sample:\n",
+    "    print(f\"{chunk['chunk']}\\n\\n====\\n\")"
    ]
   },
   {
@@ -681,7 +687,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -695,7 +701,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.12"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,