Adjust codeblock (as direct commit)

programminghistorian · Mar 14, 2024 · 2992636 · 2992636
1 parent 85862d6
commit 2992636
Showing 1 changed file with 29 additions and 18 deletions.
diff --git a/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb b/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb
@@ -423,24 +423,35 @@
       "source": [
         "# Adapted from https://stackoverflow.com/a/72503304\n",
         "import os\n",
-        "from io import BytesIO\n",
-        "from urllib.request import urlopen\n",
-        "from zipfile import ZipFile\n",
-        "\n",
-        "# Where is the Zipfile stored on Zenodo?\n",
-        "zipfile = 'clustering-visualizing-word-embeddings.zip'\n",
-        "zipurl  = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n",
-        "\n",
-        "# Open the remote Zipfile and read it directly into Python\n",
-        "with urlopen(zipurl) as zipresp:\n",
-        "    with ZipFile(BytesIO(zipresp.read())) as zf:\n",
-        "        for zfile in zf.namelist():\n",
-        "            if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n",
-        "                print(f\"Extracting {zfile}\") # Update the user\n",
-        "                zf.extract(zfile,'.')\n",
-        "# And rename the unzipped directory to 'data' --\n",
-        "# IMPORTANT: Note that if 'data' already exists it will (probably) be silently overwritten.\n",
-        "os.rename('clustering-visualizing-word-embeddings','data')"
+        "import pandas as pd\n",
+        "\n",
+        "dn = 'data'                             # Local directory holding the tutorial data\n",
+        "fn = 'ph-tutorial-data-cleaned.parquet' # Data file expected inside that directory\n",
+        "\n",
+        "if not os.path.exists(os.path.join(dn,fn)): # Only download when the file is missing\n",
+        "    print(f\"Couldn't find {os.path.join(dn,fn)}, downloading...\")\n",
+        "    import shutil\n",
+        "    from io import BytesIO\n",
+        "    from urllib.request import urlopen\n",
+        "    from zipfile import ZipFile\n",
+        "    # Where is the Zipfile stored on Zenodo?\n",
+        "    zipfile = 'clustering-visualizing-word-embeddings.zip'\n",
+        "    zipurl  = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n",
+        "    # Open the remote Zipfile and read it directly into Python (held in memory, not saved)\n",
+        "    with urlopen(zipurl) as zipresp:\n",
+        "        with ZipFile(BytesIO(zipresp.read())) as zf:\n",
+        "            for zfile in zf.namelist():\n",
+        "                if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n",
+        "                    print(f\"Extracting {zfile}\") # Update the user\n",
+        "                    zf.extract(zfile,'.')\n",
+        "    # Merge the unzipped directory into 'data' rather than os.rename(), which raises if 'data'\n",
+        "    # already exists and isn't empty. Same-named files are overwritten. Needs Python 3.8+.\n",
+        "    shutil.copytree('clustering-visualizing-word-embeddings', dn, dirs_exist_ok=True)\n",
+        "    shutil.rmtree('clustering-visualizing-word-embeddings') # Remove the unzipped copy\n",
+        "    print(\"  Downloaded.\")\n",
+        "\n",
+        "print(f\"Loading {fn}\")\n",
+        "df = pd.read_parquet(os.path.join(dn,fn))"
       ],
       "id": "BdKlC83wlu8a"
     },