diff --git a/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb b/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb index af5c48cd4d..b28caf3d27 100644 --- a/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb +++ b/assets/clustering-visualizing-word-embeddings/clustering-visualizing-word-embeddings.ipynb @@ -423,24 +423,35 @@ "source": [ "# Adapted from https://stackoverflow.com/a/72503304\n", "import os\n", - "from io import BytesIO\n", - "from urllib.request import urlopen\n", - "from zipfile import ZipFile\n", - "\n", - "# Where is the Zipfile stored on Zenodo?\n", - "zipfile = 'clustering-visualizing-word-embeddings.zip'\n", - "zipurl = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n", - "\n", - "# Open the remote Zipfile and read it directly into Python\n", - "with urlopen(zipurl) as zipresp:\n", - " with ZipFile(BytesIO(zipresp.read())) as zf:\n", - " for zfile in zf.namelist():\n", - " if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n", - " print(f\"Extracting {zfile}\") # Update the user\n", - " zf.extract(zfile,'.')\n", - "# And rename the unzipped directory to 'data' --\n", - "# IMPORTANT: Note that if 'data' already exists it will (probably) be silently overwritten.\n", - "os.rename('clustering-visualizing-word-embeddings','data')" + "import pandas as pd\n", + "\n", + "dn = 'data'\n", + "fn = 'ph-tutorial-data-cleaned.parquet'\n", + "\n", + "if not os.path.exists(os.path.join(dn,fn)):\n", + " print(f\"Couldn't find {os.path.join('data',fn)}, downloading...\")\n", + " from io import BytesIO\n", + " from urllib.request import urlopen\n", + " from zipfile import ZipFile\n", + "\n", + " # Where is the Zipfile stored on Zenodo?\n", + " zipfile = 'clustering-visualizing-word-embeddings.zip'\n", + " zipurl = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n", + "\n", + " # Open the remote 
Zipfile and read it directly into Python\n", + " with urlopen(zipurl) as zipresp:\n", + " with ZipFile(BytesIO(zipresp.read())) as zf:\n", + " for zfile in zf.namelist():\n", + " if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n", + " print(f\"Extracting {zfile}\") # Update the user\n", + " zf.extract(zfile,'.')\n", + " print(\" Downloaded.\")\n", + " # And rename the unzipped directory to 'data' --\n", + " # IMPORTANT: if 'data' already exists, os.rename will not silently overwrite it -- it fails on Windows, and on Unix unless 'data' is an empty directory.\n", + " os.rename('clustering-visualizing-word-embeddings',dn)\n", + "\n", + "print(f\"Loading {fn}\")\n", + "df = pd.read_parquet(os.path.join(dn,fn))" ], "id": "BdKlC83wlu8a" },