Skip to content

Commit

Permalink
Adjust codeblock (as direct commit)
Browse files Browse the repository at this point in the history
  • Loading branch information
anisa-hawes committed Mar 14, 2024
1 parent 85862d6 commit 2992636
Showing 1 changed file with 29 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -423,24 +423,35 @@
"source": [
"# Adapted from https://stackoverflow.com/a/72503304\n",
"import os\n",
"from io import BytesIO\n",
"from urllib.request import urlopen\n",
"from zipfile import ZipFile\n",
"\n",
"# Where is the Zipfile stored on Zenodo?\n",
"zipfile = 'clustering-visualizing-word-embeddings.zip'\n",
"zipurl = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n",
"\n",
"# Open the remote Zipfile and read it directly into Python\n",
"with urlopen(zipurl) as zipresp:\n",
" with ZipFile(BytesIO(zipresp.read())) as zf:\n",
" for zfile in zf.namelist():\n",
" if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n",
" print(f\"Extracting {zfile}\") # Update the user\n",
" zf.extract(zfile,'.')\n",
"# And rename the unzipped directory to 'data' --\n",
"# IMPORTANT: if 'data' already exists and is not empty, os.rename raises an OSError (an empty 'data' is silently replaced on Unix; Windows always raises FileExistsError).\n",
"os.rename('clustering-visualizing-word-embeddings','data')"
"import pandas as pd\n",
"\n",
"dn = 'data'\n",
"fn = 'ph-tutorial-data-cleaned.parquet'\n",
"\n",
"if not os.path.exists(os.path.join(dn,fn)):\n",
" print(f\"Couldn't find {os.path.join('data',fn)}, downloading...\")\n",
" from io import BytesIO\n",
" from urllib.request import urlopen\n",
" from zipfile import ZipFile\n",
"\n",
" # Where is the Zipfile stored on Zenodo?\n",
" zipfile = 'clustering-visualizing-word-embeddings.zip'\n",
" zipurl = f'https://zenodo.org/records/7948908/files/{zipfile}?download=1'\n",
"\n",
" # Open the remote Zipfile and read it directly into Python\n",
" with urlopen(zipurl) as zipresp:\n",
" with ZipFile(BytesIO(zipresp.read())) as zf:\n",
" for zfile in zf.namelist():\n",
" if not zfile.startswith('__'): # Don't unpack hidden MacOSX junk\n",
" print(f\"Extracting {zfile}\") # Update the user\n",
" zf.extract(zfile,'.')\n",
" print(\" Downloaded.\")\n",
" # And rename the unzipped directory to 'data' --\n",
" # IMPORTANT: if 'data' already exists and is not empty, os.rename raises an OSError (an empty 'data' is silently replaced on Unix; Windows always raises FileExistsError).\n",
" os.rename('clustering-visualizing-word-embeddings',dn)\n",
"\n",
"print(f\"Loading {fn}\")\n",
"df = pd.read_parquet(os.path.join(dn,fn))"
],
"id": "BdKlC83wlu8a"
},
Expand Down

0 comments on commit 2992636

Please sign in to comment.