matsim-org · chtozaivan · Oct 25, 2024
diff --git a/LinkedIn/03_02_loadingtext.ipynb b/LinkedIn/03_02_loadingtext.ipynb
diff --git a/LinkedIn/03_03_findinganagrams.ipynb b/LinkedIn/03_03_findinganagrams.ipynb
@@ -0,0 +1,291 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 03_03: Finding Anagrams"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "import collections\n",
+    "\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as pp\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "words = sorted({line.strip().lower() for line in open('words.txt', 'r')})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted(\"aaron\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted(\"elvis\") == sorted(\"lives\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted(\"elvis\") == sorted(\"sings\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'-'.join(sorted(\"aaron\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "''.join(sorted(\"aaron\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compute the signature of a word: its letters joined in sorted order, so all anagrams share one signature\n",
+    "\n",
+    "def signature(word):\n",
+    "    return ''.join(sorted(word))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# brute-force anagram search: compare myword's signature with the signature of\n",
+    "# every word in the dictionary and print each match (myword itself included, if listed)\n",
+    "\n",
+    "def find_anagram(myword):\n",
+    "    mysig = signature(myword)\n",
+    "    \n",
+    "    for word in words:\n",
+    "        if mysig == signature(word):\n",
+    "            print(word)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "find_anagram('dictionary')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%time find_anagram('dictionary')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# make a dict that maps each signature to the set of words with that signature;\n",
+    "# each signature will map to at least one word\n",
+    "\n",
+    "words_by_sig = collections.defaultdict(set)\n",
+    "\n",
+    "for word in words:\n",
+    "    words_by_sig[signature(word)].add(word)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "words_by_sig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# keep only the key/value pairs where the set has more than one element;\n",
+    "# the result is a regular dict (not a defaultdict), so a missing signature raises KeyError\n",
+    "\n",
+    "anagrams_by_sig = {sig: wordset for sig, wordset in words_by_sig.items() if len(wordset) > 1}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "anagrams_by_sig"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# smart anagram search: look up myword's signature and return its set of anagrams (KeyError if absent)\n",
+    "\n",
+    "def find_anagram_fast(myword):\n",
+    "    sig = signature(myword)\n",
+    "    \n",
+    "    return anagrams_by_sig[sig]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "find_anagram_fast('tops')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "find_anagram_fast('michele')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# handle the case where myword's signature is not found by returning an empty set\n",
+    "\n",
+    "def find_anagram_fast(myword):\n",
+    "    sig = signature(myword)\n",
+    "\n",
+    "    try:\n",
+    "        return anagrams_by_sig[sig]\n",
+    "    except KeyError:\n",
+    "        return set()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "find_anagram_fast('Michele')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%time find_anagram_fast('Michele')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# list of signatures, sorted by length, longest first\n",
+    "sorted(anagrams_by_sig.keys(), key=len, reverse=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# list of anagram sets, sorted by signature length, longest first\n",
+    "[anagrams_by_sig[sig] for sig in sorted(anagrams_by_sig.keys(), key=len, reverse=True)]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# list of anagram sets, sorted by set size (number of words), largest first\n",
+    "sorted(anagrams_by_sig.values(), key=len, reverse=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}