diff --git a/Preprocess Stock Data.ipynb b/Preprocess Stock Data.ipynb new file mode 100644 index 0000000..729226f --- /dev/null +++ b/Preprocess Stock Data.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import to Postgres\n", + "\n", + "Load the prices and fundamentals into Postgres." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```shell\n", + "DB_NAME=stocks pgfutter csv prices.csv\n", + "DB_NAME=stocks pgfutter csv --fields \"ticker_indicator,date,value\" fundamentals.csv\n", + "```\n", + "\n", + "Set up the prices table.\n", + "\n", + "```sql\n", + "ALTER TABLE import.prices\n", + "SET SCHEMA public;\n", + "\n", + "ALTER TABLE prices ALTER COLUMN \"date\" \n", + "SET DATA TYPE date \n", + "USING date::date;\n", + "\n", + "CREATE INDEX idx_prices_date \n", + "ON prices(date);\n", + "```\n", + "\n", + "Set up the fundamentals materialized view.\n", + "\n", + "```sql\n", + "ALTER TABLE import.fundamentals ALTER COLUMN \"date\" \n", + "SET DATA TYPE date \n", + "USING date::date;\n", + "\n", + "CREATE MATERIALIZED VIEW fundamentals AS\n", + "SELECT split_part(ticker_indicator, '_', 1) as ticker, date, value as epsdil \n", + "FROM import.fundamentals\n", + "WHERE ticker_indicator LIKE '%EPSDIL_MRT';\n", + "\n", + "CREATE INDEX idx_fundamentals_date \n", + "ON fundamentals(date);\n", + "\n", + "CREATE INDEX idx_fundamentals_ticker \n", + "ON fundamentals(ticker);\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query and Export\n", + "\n", + "Export the prices with fundamentals. This took about 40 minutes." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```sql\n", + "COPY \n", + "\n", + "(SELECT p.*, \n", + "\n", + "CASE p.epsdil::numeric\n", + "\tWHEN 0 THEN 0\n", + "\tELSE p.adj_close::numeric / p.epsdil::numeric\n", + "END as pe\n", + "\n", + "FROM (\n", + "\tSELECT prices.adj_close, prices.date, prices.ticker,\n", + "\t(\n", + "\t\tSELECT fundamentals.epsdil\n", + "\t\tFROM fundamentals\n", + "\t\tWHERE prices.ticker = fundamentals.ticker\n", + "\t\tAND prices.date >= fundamentals.date \n", + "\t\tORDER BY fundamentals.date DESC\n", + "\t\tLIMIT 1\n", + "\t)\n", + "\tFROM prices\n", + "\tWHERE prices.adj_close IS NOT NULL AND prices.adj_close != ''\n", + ") p\n", + "WHERE p.epsdil IS NOT NULL AND p.epsdil != ''\n", + ")\n", + "\n", + "TO 'prices.csv' (format CSV);\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..61cac3d --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +## Run on floydhub + +```shell +floyd run --gpu --mode jupyter --env tensorflow-1.0 +``` diff --git a/floyd_requirements.txt b/floyd_requirements.txt new file mode 100644 index 0000000..78620c4 --- /dev/null +++ b/floyd_requirements.txt @@ -0,0 +1 @@ +tqdm diff --git a/stock2vec.ipynb b/stock2vec.ipynb index 7a9fe66..2889a18 100644 --- a/stock2vec.ipynb +++ b/stock2vec.ipynb @@ -18,7 +18,6 @@ "outputs": [], "source": [ "import csv\n", - "# from dateutil.relativedelta import relativedelta\n", 
"import datetime\n", "import math\n", "import numpy as np\n", @@ -39,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 56, "metadata": { "collapsed": false }, @@ -48,7 +47,64 @@ "name": "stderr", "output_type": "stream", "text": [ - "rows: 95000000it [02:14, 703865.57it/s] \n" + "Prices: 3%|▎ | 13.5M/485M [00:01<00:59, 7.95MB/s] \n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m'https://s3.amazonaws.com/perl-ml/prices.csv?response-content-disposition=attachment&X-Amz-Security-Token=FQoDYXdzEN3%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDDGDXIQxfhjhlalnoyKqAiqteedReEObibGFinGZUTbCNLqOsBrBfhb3m%2B9WSc202KdlXdoi8bxYATvctErFAeNF%2FlVgdPlu%2BRy8dLOHw5a%2BvTNM92V8V1XiJnuYgpe69GI914L1xceQGmcJ9qQ1Fg2iSi5cGj2%2FNL26CHIOmdblBGp6VUFUqtu0ZoRb18XXYBlSGQIGk4kxGfwiN5%2BbnQNB%2FInBx0YkDI5XFOIOXa1HzF4anoHgoSSjwdq8FXLQh8LXD5mYvqkTLokIssfZeJrc4TyPy9gZW4hewwbI4NAauQvJfde2Z%2BA%2B5iV4%2B%2B8wFFcDMeM%2Fg%2BYyrTVhaRVZ%2FIU033J6CXshjaL0uHwFleXw%2FHlzMjQst2YZmQu0EqxNCowwwxcugVsKcMaPdMq%2BWJ66qWxN5DZcC3oo%2FJvbxgU%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20170325T201144Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAIZGCD6XQ355X2AMA%2F20170325%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=0c2703d3dbef5f58006a3b7e89ff85b2b86e67542b861784fbff2da48434e0df'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mdataset_filename\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m pbar.hook)\n\u001b[0m", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/urllib/request.py\u001b[0m in \u001b[0;36murlretrieve\u001b[0;34m(url, 
filename, reporthook, data)\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 277\u001b[0;31m \u001b[0mblock\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 278\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mblock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0;31m# Amount is given, implement using readinto\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbytearray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmemoryview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtobytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/http/client.py\u001b[0m in 
\u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 491\u001b[0m \u001b[0;31m# connection, and the user is reading more bytes than will be provided\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 492\u001b[0m \u001b[0;31m# (for example, reading in 1k chunks)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 493\u001b[0;31m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 494\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 495\u001b[0m \u001b[0;31m# Ideally, we would raise IncompleteRead if the content-length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1000\u001b[0m \u001b[0;34m\"non-zero flags not allowed in calls to recv_into() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1001\u001b[0m self.__class__)\n\u001b[0;32m-> 1002\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1003\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1004\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 863\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Read on closed or unwrapped SSL socket.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 864\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 865\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 866\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mSSLError\u001b[0m 
\u001b[0;32mas\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mSSL_ERROR_EOF\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 623\u001b[0m \"\"\"\n\u001b[1;32m 624\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbuffer\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 625\u001b[0;31m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 626\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 627\u001b[0m \u001b[0mv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "from urllib.request import urlretrieve\n", + "from os.path import isfile, isdir\n", + "\n", + "dataset_folder_path = 'input'\n", + "dataset_filename = 'input/prices.csv'\n", + "dataset_name = 'Prices'\n", + "\n", + "class DLProgress(tqdm):\n", + " last_block = 0\n", + "\n", + " def hook(self, block_num=1, block_size=1, 
total_size=None):\n", + " self.total = total_size\n", + " self.update((block_num - self.last_block) * block_size)\n", + " self.last_block = block_num\n", + "\n", + "if not isfile(dataset_filename):\n", + " with DLProgress(unit='B', unit_scale=True, miniters=1, desc=dataset_name) as pbar:\n", + " urlretrieve(\n", + " 'https://s3.amazonaws.com/perl-ml/prices.csv?response-content-disposition=attachment&X-Amz-Security-Token=FQoDYXdzEN3%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaDDGDXIQxfhjhlalnoyKqAiqteedReEObibGFinGZUTbCNLqOsBrBfhb3m%2B9WSc202KdlXdoi8bxYATvctErFAeNF%2FlVgdPlu%2BRy8dLOHw5a%2BvTNM92V8V1XiJnuYgpe69GI914L1xceQGmcJ9qQ1Fg2iSi5cGj2%2FNL26CHIOmdblBGp6VUFUqtu0ZoRb18XXYBlSGQIGk4kxGfwiN5%2BbnQNB%2FInBx0YkDI5XFOIOXa1HzF4anoHgoSSjwdq8FXLQh8LXD5mYvqkTLokIssfZeJrc4TyPy9gZW4hewwbI4NAauQvJfde2Z%2BA%2B5iV4%2B%2B8wFFcDMeM%2Fg%2BYyrTVhaRVZ%2FIU033J6CXshjaL0uHwFleXw%2FHlzMjQst2YZmQu0EqxNCowwwxcugVsKcMaPdMq%2BWJ66qWxN5DZcC3oo%2FJvbxgU%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20170325T201144Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAIZGCD6XQ355X2AMA%2F20170325%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=0c2703d3dbef5f58006a3b7e89ff85b2b86e67542b861784fbff2da48434e0df',\n", + " dataset_filename,\n", + " pbar.hook)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "rows: 10000000it [00:15, 607491.88it/s] \n" ] }, { @@ -59,277 +115,128 @@ " \n", " \n", " \n", - " 0\n", - " 1\n", - " 2\n", + " adj_close\n", + " date\n", + " ticker\n", + " epsdil\n", + " pe\n", " \n", " \n", " \n", " \n", - " 65171973\n", - " PEP\n", - " 2001-06-16\n", - " 1.50\n", + " 45526\n", + " 13.745073\n", + " 2001-06-18\n", + " OLED\n", + " -0.87\n", + " -15.798935\n", " \n", " \n", - " 80257284\n", - " SVU\n", - " 2001-06-16\n", - " 0.47\n", + " 3046709\n", + " 6.147269\n", + " 2001-06-18\n", + " YUM\n", + " 0.69\n", + " 8.909085\n", " \n", " \n", - 
" 80572836\n", + " 1778402\n", + " 43.419875\n", + " 2001-06-18\n", " SWY\n", - " 2001-06-16\n", " 2.31\n", + " 18.796483\n", " \n", " \n", - " 93428968\n", - " YUM\n", - " 2001-06-16\n", - " 0.69\n", + " 315864\n", + " 29.645033\n", + " 2001-06-18\n", + " PEP\n", + " 1.50\n", + " 19.763356\n", " \n", " \n", - " 62307830\n", - " OLED\n", - " 2001-06-17\n", - " -0.87\n", + " 1743245\n", + " 11.213455\n", + " 2001-06-18\n", + " SVU\n", + " 0.47\n", + " 23.858415\n", " \n", " \n", "\n", "" ], "text/plain": [ - " 0 1 2\n", - "65171973 PEP 2001-06-16 1.50\n", - "80257284 SVU 2001-06-16 0.47\n", - "80572836 SWY 2001-06-16 2.31\n", - "93428968 YUM 2001-06-16 0.69\n", - "62307830 OLED 2001-06-17 -0.87" + " adj_close date ticker epsdil pe\n", + "45526 13.745073 2001-06-18 OLED -0.87 -15.798935\n", + "3046709 6.147269 2001-06-18 YUM 0.69 8.909085\n", + "1778402 43.419875 2001-06-18 SWY 2.31 18.796483\n", + "315864 29.645033 2001-06-18 PEP 1.50 19.763356\n", + "1743245 11.213455 2001-06-18 SVU 0.47 23.858415" ] }, - "execution_count": 2, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chunksize = 1000000\n", - "fund_rows = 94011143\n", + "price_rows = 9191528\n", "\n", - "fund_reader = pd.read_csv('input/sharadar_fundamentals.csv', \n", + "price_reader = pd.read_csv('input/prices.csv', \n", " header=None,\n", " parse_dates=[1],\n", " chunksize=chunksize, \n", " iterator=True)\n", "\n", - "df_fund = pd.DataFrame()\n", - "\n", - "with tqdm(total=fund_rows, desc='rows') as pbar:\n", - " for chunk in fund_reader:\n", - " # Select diluted earnings per share data\n", - " chunk = chunk[chunk[0].str.contains('EPSDIL_MRT')]\n", - " # Extract the ticker\n", - " chunk[0] = chunk[0].str.extract('(.+?(?=_))', expand=False)\n", - " df_fund = df_fund.append(chunk)\n", - " pbar.update(chunksize);\n", - "\n", - "# Sort by date, then ticker\n", - "df_fund.sort_values([1, 0], inplace=True)\n", - "\n", - "earliest_date = df_fund.values[0][1]\n", - "\n", 
- "df_fund.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Load the ticker prices." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "rows: 15000000it [00:21, 693364.20it/s] \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " ticker date adj_close\n", - "397 A 2001-06-18 28.510446\n", - "12636 AAN 2001-06-18 4.669972\n", - "19603 AAON 2001-06-18 1.883050\n", - "32593 AAPL 2001-06-18 1.323831\n", - "43324 ABAX 2001-06-18 5.458700\n" - ] - } - ], - "source": [ - "price_rows = 14684263\n", - "\n", - "# Import the prices\n", - "\n", - "prices_reader = pd.read_csv('input/wiki_prices.csv', \n", - " chunksize=chunksize,\n", - " parse_dates=['date'],\n", - " usecols=['adj_close', 'date', 'ticker'])\n", - "\n", "df_prices = pd.DataFrame()\n", "\n", "with tqdm(total=price_rows, desc='rows') as pbar:\n", - " for chunk in prices_reader:\n", - " chunk = chunk[pd.notnull(chunk['adj_close'])] \n", - " chunk = chunk[chunk['date'] >= earliest_date]\n", + " for chunk in price_reader:\n", " df_prices = df_prices.append(chunk)\n", " pbar.update(chunksize);\n", "\n", - "df_prices.sort_values(['date', 'ticker'], inplace=True)\n", + "df_prices.columns = ['adj_close', 'date', 'ticker', 'epsdil', 'pe']\n", "\n", - "print(df_prices.head())" + "# Sort by date, then P/E ratio\n", + "df_prices.sort_values(['date', 'pe'], inplace=True)\n", + "\n", + "df_prices.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Append diluted earnings per share." + "## Build context\n", + "\n", + "For each stock, find C stocks that have the closest price-to-earnings (P/E) ratio to that ticker for that day." 
] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "rows: 0%| | 1631/9969557 [01:13<106:16:34, 26.05it/s]\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdesc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'rows'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpbar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mfund_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mget_fund_cols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpbar\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0mfund_cols\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'fund_date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'epsdil'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m'pe'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mdf_prices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfund_cols\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[1;32m 4150\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4151\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4152\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4153\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[0;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[1;32m 4246\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4247\u001b[0m \u001b[0;32mfor\u001b[0m 
\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4248\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4249\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4250\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m(r)\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtotal\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdesc\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'rows'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpbar\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 30\u001b[0;31m \u001b[0mfund_cols\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mget_fund_cols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpbar\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 31\u001b[0m \u001b[0mfund_cols\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'fund_date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'epsdil'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'pe'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mdf_prices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_prices\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfund_cols\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mget_fund_cols\u001b[0;34m(row, pbar)\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mpbar\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mfund\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_fund\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mticker\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfund\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m\u001b[0m in \u001b[0;36mget_fund\u001b[0;34m(date, ticker)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_fund\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mticker\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# Find the most recent 
fundamentals before the date\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mfund\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_fund\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_fund\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mticker\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf_fund\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0mdate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtail\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfund\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, other, axis)\u001b[0m\n\u001b[1;32m 853\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 854\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ignore'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 855\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mna_op\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 856\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0misscalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 857\u001b[0m raise TypeError('Could not compare %s type with Series' %\n", - "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36mna_op\u001b[0;34m(x, y)\u001b[0m\n\u001b[1;32m 757\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 758\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 759\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_comp_method_OBJECT_ARRAY\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 760\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36m_comp_method_OBJECT_ARRAY\u001b[0;34m(op, x, y)\u001b[0m\n\u001b[1;32m 737\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvec_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 738\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 739\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscalar_compare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 740\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 741\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "window for 0 OLED 0\n", + "1 YUM\n", + "window for 5 OLED 0\n", + "1 YUM\n", + "2 SWY\n", + "3 PEP\n", + "4 SVU\n", + "3019\n" ] } ], - "source": [ - "def get_fund(date, ticker):\n", - " # Find the most recent fundamentals before the date\n", - " fund = df_fund[(df_fund[0] == ticker) & (df_fund[1] <= date)].tail(1) \n", - " if fund.shape[0] < 1:\n", - " return None\n", - " return fund.values[0]\n", - "\n", - "def price_to_earnings(price, epsdil):\n", - " if epsdil is None:\n", - " return None\n", - " if epsdil == 0:\n", - " return 0\n", - " return price / epsdil\n", - "\n", - "def get_fund_cols(row, pbar):\n", - " pbar.update()\n", - " \n", - " fund = get_fund(row.date, row.ticker)\n", - " \n", - " if fund is None:\n", - " return pd.Series([None, None, None])\n", - " \n", - " date = fund[1]\n", - " epsdil = fund[2]\n", - " pe = price_to_earnings(row.adj_close, epsdil)\n", - " \n", - " return pd.Series([date, epsdil, pe])\n", - "\n", - "with tqdm(total=df_prices.shape[0], desc='rows') as pbar:\n", - " fund_cols = df_prices.apply(lambda r: get_fund_cols(r, pbar), axis=1)\n", - " fund_cols.columns = ['fund_date', 'epsdil', 'pe']\n", - " df_prices = df_prices.join(fund_cols)\n", - "\n", - "print(df_prices.head())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Preprocessing\n", - "\n", - "Filter ticks for the past ~15 years of stocks with volume > 10000 and volatility > 0." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "df = df[df['date'] >= datetime.date(2016,1,1)]\n", - "df = df[df['adj_volume'] > 10000]\n", - "del df['adj_volume']\n", - "\n", - "df['volt'] = (df['adj_high'] - df['adj_low']) / df['adj_close']\n", - "del df['adj_high']\n", - "del df['adj_low']\n", - "del df['adj_close']\n", - "\n", - "df = df[df['volt'] > 0]\n", - "\n", - "df.sort_values(['date', 'volt'], inplace=True)\n", - "\n", - "n_stocks = len(df['ticker'].unique())\n", - "print('n_stocks', n_stocks)\n", - "\n", - "print(df.head())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Get Nearby Stocks**\n", - "\n", - "For each stock, find C stocks that have the closest volatility to that ticker for that day." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], "source": [ "ticker_to_int = {}\n", "int_to_ticker = {}\n", @@ -342,10 +249,10 @@ " return key\n", "\n", "def get_stock_date(stocks, idx):\n", - " return stocks.iloc[[idx], 1].values[0]\n", + " return stocks.iloc[idx][1]\n", "\n", "def get_stock_ticker(stocks, idx):\n", - " return stocks.iloc[[idx], 0].values[0]\n", + " return stocks.iloc[idx][2]\n", "\n", "def get_stock_int(stocks, idx):\n", " return get_ticker_int(get_stock_ticker(stocks, idx))\n", @@ -368,15 +275,15 @@ " \n", " return window\n", "\n", - "for idx in range(0, 10, 5):\n", - " print('window for', get_stock_ticker(df, idx), get_stock_int(df, idx))\n", - " for nearby_int in get_window(df, idx, 5):\n", + "for idx in range(0, 20, 10):\n", + " print('window for', idx, get_stock_ticker(df_prices, idx), get_stock_int(df_prices, idx))\n", + " for nearby_int in get_window(df_prices, idx, 5):\n", " print(nearby_int, int_to_ticker[nearby_int])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": { "collapsed": false }, @@ 
-405,12 +312,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [], "source": [ + "n_stocks = len(df_prices['ticker'].unique())\n", + "\n", "train_graph = tf.Graph()\n", "with train_graph.as_default():\n", " inputs = tf.placeholder(tf.int32, [None], name='inputs')\n", @@ -432,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 48, "metadata": { "collapsed": false }, @@ -456,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": { "collapsed": false }, @@ -489,15 +398,500 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10 Iteration: 10 Avg. Training loss: 5.2061 1.8950 sec/batch\n", + "Epoch 1/10 Iteration: 20 Avg. Training loss: 5.0781 1.9572 sec/batch\n", + "Epoch 1/10 Iteration: 30 Avg. Training loss: 4.9133 2.0237 sec/batch\n", + "Epoch 1/10 Iteration: 40 Avg. Training loss: 4.7580 2.0461 sec/batch\n", + "Epoch 1/10 Iteration: 50 Avg. Training loss: 4.7297 1.9665 sec/batch\n", + "Epoch 1/10 Iteration: 60 Avg. Training loss: 4.5833 2.0258 sec/batch\n", + "Epoch 1/10 Iteration: 70 Avg. Training loss: 4.6288 2.0546 sec/batch\n", + "Epoch 1/10 Iteration: 80 Avg. Training loss: 4.6524 2.1314 sec/batch\n", + "Epoch 1/10 Iteration: 90 Avg. Training loss: 4.7750 1.9557 sec/batch\n", + "Epoch 1/10 Iteration: 100 Avg. 
Training loss: 4.6779 1.9280 sec/batch\n", + "nearest[k] 2444\n", + "nearest[k] 2899\n", + "nearest[k] 1547\n", + "Nearest to ACAS: CATO, FAST, VLGEA, LM, MYE,\n", + "nearest[k] 1840\n", + "nearest[k] 2568\n", + "nearest[k] 2191\n", + "Nearest to RTN: GLF, ADS, HOLX, DDR, CAC,\n", + "nearest[k] 1922\n", + "nearest[k] 1977\n", + "nearest[k] 2228\n", + "nearest[k] 2757\n", + "nearest[k] 2825\n", + "Nearest to SYKE: GNCMA, PPL, BCOR,\n", + "nearest[k] 2439\n", + "nearest[k] 2354\n", + "nearest[k] 1745\n", + "Nearest to USAK: SENEA, CBB, MSFT, MSI, SPNC,\n", + "nearest[k] 2088\n", + "nearest[k] 2785\n", + "nearest[k] 2370\n", + "nearest[k] 1793\n", + "nearest[k] 1818\n", + "nearest[k] 2747\n", + "nearest[k] 2705\n", + "Nearest to IMMU: STT,\n", + "nearest[k] 1603\n", + "nearest[k] 1657\n", + "nearest[k] 2324\n", + "nearest[k] 2458\n", + "Nearest to UFPT: PHX, CINF, CHS, RMTI,\n", + "nearest[k] 2562\n", + "nearest[k] 2991\n", + "nearest[k] 1769\n", + "nearest[k] 1900\n", + "Nearest to SENEA: USAK, RE, COLB, ALR,\n", + "nearest[k] 1679\n", + "nearest[k] 2406\n", + "Nearest to MOD: PHM, ENZN, CTG, DST, FBC, VICR,\n", + "nearest[k] 2220\n", + "nearest[k] 2278\n", + "nearest[k] 1632\n", + "nearest[k] 2772\n", + "nearest[k] 2374\n", + "nearest[k] 1547\n", + "Nearest to AMG: LAMR, AAN,\n", + "nearest[k] 2098\n", + "nearest[k] 2526\n", + "Nearest to ORLY: MIND, GAIA, STAR, XEL, RGR, LCI,\n", + "nearest[k] 2316\n", + "nearest[k] 2583\n", + "nearest[k] 2135\n", + "Nearest to MTD: POOL, CNMD, RGEN, WINA, AIRM,\n", + "nearest[k] 2202\n", + "nearest[k] 2396\n", + "nearest[k] 2634\n", + "nearest[k] 1902\n", + "Nearest to GE: UFI, RE, MCS, TTWO,\n", + "nearest[k] 2874\n", + "nearest[k] 1797\n", + "nearest[k] 2969\n", + "nearest[k] 2726\n", + "nearest[k] 1990\n", + "nearest[k] 2155\n", + "Nearest to ICUI: IOSP, NFX,\n", + "nearest[k] 2448\n", + "nearest[k] 1594\n", + "nearest[k] 2647\n", + "Nearest to ECL: NDSN, VSAT, UTX, TILE, INT,\n", + "nearest[k] 3018\n", + "nearest[k] 2708\n", + 
"nearest[k] 2041\n", + "nearest[k] 1619\n", + "nearest[k] 2158\n", + "nearest[k] 2146\n", + "nearest[k] 1747\n", + "Nearest to INTC: C,\n", + "nearest[k] 1966\n", + "nearest[k] 2427\n", + "nearest[k] 1967\n", + "nearest[k] 1760\n", + "nearest[k] 2207\n", + "Nearest to FAST: CATY, ACAS, DIN,\n", + "Epoch 1/10 Iteration: 110 Avg. Training loss: 4.6291 2.0415 sec/batch\n", + "Epoch 1/10 Iteration: 120 Avg. Training loss: 4.5290 2.1442 sec/batch\n", + "Epoch 1/10 Iteration: 130 Avg. Training loss: 4.5225 2.1808 sec/batch\n", + "Epoch 1/10 Iteration: 140 Avg. Training loss: 4.4390 2.1799 sec/batch\n", + "Epoch 1/10 Iteration: 150 Avg. Training loss: 4.3236 2.2162 sec/batch\n", + "Epoch 1/10 Iteration: 160 Avg. Training loss: 4.3258 1.9534 sec/batch\n", + "Epoch 1/10 Iteration: 170 Avg. Training loss: 4.2205 1.9416 sec/batch\n", + "Epoch 1/10 Iteration: 180 Avg. Training loss: 4.3843 2.0543 sec/batch\n", + "Epoch 1/10 Iteration: 190 Avg. Training loss: 4.5850 2.2644 sec/batch\n", + "Epoch 1/10 Iteration: 200 Avg. 
Training loss: 4.4068 2.0691 sec/batch\n", + "nearest[k] 2444\n", + "nearest[k] 2899\n", + "nearest[k] 2569\n", + "nearest[k] 2975\n", + "Nearest to ACAS: CATO, FAST, VLGEA, MYE,\n", + "nearest[k] 1840\n", + "nearest[k] 2568\n", + "nearest[k] 2191\n", + "Nearest to RTN: ADS, GLF, CAC, HOLX, DDR,\n", + "nearest[k] 1922\n", + "nearest[k] 1977\n", + "nearest[k] 2757\n", + "nearest[k] 2228\n", + "nearest[k] 2825\n", + "Nearest to SYKE: GNCMA, PPL, BCOR,\n", + "nearest[k] 2439\n", + "nearest[k] 2354\n", + "nearest[k] 1745\n", + "Nearest to USAK: SENEA, MSFT, CBB, MSI, WWD,\n", + "nearest[k] 2088\n", + "nearest[k] 2785\n", + "nearest[k] 2370\n", + "nearest[k] 1818\n", + "nearest[k] 2747\n", + "nearest[k] 1793\n", + "nearest[k] 2705\n", + "Nearest to IMMU: STT,\n", + "nearest[k] 1603\n", + "nearest[k] 1657\n", + "nearest[k] 2324\n", + "nearest[k] 2458\n", + "Nearest to UFPT: PHX, CINF, RMTI, CHS,\n", + "nearest[k] 2562\n", + "nearest[k] 1769\n", + "nearest[k] 2991\n", + "nearest[k] 1900\n", + "Nearest to SENEA: USAK, RE, COLB, CPHD,\n", + "nearest[k] 1679\n", + "nearest[k] 2406\n", + "Nearest to MOD: PHM, ENZN, CTG, DST, INGR, GHC,\n", + "nearest[k] 2220\n", + "nearest[k] 2278\n", + "nearest[k] 1632\n", + "nearest[k] 2772\n", + "nearest[k] 2719\n", + "nearest[k] 2374\n", + "Nearest to AMG: AAN, BHB,\n", + "nearest[k] 2098\n", + "nearest[k] 2526\n", + "Nearest to ORLY: MIND, GAIA, STAR, XEL, RGR, LCI,\n", + "nearest[k] 2316\n", + "nearest[k] 2583\n", + "nearest[k] 2135\n", + "nearest[k] 2462\n", + "Nearest to MTD: POOL, CNMD, AIRM, AVA,\n", + "nearest[k] 2202\n", + "nearest[k] 2396\n", + "nearest[k] 2634\n", + "nearest[k] 1902\n", + "nearest[k] 2728\n", + "Nearest to GE: UFI, RE, ARCW,\n", + "nearest[k] 2874\n", + "nearest[k] 2969\n", + "nearest[k] 1797\n", + "nearest[k] 2726\n", + "nearest[k] 1990\n", + "nearest[k] 2155\n", + "Nearest to ICUI: IOSP, UBA,\n", + "nearest[k] 2448\n", + "nearest[k] 1594\n", + "nearest[k] 2647\n", + "nearest[k] 2177\n", + "Nearest to ECL: 
NDSN, VSAT, TILE, MXWL,\n", + "nearest[k] 3018\n", + "nearest[k] 2708\n", + "nearest[k] 2041\n", + "nearest[k] 2158\n", + "nearest[k] 2146\n", + "nearest[k] 1619\n", + "Nearest to INTC: C, BBOX,\n", + "nearest[k] 1966\n", + "nearest[k] 2427\n", + "nearest[k] 1967\n", + "nearest[k] 1760\n", + "nearest[k] 2207\n", + "Nearest to FAST: CATY, ACAS, DIN,\n", + "Epoch 1/10 Iteration: 210 Avg. Training loss: 4.3871 2.0840 sec/batch\n", + "Epoch 1/10 Iteration: 220 Avg. Training loss: 4.3398 2.1501 sec/batch\n", + "Epoch 1/10 Iteration: 230 Avg. Training loss: 4.2177 2.3706 sec/batch\n", + "Epoch 1/10 Iteration: 240 Avg. Training loss: 4.1333 2.0591 sec/batch\n", + "Epoch 1/10 Iteration: 250 Avg. Training loss: 4.0676 1.9890 sec/batch\n", + "Epoch 1/10 Iteration: 260 Avg. Training loss: 3.9750 2.0815 sec/batch\n", + "Epoch 1/10 Iteration: 270 Avg. Training loss: 3.9287 1.9713 sec/batch\n", + "Epoch 1/10 Iteration: 280 Avg. Training loss: 4.3463 2.8065 sec/batch\n", + "Epoch 1/10 Iteration: 290 Avg. Training loss: 4.2628 1.9577 sec/batch\n", + "Epoch 1/10 Iteration: 300 Avg. 
Training loss: 4.1398 2.0271 sec/batch\n", + "nearest[k] 2444\n", + "nearest[k] 2569\n", + "nearest[k] 2975\n", + "nearest[k] 2899\n", + "Nearest to ACAS: CATO, FAST, VLGEA, MYE,\n", + "nearest[k] 1840\n", + "nearest[k] 2568\n", + "nearest[k] 2191\n", + "Nearest to RTN: HOLX, ADS, GLF, CAC, PRKR,\n", + "nearest[k] 1922\n", + "nearest[k] 1977\n", + "nearest[k] 2757\n", + "nearest[k] 2228\n", + "Nearest to SYKE: GNCMA, MANH, PPL, QCOM,\n", + "nearest[k] 2439\n", + "nearest[k] 2354\n", + "nearest[k] 1745\n", + "nearest[k] 2025\n", + "Nearest to USAK: SENEA, MSFT, CBB, SPNC,\n", + "nearest[k] 2088\n", + "nearest[k] 2785\n", + "nearest[k] 2370\n", + "nearest[k] 1818\n", + "nearest[k] 2747\n", + "nearest[k] 2323\n", + "Nearest to IMMU: STT, HUBG,\n", + "nearest[k] 1603\n", + "nearest[k] 1657\n", + "nearest[k] 2324\n", + "nearest[k] 2638\n", + "nearest[k] 2458\n", + "Nearest to UFPT: PHX, RMTI, CINF,\n", + "nearest[k] 2562\n", + "nearest[k] 1769\n", + "nearest[k] 1635\n", + "nearest[k] 1900\n", + "nearest[k] 2991\n", + "Nearest to SENEA: USAK, RE, NNBR,\n", + "nearest[k] 2406\n", + "nearest[k] 1679\n", + "Nearest to MOD: PHM, ENZN, DST, CTG, GHC, INGR,\n", + "nearest[k] 2220\n", + "nearest[k] 1632\n", + "nearest[k] 2278\n", + "nearest[k] 2772\n", + "nearest[k] 2719\n", + "Nearest to AMG: AAN, BHB, LAMR,\n", + "nearest[k] 2098\n", + "Nearest to ORLY: MIND, GAIA, STAR, RGR, VAL, XEL, LCI,\n", + "nearest[k] 2316\n", + "nearest[k] 2583\n", + "nearest[k] 2135\n", + "nearest[k] 2462\n", + "Nearest to MTD: POOL, AIRM, CNMD, WINA,\n", + "nearest[k] 2202\n", + "nearest[k] 2396\n", + "nearest[k] 1902\n", + "nearest[k] 2634\n", + "nearest[k] 2728\n", + "Nearest to GE: UFI, RE, ARCW,\n", + "nearest[k] 2874\n", + "nearest[k] 2969\n", + "nearest[k] 2726\n", + "nearest[k] 1990\n", + "nearest[k] 1797\n", + "Nearest to ICUI: IOSP, UBA, NFX,\n", + "nearest[k] 1594\n", + "nearest[k] 2448\n", + "nearest[k] 2647\n", + "nearest[k] 2177\n", + "Nearest to ECL: NDSN, MXWL, VSAT, TILE,\n", + 
"nearest[k] 3018\n", + "nearest[k] 2708\n", + "nearest[k] 2041\n", + "nearest[k] 2146\n", + "nearest[k] 2158\n", + "nearest[k] 1619\n", + "Nearest to INTC: DIS, BBOX,\n", + "nearest[k] 2427\n", + "nearest[k] 1966\n", + "nearest[k] 2207\n", + "nearest[k] 1967\n", + "nearest[k] 1760\n", + "Nearest to FAST: ACAS, CATY, DIN,\n", + "Epoch 1/10 Iteration: 310 Avg. Training loss: 4.1017 2.0536 sec/batch\n", + "Epoch 1/10 Iteration: 320 Avg. Training loss: 4.0373 1.9826 sec/batch\n", + "Epoch 1/10 Iteration: 330 Avg. Training loss: 3.9007 2.1121 sec/batch\n", + "Epoch 1/10 Iteration: 340 Avg. Training loss: 3.7818 2.1092 sec/batch\n", + "Epoch 1/10 Iteration: 350 Avg. Training loss: 3.8070 2.0725 sec/batch\n", + "Epoch 1/10 Iteration: 360 Avg. Training loss: 3.7260 2.0439 sec/batch\n", + "Epoch 1/10 Iteration: 370 Avg. Training loss: 3.6276 1.9681 sec/batch\n", + "Epoch 1/10 Iteration: 380 Avg. Training loss: 4.0391 1.9432 sec/batch\n", + "Epoch 1/10 Iteration: 390 Avg. Training loss: 4.0613 2.0991 sec/batch\n", + "Epoch 1/10 Iteration: 400 Avg. 
Training loss: 3.8643 1.9802 sec/batch\n", + "nearest[k] 2444\n", + "nearest[k] 2569\n", + "nearest[k] 2975\n", + "nearest[k] 2899\n", + "Nearest to ACAS: CATO, FAST, MYE, LM,\n", + "nearest[k] 1840\n", + "nearest[k] 2191\n", + "nearest[k] 2568\n", + "Nearest to RTN: HOLX, OLED, CLFD, PRKR, ADS,\n", + "nearest[k] 1922\n", + "nearest[k] 1977\n", + "nearest[k] 2228\n", + "Nearest to SYKE: GNCMA, QCOM, MANH, PPL, BCOR,\n", + "nearest[k] 2439\n", + "nearest[k] 2354\n", + "nearest[k] 1745\n", + "nearest[k] 1713\n", + "Nearest to USAK: SENEA, SPNC, MSFT, JBL,\n", + "nearest[k] 2088\n", + "nearest[k] 2785\n", + "nearest[k] 2370\n", + "nearest[k] 1818\n", + "nearest[k] 2747\n", + "nearest[k] 2323\n", + "Nearest to IMMU: STT, HUBG,\n", + "nearest[k] 1603\n", + "nearest[k] 2324\n", + "nearest[k] 1657\n", + "nearest[k] 2638\n", + "Nearest to UFPT: PHX, RMTI, IDRA, CWST,\n", + "nearest[k] 1635\n", + "nearest[k] 2562\n", + "nearest[k] 1769\n", + "Nearest to SENEA: USAK, NBL, COLB, RE, NNBR,\n", + "nearest[k] 2406\n", + "nearest[k] 1679\n", + "nearest[k] 2185\n", + "Nearest to MOD: ENZN, PHM, DST, CTG, INGR,\n", + "nearest[k] 2220\n", + "nearest[k] 2772\n", + "nearest[k] 1632\n", + "nearest[k] 2719\n", + "nearest[k] 2278\n", + "Nearest to AMG: AAN, MATW, BHB,\n", + "nearest[k] 2098\n", + "Nearest to ORLY: MIND, GAIA, LCI, VAL, RGR, GSOL, MIDD,\n", + "nearest[k] 2316\n", + "nearest[k] 2583\n", + "nearest[k] 2462\n", + "nearest[k] 2135\n", + "Nearest to MTD: POOL, CNMD, AIRM, WINA,\n", + "nearest[k] 2202\n", + "nearest[k] 2396\n", + "nearest[k] 1902\n", + "nearest[k] 2634\n", + "nearest[k] 2728\n", + "Nearest to GE: UFI, RE, ARCW,\n", + "nearest[k] 2874\n", + "nearest[k] 2969\n", + "nearest[k] 2726\n", + "Nearest to ICUI: IOSP, AZO, NFX, UBA, OSIS,\n", + "nearest[k] 2647\n", + "nearest[k] 2448\n", + "nearest[k] 2177\n", + "Nearest to ECL: NDSN, TRI, VSAT, IDXX, MXWL,\n", + "nearest[k] 2708\n", + "nearest[k] 3018\n", + "nearest[k] 2146\n", + "nearest[k] 2041\n", + "nearest[k] 
2158\n", + "nearest[k] 1619\n", + "Nearest to INTC: DIS, BBOX,\n", + "nearest[k] 2427\n", + "nearest[k] 2207\n", + "nearest[k] 1712\n", + "nearest[k] 1760\n", + "nearest[k] 1967\n", + "nearest[k] 1966\n", + "Nearest to FAST: ACAS, CATY,\n", + "Epoch 1/10 Iteration: 410 Avg. Training loss: 3.7915 1.9852 sec/batch\n", + "Epoch 1/10 Iteration: 420 Avg. Training loss: 3.8297 1.9980 sec/batch\n", + "Epoch 1/10 Iteration: 430 Avg. Training loss: 3.7000 2.1386 sec/batch\n", + "Epoch 1/10 Iteration: 440 Avg. Training loss: 3.5404 2.0681 sec/batch\n", + "Epoch 1/10 Iteration: 450 Avg. Training loss: 3.4214 2.0249 sec/batch\n", + "Epoch 1/10 Iteration: 460 Avg. Training loss: 3.3465 2.0138 sec/batch\n", + "Epoch 1/10 Iteration: 470 Avg. Training loss: 3.4769 2.0312 sec/batch\n", + "Epoch 1/10 Iteration: 480 Avg. Training loss: 3.6643 2.0105 sec/batch\n", + "Epoch 1/10 Iteration: 490 Avg. Training loss: 3.8713 2.0370 sec/batch\n", + "Epoch 1/10 Iteration: 500 Avg. Training loss: 3.7673 2.0179 sec/batch\n", + "nearest[k] 2444\n", + "nearest[k] 2569\n", + "Nearest to ACAS: FAST, CATO, MYE, LM, BHB, VMI,\n", + "nearest[k] 1840\n", + "nearest[k] 2191\n", + "Nearest to RTN: HOLX, OLED, CLFD, PRKR, ARCW, ADS,\n", + "nearest[k] 1922\n", + "nearest[k] 1977\n", + "nearest[k] 2388\n", + "Nearest to SYKE: GNCMA, MANH, QCOM, PPL, BCOR,\n", + "nearest[k] 2439\n", + "nearest[k] 2354\n", + "nearest[k] 1745\n", + "Nearest to USAK: SENEA, MSFT, SPNC, JBL, ORCL,\n", + "nearest[k] 2088\n", + "nearest[k] 2785\n", + "nearest[k] 2370\n", + "nearest[k] 1818\n", + "nearest[k] 2747\n", + "nearest[k] 2323\n", + "Nearest to IMMU: STT, HUBG,\n", + "nearest[k] 2324\n", + "nearest[k] 1657\n", + "nearest[k] 2638\n", + "Nearest to UFPT: AKRX, PHX, RMTI, CWST, IDRA,\n", + "nearest[k] 1635\n", + "nearest[k] 2562\n", + "nearest[k] 1769\n", + "nearest[k] 2991\n", + "Nearest to SENEA: USAK, NNBR, RE, COLB,\n", + "nearest[k] 2406\n", + "nearest[k] 1679\n", + "Nearest to MOD: ENZN, PHM, DST, GHC, CTG, TDY,\n", + 
"nearest[k] 2220\n", + "nearest[k] 2719\n", + "nearest[k] 2772\n", + "nearest[k] 1632\n", + "nearest[k] 2278\n", + "Nearest to AMG: AAN, MATW, BHB,\n", + "Nearest to ORLY: GAIA, LCI, VAL, MIND, MNI, GSOL, RGR, MIDD,\n", + "nearest[k] 2316\n", + "nearest[k] 2583\n", + "nearest[k] 2135\n", + "Nearest to MTD: POOL, CNMD, WINA, AIRM, SLG,\n", + "nearest[k] 2202\n", + "nearest[k] 1902\n", + "nearest[k] 2396\n", + "nearest[k] 2728\n", + "Nearest to GE: RE, UFI, MCS, ARCW,\n", + "nearest[k] 2874\n", + "nearest[k] 2969\n", + "nearest[k] 2726\n", + "Nearest to ICUI: AZO, IOSP, OSIS, NFX, UBA,\n", + "nearest[k] 2647\n", + "nearest[k] 2177\n", + "Nearest to ECL: NDSN, TRI, IDXX, HOG, VSAT, ANSS,\n", + "nearest[k] 2708\n", + "nearest[k] 3018\n", + "nearest[k] 2041\n", + "nearest[k] 2146\n", + "nearest[k] 2158\n", + "nearest[k] 1619\n", + "Nearest to INTC: DIS, NUE,\n", + "nearest[k] 1712\n", + "nearest[k] 1760\n", + "nearest[k] 2207\n", + "nearest[k] 2427\n", + "Nearest to FAST: ACAS, FRED, CATY, LNN,\n", + "Epoch 1/10 Iteration: 510 Avg. Training loss: 3.6362 2.0846 sec/batch\n", + "Epoch 1/10 Iteration: 520 Avg. Training loss: 3.6206 2.1565 sec/batch\n", + "Epoch 1/10 Iteration: 530 Avg. Training loss: 3.5182 2.0636 sec/batch\n", + "Epoch 1/10 Iteration: 540 Avg. Training loss: 3.4995 2.0273 sec/batch\n", + "Epoch 1/10 Iteration: 550 Avg. Training loss: 3.2319 2.0127 sec/batch\n", + "Epoch 1/10 Iteration: 560 Avg. Training loss: 3.2393 1.9948 sec/batch\n", + "Epoch 1/10 Iteration: 570 Avg. Training loss: 3.2264 2.0412 sec/batch\n", + "Epoch 1/10 Iteration: 580 Avg. 
Training loss: 3.3506 2.0951 sec/batch\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mbatches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_prices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mstart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mbatches\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m feed = {inputs: x,\n\u001b[1;32m 18\u001b[0m labels: np.array(y)[:, None]}\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mget_batches\u001b[0;34m(stocks, batch_size, window_size)\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mbatch_x\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_stock_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mbatch_y\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mget_window\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbatch_x\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_y\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mget_window\u001b[0;34m(stocks, idx, window_size)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstop\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mnearby_stock_int\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_stock_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 32\u001b[0;31m \u001b[0mnearby_stock_date\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_stock_date\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 33\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mnearby_stock_int\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mstock_int\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mnearby_stock_date\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mstock_date\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
34\u001b[0m \u001b[0mwindow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnearby_stock_int\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mget_stock_date\u001b[0;34m(stocks, idx)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_stock_date\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mstocks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_stock_ticker\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstocks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1310\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1312\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1314\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1628\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_valid_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1629\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1630\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1631\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1632\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_convert_to_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_setter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_get_loc\u001b[0;34m(self, key, 
axis)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ixs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_slice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_ixs\u001b[0;34m(self, i, axis)\u001b[0m\n\u001b[1;32m 1974\u001b[0m \u001b[0mcopy\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1975\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1976\u001b[0;31m \u001b[0mnew_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfast_xs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1977\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mis_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1978\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnew_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36mfast_xs\u001b[0;34m(self, loc)\u001b[0m\n\u001b[1;32m 3500\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3501\u001b[0m \u001b[0;31m# unique\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3502\u001b[0;31m \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_interleaved_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3503\u001b[0m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3504\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/jperl/anaconda/envs/dlnd/lib/python3.6/site-packages/pandas/core/internals.py\u001b[0m in \u001b[0;36m_interleaved_dtype\u001b[0;34m(blocks)\u001b[0m\n\u001b[1;32m 4462\u001b[0m \u001b[0mcounts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdefaultdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4463\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mblocks\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4464\u001b[0;31m 
\u001b[0mcounts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4465\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4466\u001b[0m \u001b[0mhave_int\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcounts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mIntBlock\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], "source": [ - "epochs = 50\n", + "epochs = 10\n", "batch_size = 1000\n", - "window_size = 5\n", + "window_size = 10\n", "\n", "with train_graph.as_default():\n", " saver = tf.train.Saver()\n", @@ -508,7 +902,7 @@ " sess.run(tf.global_variables_initializer())\n", "\n", " for e in range(1, epochs+1):\n", - " batches = get_batches(df, batch_size, window_size)\n", + " batches = get_batches(df_prices, batch_size, window_size)\n", " start = time.time()\n", " for x, y in batches: \n", " feed = {inputs: x,\n",