From e1ec954f04cde66c8c82e9fbd345c50a469ee2f0 Mon Sep 17 00:00:00 2001 From: "kennedy.behrman@sqor.com" Date: Wed, 4 Oct 2017 13:23:00 -0700 Subject: [PATCH] adding files to git --- Makefile | 18 ++ buildspec.yml | 0 .../.ipynb_checkpoints/paws-checkpoint.ipynb | 169 ++++++++++++++++++ notebooks/1979.csv | 101 +++++++++++ notebooks/paws.ipynb | 169 ++++++++++++++++++ paws/__init__.py | 1 + paws/s3.py | 27 +++ pcli.py | 36 ++++ requirements.txt | 13 ++ tests/test_paws_cli.py | 17 ++ tests/test_s3.py | 26 +++ 11 files changed, 577 insertions(+) create mode 100644 Makefile create mode 100644 buildspec.yml create mode 100644 notebooks/.ipynb_checkpoints/paws-checkpoint.ipynb create mode 100644 notebooks/1979.csv create mode 100644 notebooks/paws.ipynb create mode 100644 paws/__init__.py create mode 100644 paws/s3.py create mode 100755 pcli.py create mode 100644 requirements.txt create mode 100644 tests/test_paws_cli.py create mode 100644 tests/test_s3.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e5f6896 --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +setup: + python3 -m venv ~/.pragai-aws + +install: + pip install -r requirements.txt + +test: + PYTHONPATH=. && pytest -vv --cov=paws tests/*.py + PYTHONPATH=. && py.test --nbval-lax notebooks/*.ipynb + cov=pli + + +lint: + pylint --disable=R,C paws + +all: + setup install test lint + diff --git a/buildspec.yml b/buildspec.yml new file mode 100644 index 0000000..e69de29 diff --git a/notebooks/.ipynb_checkpoints/paws-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/paws-checkpoint.ipynb new file mode 100644 index 0000000..528d465 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/paws-checkpoint.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2017-10-04 09:54:16,937 - Paws - INFO - Attempting download: gdelt-open-data, events/1979.csv, 1979.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DONE\n" + ] + } + ], + "source": [ + "#Add root checkout to path\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "import pandas as pd\n", + "from paws.s3 import (boto_s3_resource, download)\n", + "resource = boto_s3_resource()\n", + "# csv_file = download(resource=resource, \n", + "# bucket=\"gdelt-open-data\", \n", + "# key=\"events/1979.csv\",\n", + "# filename=\"1979.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
count10163635956
unique10034463636
top0District of ColumbiaUnited States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...District of ColumbiaUnited States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...
freq26677
\n", + "
" + ], + "text/plain": [ + " 0 1 \\\n", + "count 101 63 \n", + "unique 100 34 \n", + "top 0 District of Columbia \n", + "freq 2 6 \n", + "\n", + " 2 \\\n", + "count 63 \n", + "unique 46 \n", + "top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", + "freq 6 \n", + "\n", + " 3 \\\n", + "count 59 \n", + "unique 36 \n", + "top District of Columbia \n", + "freq 7 \n", + "\n", + " 4 \n", + "count 56 \n", + "unique 36 \n", + "top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", + "freq 7 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load the file, truncate it and save.\n", + "# df = pd.read_csv(csv_file, names=range(5))\n", + "# df = df.head(100)\n", + "# df.to_csv(csv_file)\n", + "df = pd.read_csv(\"1979.csv\", names=range(5))\n", + "df.head()\n", + "df.describe()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/1979.csv b/notebooks/1979.csv new file mode 100644 index 0000000..a1db9c7 --- /dev/null +++ b/notebooks/1979.csv @@ -0,0 +1,101 @@ +,0,1,2,3,4 +,0,1,2,3,4 +0.0,0 19790101 197901 1979 1979.0027 AFR AFRICA AFR 1 040 040 04 1 1 9 1 9 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +1.0,1 19790101 197901 1979 1979.0027 AGR FARMER AGR 1 030 030 03 1 4 10 1 10 10.9792284866469 0 0 0 1 Nigeria NI NI 10 8 0 1 Nigeria NI NI 10 8 0 20130203,,,, +2.0,2 19790101 197901 1979 1979.0027 AGR FARMER AGR 1 100 100 10 3 -5 10 1 10 10.9792284866469 0 0 0 1 Nigeria NI NI 10 8 0 1 Nigeria NI NI 10 8 0 20130203,,,, +3.0,3 19790101 197901 1979 1979.0027 CHN CHINA CHN 1 043 043 04 1 2.8 2 1 2 7.73943889068043 0 0 0 4 Dizhou, Guangxi, China CH CH16 23.0036 106.359 -1903584 4 Dizhou, Guangxi, China CH CH16 23.0036 106.359 -1903584 20130203 +4.0,4 19790101 197901 1979 1979.0027 COP POLICE COP 0 190 190 19 4 -10 5 1 5 3.37909186906019 0 0 0 3 Dallas, Texas, United States US USTX 32.7831 -96.8067 1380944 3 Dallas, Texas, United States US USTX 32.7831 -96.8067 1380944 20130203 +5.0,5 19790101 197901 1979 1979.0027 CVL IMMIGRANT CVL 1 046 046 04 1 7 6 1 6 2.26244343891403 0 0 0 4 Emunim, HaDarom, Israel IS IS01 31.7444 34.6809 -779676 4 Emunim, HaDarom, Israel IS IS01 31.7444 34.6809 -779676 20130203 +6.0,6 19790101 197901 1979 1979.0027 EGYEDU EGYPTIAN EGY EDU 1 040 040 04 1 1 9 1 9 5.8252427184466 0 0 0 0 0 0 0 0 20130203,,,, +7.0,7 19790101 197901 1979 1979.0027 GOV DEPUTY PRIME MINISTER GOV 1 020 020 02 1 3 2 1 2 3.88349514563107 0 0 0 1 Turkey TU TU 39 35 0 1 Turkey TU TU 39 35 0 20130203,,,, +8.0,8 19790101 197901 1979 1979.0027 GOV REGIME GOV 1 040 040 04 1 1 2 1 2 5.29411764705882 0 0 0 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 20130203 +9.0,9 19790101 197901 1979 1979.0027 GOV REGIME GOV 1 040 040 04 1 1 8 1 8 5.29411764705882 0 0 0 4 Taiwan Strait, Taiwan (general), Taiwan TW TW00 24 119 -2637942 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 20130203 +10.0,10 19790101 197901 1979 1979.0027 GOV GOVERNOR GOV 1 040 040 04 1 1 3 1 3 2.32558139534884 0 0 0 4 Szczecin, Zachodniopomorskie, Poland PL PL87 53.4395 14.5939 -531881 4 Szczecin, Zachodniopomorskie, Poland PL PL87 53.4395 14.5939 -531881 20130203 +11.0,11 19790101 197901 1979 1979.0027 GOV GOVERNMENT GOV 0 042 042 04 1 1.9 9 1 9 4.85036119711042 0 0 0 4 Teheran, Iran (general), Iran IR IR00 35.705 51.4216 -3087341 4 Teheran, Iran (general), Iran IR IR00 35.705 51.4216 -3087341 20130203 +12.0,12 19790101 197901 1979 1979.0027 GOV CABINET GOV 0 043 043 04 1 2.8 9 1 9 4.21686746987952 0 0 0 4 Zimbabwe Rhodesia, Zimbabwe (general), Zimbabwe ZI ZI00 -19 29 -2324006 4 Zimbabwe Rhodesia, Zimbabwe (general), Zimbabwe ZI ZI00 -19 29 -2324006 20130203 +13.0,13 19790101 197901 1979 1979.0027 GOV KING GOV 1 046 046 04 1 7 15 2 15 6.99238245337536 0 0 0 0 0 0 0 0 20130203,,,, +14.0,14 19790101 197901 1979 1979.0027 GOV REGIME GOV 1 046 046 04 1 7 2 1 2 5.29411764705882 0 0 0 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 20130203 +15.0,15 19790101 197901 1979 1979.0027 GOV REGIME GOV 1 046 046 04 1 7 8 1 8 5.29411764705882 0 0 0 4 Taiwan Strait, Taiwan (general), Taiwan TW TW00 24 119 -2637942 4 Peiping, Beijing, China CH CH22 39.9289 116.388 -1898541 20130203 +16.0,16 19790101 197901 1979 1979.0027 GOV GOVERNMENT GOV 0 051 051 05 1 3.4 9 1 9 2.5 0 0 0 4 Phnom Penh, (CB11), Cambodia CB CB11 11.55 104.917 -1033783 4 Phnom Penh, (CB11), Cambodia CB CB11 11.55 104.917 -1033783 20130203 +17.0,17 19790101 197901 1979 1979.0027 GOVMIL MILITARY GOVERNMENT GOV MIL 0 043 043 04 1 2.8 5 1 5 4.85036119711042 0 0 0 4 Teheran, Iran (general), Iran IR IR00 35.705 51.4216 -3087341 4 Teheran, Iran (general), Iran IR IR00 35.705 51.4216 -3087341 20130203 +18.0,18 19790101 197901 1979 1979.0027 GUYGOV GUYANA GUY GOV 1 0841 084 08 2 7 2 1 2 3.29218106995885 0 0 0 4 Guyana, Guyana (general), Guyana GY GY00 4 -60 -919987 1 Russia RS RS 60 100 0 20130203,, +19.0,19 19790101 197901 1979 1979.0027 IRN AHVAZ IRN 1 042 042 04 1 1.9 2 1 2 4.46428571428571 0 0 0 4 Ahvaz, Khuzestan, Iran IR IR15 31.3203 48.6693 -3052107 4 Ahvaz, Khuzestan, Iran IR IR15 31.3203 48.6693 -3052108 20130203 +20.0,20 19790101 197901 1979 1979.0027 IRN IRANIAN IRN 1 0841 084 08 2 7 2 1 2 8.38958534233366 0 0 0 0 Mordad, Fars, Iran IR IR07 30.2322 51.5619 -3755961 0 Mordad, Fars, Iran IR IR07 30.2322 51.5619 20130203 +21.0,21 19790101 197901 1979 1979.0027 JPN JAPAN JPN 1 061 061 06 2 6.4 10 1 10 8.92857142857143 0 0 0 1 Japan JA JA 36 138 0 1 Japan JA JA 36 138 0 20130203,,,, +22.0,22 19790101 197901 1979 1979.0027 KHM CAMBODIAN KHM 1 043 043 04 1 2.8 2 1 2 3.30073349633252 0 0 0 4 Mondulkiri, Khet Mondul Kiri, Cambodia CB CB10 12.45 107.2 -1032742 4 Mondulkiri, Khet Mondul Kiri, Cambodia CB CB10 12.45 107.2 -1024640 20130203 +23.0,23 19790101 197901 1979 1979.0027 KHM CAMBODIA KHM 0 072 072 07 2 8.3 7 1 7 3.30073349633252 0 0 0 4 Mondulkiri, Khet Mondul Kiri, Cambodia CB CB10 12.45 107.2 -1032742 4 Mondulkiri, Khet Mondul Kiri, Cambodia CB CB10 12.45 107.2 -1024640 20130203 +24.0,24 19790101 197901 1979 1979.0027 KHM CAMBODIA KHM 0 072 072 07 2 8.3 2 1 2 3.30073349633252 0 0 0 4 Peking, Beijing, China CH CH22 39.9289 116.388 -1898541 4 Peking, Beijing, China CH CH22 39.9289 116.388 -1898545 20130203 +25.0,25 19790101 197901 1979 1979.0027 LAB UNIONS LAB 0 030 030 03 1 4 9 1 9 5.88235294117647 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +26.0,26 19790101 197901 1979 1979.0027 LAB UNIONS LAB 1 057 057 05 1 8 9 1 9 6.41399416909621 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +27.0,27 19790101 197901 1979 1979.0027 LEG REPRESENTATIVES LEG 1 042 042 04 1 1.9 5 1 5 5.79710144927536 0 0 0 4 Sadat, Shamal Sina', Egypt EG EG27 31.2237 34.2084 9232784 4 Sadat, Shamal Sina', Egypt EG EG27 31.2237 34.2084 9232784 20130203 +28.0,28 19790101 197901 1979 1979.0027 MED NEWSPAPER MED 1 030 030 03 1 4 9 1 9 5.88235294117647 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +29.0,29 19790101 197901 1979 1979.0027 MED WRITER MED 1 112 112 11 3 -2 9 1 9 1.38888888888889 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +30.0,30 19790101 197901 1979 1979.0027 MEX MEXICO MEX 1 043 043 04 1 2.8 9 1 9 5.40540540540541 0 0 0 0 0 0 0 0 20130203,,,, +31.0,31 19790101 197901 1979 1979.0027 MIL COMMANDER MIL 1 040 040 04 1 1 10 1 10 4.16666666666667 0 0 0 5 West Lake Region, Kagera, Tanzania TZ TZ19 -2 31.5 -2574875 5 West Lake Region, Kagera, Tanzania TZ TZ19 -2 31.5 20130203 +32.0,32 19790101 197901 1979 1979.0027 MOS IMAM MOS 0 042 042 04 1 1.9 4 1 4 7.86516853932584 0 0 0 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 20130203 +33.0,33 19790101 197901 1979 1979.0027 NPL KATHMANDU NPL 1 042 042 04 1 1.9 6 1 6 4.31654676258993 0 0 0 4 Islamabad, Islamabad, Pakistan PK PK08 33.7 73.1667 -2762812 4 Islamabad, Islamabad, Pakistan PK PK08 33.7 73.1667 -2762812 20130203 +34.0,34 19790101 197901 1979 1979.0027 RUS RUSSIA RUS 0 040 040 04 1 1 1 1 1 7.86516853932584 0 0 0 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 20130203 +35.0,35 19790101 197901 1979 1979.0027 RUS RUSSIA RUS 0 042 042 04 1 1.9 1 1 1 7.86516853932584 0 0 0 4 Mongol, Khabarovskiy Kray, Russia RS RS30 52.1256 140.381 -2959894 4 Mongol, Khabarovskiy Kray, Russia RS RS30 52.1256 140.381 -2959894 20130203 +36.0,36 19790101 197901 1979 1979.0027 RUS MOSCOW RUS 1 046 046 04 1 7 10 1 10 2.83018867924528 0 0 0 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 20130203 +37.0,37 19790101 197901 1979 1979.0027 TMP EAST TIMOR TMP 1 080 080 08 2 5 4 1 4 1.14942528735632 0 0 0 4 Dili, East Timor (general), East Timor TT TT00 -8.55861 125.574 -2676852 4 Dili, East Timor (general), East Timor TT TT00 -8.55861 125.574 9004104 20130203 +38.0,38 19790101 197901 1979 1979.0027 TMP EAST TIMOR TMP 1 190 190 19 4 -10 4 1 4 1.14942528735632 0 0 0 4 Dili, East Timor (general), East Timor TT TT00 -8.55861 125.574 -2676852 4 Dili, East Timor (general), East Timor TT TT00 -8.55861 125.574 9004104 20130203 +39.0,39 19790101 197901 1979 1979.0027 TWN TAIWAN 1 046 046 04 1 7 3 1 3 5.29411764705882 0 0 0 4 Taiwan Strait, Taiwan (general), Taiwan TW TW00 24 119 -2637942 4 Taiwan Strait, Taiwan (general), Taiwan TW TW00 24 119 -2637942 20130203 +40.0,40 19790101 197901 1979 1979.0027 TWN TAIWAN 0 051 051 05 1 3.4 2 1 2 7.73943889068043 0 0 0 4 Chang Tse, Jiayi Xian, Taiwan TW TW07 23.4227 120.153 -2635117 4 Dizhou, Guangxi, China CH CH16 23.0036 106.359 -1903584 20130203 +41.0,41 19790101 197901 1979 1979.0027 TWN TAIWAN 0 051 051 05 1 3.4 1 1 1 7.73943889068043 0 0 0 4 Chang Tse, Jiayi Xian, Taiwan TW TW07 23.4227 120.153 -2635117 4 Chang Tse, Jiayi Xian, Taiwan TW TW07 23.4227 120.153 -2635117 20130203 +42.0,42 19790101 197901 1979 1979.0027 TWN TAIWAN 0 051 051 05 1 3.4 1 1 1 7.73943889068043 0 0 0 4 Fuda, Fujian, China CH CH07 26.7006 118.82 11044526 4 Hsien, Hebei, China CH CH10 38.1822 116.111 -1931868 20130203 +43.0,43 19790101 197901 1979 1979.0027 TWN TAIWAN 0 051 051 05 1 3.4 6 1 6 7.73943889068043 0 0 0 4 Fuda, Fujian, China CH CH07 26.7006 118.82 11044526 4 Fuda, Fujian, China CH CH07 26.7006 118.82 11044526 20130203 +44.0,44 19790101 197901 1979 1979.0027 TWN TAIWAN 1 080 080 08 2 5 7 1 7 10.355486862442 0 0 0 4 Chiang-Kuo, Taidong Xian, Taiwan TW TW24 22.7596 121.164 10039671 4 Chiang-Kuo, Taidong Xian, Taiwan TW TW24 22.7596 121.164 10039671 20130203 +45.0,45 19790101 197901 1979 1979.0027 TWN TAIWAN 1 080 080 08 2 5 2 1 2 10.355486862442 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +46.0,46 19790101 197901 1979 1979.0027 TWNGOV TAIWAN GOV 1 043 043 04 1 2.8 5 1 5 9.09090909090909 0 0 0 4 Taipeh, T'ai-pei, Taiwan TW TW03 25.0478 121.532 -2637882 4 Taipeh, T'ai-pei, Taiwan TW TW03 25.0478 121.532 -2637882 20130203 +47.0,47 19790101 197901 1979 1979.0027 UAF TERRORIST UAF 1 043 043 04 1 2.8 10 1 10 1.68067226890756 0 0 0 4 Beitbridge, Limpopo, South Africa SF SF09 -22.2167 29.9833 -1209924 4 Beitbridge, Limpopo, South Africa SF SF09 -22.2167 29.9833 -1209924 20130203 +48.0,48 19790101 197901 1979 1979.0027 USA UNITED STATES USA 0 020 020 02 1 3 4 1 4 4.71698113207547 0 0 0 1 United States US US 38 -97 0 1 United States US US 38 -97 0 20130203,,,, +49.0,49 19790101 197901 1979 1979.0027 USA WASHINGTON USA 0 036 036 03 1 4 7 1 7 6.61921708185053 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +50.0,50 19790101 197901 1979 1979.0027 USA UNITED STATES USA 0 040 040 04 1 1 9 1 9 7.69230769230769 0 0 0 2 Ohio, United States US USOH 40.3736 -82.7755 0 2 Ohio, United States US USOH 40.3736 -82.7755 0 20130203,, +51.0,51 19790101 197901 1979 1979.0027 VNM VIETNAM VNM 1 051 051 05 1 3.4 7 1 7 4.3956043956044 0 0 0 1 Vietnam, Republic Of VM VM 16 106 0 4 Atlantic Ocean, , OC OCOC 10 -25 -1506320 20130203, +52.0,52 19790101 197901 1979 1979.0027 VNM HO CHI MINH CITY VNM 0 195 195 19 4 -10 2 1 2 3.30073349633252 0 0 0 4 Neak Luong, Prey Vê, Cambodia CB CB14 11.2756 105.283 -1028941 4 Mekong River, Vietnam (general), Vietnam +53.0,53 19790101 197901 1979 1979.0027 AFR AFRICA AFR 1 040 040 04 1 1 9 1 9 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +54.0,54 19790101 197901 1979 1979.0027 AFR AFRICA AFR 0 080 080 08 2 5 10 1 10 10.9792284866469 1 Nigeria NI NI 10 8 0 0 0 0 1 Nigeria NI NI 10 8 0 20130203,,,, +55.0,55 19790101 197901 1979 1979.0027 AFR AFRICA AFR FRA FRENCH FRA 1 050 050 05 1 3.5 4 1 4 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +56.0,56 19790101 197901 1979 1979.0027 AFR AFRICA AFR FRAGOV FRENCH FRA GOV 1 043 043 04 1 2.8 9 1 9 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +57.0,57 19790101 197901 1979 1979.0027 AFR AFRICA AFR ZMB ZAMBIAN ZMB 1 100 100 10 3 -5 9 1 9 8.75576036866359 0 0 0 0 0 0 0 0 20130203,,,, +58.0,58 19790101 197901 1979 1979.0027 ARG ARGENTINA ARG ARGGOVMIL ARGENTINA ARG GOV MIL 1 172 172 17 4 -5 4 1 4 2.29885057471264 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 20130203,,,, +59.0,59 19790101 197901 1979 1979.0027 ARG ARGENTINA ARG CHL CHILE CHL 1 172 172 17 4 -5 2 1 2 2.29885057471264 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 20130203,,,, +60.0,60 19790101 197901 1979 1979.0027 ARG ARGENTINA ARG CHL CHILE CHL 1 172 172 17 4 -5 2 1 2 2.29885057471264 1 Argentina AR AR -34 -64 0 1 Chile CI CI -30 -71 0 1 Argentina AR AR -34 -64 0 20130203,,,, +61.0,61 19790101 197901 1979 1979.0027 ARG ARGENTINA ARG VATGOV VATICAN VAT GOV 1 072 072 07 2 8.3 4 1 4 2.29885057471264 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 1 Argentina AR AR -34 -64 0 20130203,,,, +62.0,62 19790101 197901 1979 1979.0027 ATH ATHEIST ATH MOSGOV MOSLEM MOS GOV 0 120 120 12 3 -4 2 1 2 7.86516853932584 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 4 Mecca, Makkah, Saudi Arabia SA SA14 21.4267 39.8261 -3096949 4 Moscow +63.0,63 19790101 197901 1979 1979.0027 ATH ATHEIST ATH RUS RUSSIA RUS 0 120 120 12 3 -4 3 1 3 7.86516853932584 4 Mongol, Khabarovskiy Kray, Russia RS RS30 52.1256 140.381 -2959894 4 Mongol, Khabarovskiy Kray, Russia RS RS30 52.1256 140.381 -2959894 4 Mongol +64.0,64 19790101 197901 1979 1979.0027 ATH ATHEIST ATH RUS RUSSIA RUS 0 120 120 12 3 -4 2 1 2 7.86516853932584 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 0 Moscow, Moskva, Russia RS RS48 55.7522 37.6156 -2960561 4 Moscow +65.0,65 19790101 197901 1979 1979.0027 BUS EMPLOYER BUS 1 173 173 17 4 -5 5 1 5 5.26315789473684 1 Canada CA CA 60 -95 0 0 0 0 1 Canada CA CA 60 -95 0 20130203,,,, +66.0,66 19790101 197901 1979 1979.0027 BUS EMPLOYER BUS CHNGOV CHINESE CHN GOV 0 111 111 11 3 -2 7 1 7 6.61921708185053 4 Taiwa, Anhui, China CH CH01 31.85 117.683 -1901669 4 Taiwa, Anhui, China CH CH01 31.85 117.683 -1901669 4 Taiwa +67.0,67 19790101 197901 1979 1979.0027 BUS COMPANY BUS USA UNITED STATES USA 0 040 040 04 1 1 4 1 4 5.94594594594595 2 New Jersey, United States US USNJ 40.314 -74.5089 0 2 New Jersey, United States US USNJ 40.314 -74.5089 0 2 New Jersey, United States US USNJ 40.314 -74.5089 0 20130203, +68.0,68 19790101 197901 1979 1979.0027 CAF CENTRAL AFRICAN CAF COD ZAIRE COD 0 046 046 04 1 7 9 1 9 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +69.0,69 19790101 197901 1979 1979.0027 CAF CENTRAL AFRICAN CAF TCD CHAD TCD 0 046 046 04 1 7 9 1 9 5.52631578947368 0 0 0 0 0 0 0 0 20130203,,,, +70.0,70 19790101 197901 1979 1979.0027 CAN CANADA CAN 1 0841 084 08 2 7 4 1 4 5.26315789473684 1 Canada CA CA 60 -95 0 0 0 0 1 Canada CA CA 60 -95 0 20130203,,,, +71.0,71 19790101 197901 1979 1979.0027 CAN CANADA CAN 1 173 173 17 4 -5 4 1 4 5.26315789473684 1 Canada CA CA 60 -95 0 0 0 0 1 Canada CA CA 60 -95 0 20130203,,,, +72.0,72 19790101 197901 1979 1979.0027 CAN CANADA CAN CANBUS CANADA CAN BUS 1 030 030 03 1 4 1 1 1 5.26315789473684 1 Canada CA CA 60 -95 0 1 Canada CA CA 60 -95 0 1 Canada CA CA 60 -95 0 20130203,,,, +73.0,73 19790101 197901 1979 1979.0027 CAN CANADIAN CAN KHM KAMPUCHEA KHM 1 051 051 05 1 3.4 9 1 9 7.51445086705202 0 0 0 0 0 0 0 0 20130203,,,, +74.0,74 19790101 197901 1979 1979.0027 CANBUS CANADA CAN BUS 1 173 173 17 4 -5 4 1 4 5.26315789473684 1 Canada CA CA 60 -95 0 0 0 0 1 Canada CA CA 60 -95 0 20130203,,,, +75.0,75 19790101 197901 1979 1979.0027 CANCRM CANADA CAN CRM 1 081 081 08 2 5 1 1 1 5.26315789473684 1 Canada CA CA 60 -95 0 0 0 0 1 Canada CA CA 60 -95 0 20130203,,,, +76.0,76 19790101 197901 1979 1979.0027 CHL CHILE CHL 0 010 010 01 1 0 2 1 2 5.66318926974665 1 Chile CI CI -30 -71 0 0 0 0 1 Chile CI CI -30 -71 0 20130203,,,, +77.0,77 19790101 197901 1979 1979.0027 CHL CHILE CHL 0 020 020 02 1 3 9 1 9 5.66318926974665 1 Chile CI CI -30 -71 0 0 0 0 1 Chile CI CI -30 -71 0 20130203,,,, +78.0,78 19790101 197901 1979 1979.0027 CHL CHILEAN CHL CHLGOVLAB CHILE CHL GOV LAB 1 036 036 03 1 4 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +79.0,79 19790101 197901 1979 1979.0027 CHL CHILEAN CHL CHLGOVLAB CHILE CHL GOV LAB 1 040 040 04 1 1 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +80.0,80 19790101 197901 1979 1979.0027 CHL CHILE CHL CHLMIL CHILE CHL MIL 0 010 010 01 1 0 2 1 2 5.66318926974665 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +81.0,81 19790101 197901 1979 1979.0027 CHL CHILE CHL GOV PRESIDENT GOV 0 112 112 11 3 -2 4 1 4 5.66318926974665 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +82.0,82 19790101 197901 1979 1979.0027 CHL CHILEAN CHL GOVLAB LABOR MINIST GOV LAB 1 036 036 03 1 4 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +83.0,83 19790101 197901 1979 1979.0027 CHL CHILEAN CHL GOVLAB LABOR MINIST GOV LAB 1 040 040 04 1 1 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +84.0,84 19790101 197901 1979 1979.0027 CHL CHILE CHL HUN BUDAPEST HUN 1 040 040 04 1 1 2 1 2 7.40740740740741 1 Italy IT IT 42.8333 12.8333 0 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850553 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850558 20130203 +85.0,85 19790101 197901 1979 1979.0027 CHL CHILE CHL HUN BUDAPEST HUN 1 040 040 04 1 1 3 1 3 7.40740740740741 1 Italy IT IT 42.8333 12.8333 0 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850553 1 Italy IT IT 42.8333 12.8333 0 20130203,, +86.0,86 19790101 197901 1979 1979.0027 CHL CHILE CHL ITA ITALY ITA 1 046 046 04 1 7 2 1 2 7.40740740740741 1 Chile CI CI -30 -71 0 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850553 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850558 20130203 +87.0,87 19790101 197901 1979 1979.0027 CHL CHILE CHL ITA ITALIAN ITA 1 046 046 04 1 7 8 1 8 7.40740740740741 1 Chile CI CI -30 -71 0 1 Italy IT IT 42.8333 12.8333 0 4 Budapest, Budapest, Hungary HU HU05 47.5 19.0833 -850558 20130203,, +88.0,88 19790101 197901 1979 1979.0027 CHLGOVLAB CHILE CHL GOV LAB CHL CHILEAN CHL 1 036 036 03 1 4 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +89.0,89 19790101 197901 1979 1979.0027 CHLGOVLAB CHILE CHL GOV LAB CHL CHILEAN CHL 1 040 040 04 1 1 1 1 1 7.25388601036269 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +90.0,90 19790101 197901 1979 1979.0027 CHLGOVMIL CHILE CHL GOV MIL GOV JUNTA GOV 1 036 036 03 1 4 9 1 9 5.66318926974665 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 1 Chile CI CI -30 -71 0 20130203,,,, +91.0,91 19790101 197901 1979 1979.0027 CHN CHINESE CHN 1 010 010 01 1 0 2 1 2 10.355486862442 4 Peking, Beijing, China CH CH22 39.9289 116.388 -1898541 0 0 0 4 Peking, Beijing, China CH CH22 39.9289 116.388 -1898541 20130203 +92.0,92 19790101 197901 1979 1979.0027 CHN CHINESE CHN 1 010 010 01 1 0 4 1 4 10.355486862442 4 Peking, Beijing, China CH CH22 39.9289 116.388 -1898541 0 0 0 3 Washington, District of Columbia, United States US USDC 38.8951 -77.0364 531871 20130203 +93.0,93 19790101 197901 1979 1979.0027 CHN CHINA CHN 1 042 042 04 1 1.9 2 1 2 7.73943889068043 4 Changgong, Hunan, China CH CH11 29.0811 109.892 10466659 0 0 0 4 Changgong, Hunan, China CH CH11 29.0811 109.892 10466659 20130203 +94.0,94 19790101 197901 1979 1979.0027 CHN CHINA CHN 0 110 110 11 3 -2 3 1 3 6.61921708185053 4 Peng Chen, Fujian, China CH CH07 25.9383 119.651 444859 0 0 0 4 Peng Chen, Fujian, China CH CH07 25.9383 119.651 444859 20130203 +95.0,95 19790101 197901 1979 1979.0027 CHN CHINA CHN 1 154 154 15 4 -7.2 2 1 2 3.30073349633252 4 Neak Luong, Prey Vê, Cambodia CB CB14 11.2756 105.283 -1028941 0 0 0 4 Mekong River, Vietnam (general), Vietnam +96.0,96 19790101 197901 1979 1979.0027 CHN CHINA CHN 1 154 154 15 4 -7.2 6 1 6 3.30073349633252 4 Mekong River, Vietnam (general), Vietnam, Republic Of VM VM00 10.25 105.917 -3721425 0 0 0 4 Mekong River, Vietnam (general) +97.0,97 19790101 197901 1979 1979.0027 CHN CHINA CHN 1 154 154 15 4 -7.2 1 1 1 3.30073349633252 1 Russia RS RS 60 100 0 0 0 0 4 Mekong River, Vietnam (general), Vietnam, Republic Of VM VM00 10.25 105.917 -3721425 20130203, +98.0,98 19790101 197901 1979 1979.0027 CHN CHINESE CHN 1 180 180 18 4 -9 2 1 2 2.08333333333333 4 Cao Bang, Fujian, China CH CH07 26.5768 117.742 10808006 0 0 0 4 Dam Thuy, Qu?ng Ninh, Vietnam diff --git a/notebooks/paws.ipynb b/notebooks/paws.ipynb new file mode 100644 index 0000000..528d465 --- /dev/null +++ b/notebooks/paws.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2017-10-04 09:54:16,937 - Paws - INFO - Attempting download: gdelt-open-data, events/1979.csv, 1979.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DONE\n" + ] + } + ], + "source": [ + "#Add root checkout to path\n", + "import sys\n", + "sys.path.append(\"..\")\n", + "import pandas as pd\n", + "from paws.s3 import (boto_s3_resource, download)\n", + "resource = boto_s3_resource()\n", + "# csv_file = download(resource=resource, \n", + "# bucket=\"gdelt-open-data\", \n", + "# key=\"events/1979.csv\",\n", + "# filename=\"1979.csv\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
count10163635956
unique10034463636
top0District of ColumbiaUnited States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...District of ColumbiaUnited States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...
freq26677
\n", + "
" + ], + "text/plain": [ + " 0 1 \\\n", + "count 101 63 \n", + "unique 100 34 \n", + "top 0 District of Columbia \n", + "freq 2 6 \n", + "\n", + " 2 \\\n", + "count 63 \n", + "unique 46 \n", + "top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", + "freq 6 \n", + "\n", + " 3 \\\n", + "count 59 \n", + "unique 36 \n", + "top District of Columbia \n", + "freq 7 \n", + "\n", + " 4 \n", + "count 56 \n", + "unique 36 \n", + "top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", + "freq 7 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Load the file, truncate it and save.\n", + "# df = pd.read_csv(csv_file, names=range(5))\n", + "# df = df.head(100)\n", + "# df.to_csv(csv_file)\n", + "df = pd.read_csv(\"1979.csv\", names=range(5))\n", + "df.head()\n", + "df.describe()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/paws/__init__.py b/paws/__init__.py new file mode 100644 index 0000000..a53a3a5 --- /dev/null +++ b/paws/__init__.py @@ -0,0 +1 @@ +__version__ = "0" diff --git a/paws/s3.py b/paws/s3.py new file mode 100644 index 0000000..57a39b1 --- /dev/null +++ b/paws/s3.py @@ -0,0 +1,27 @@ +""" +S3 methods for PAWS library +""" + +import boto3 +from sensible.loginit import logger + +log = logger("Paws") + + +def boto_s3_resource(): + """Create boto3 S3 Resource""" + + return boto3.resource("s3") + +def download(bucket, key, filename, resource=None): + """Downloads file from s3""" + + if resource is None: + resource = boto_s3_resource() + log_msg = "Attempting download: {bucket}, {key}, {filename}".\ + format(bucket=bucket, key=key, filename=filename) + log.info(log_msg) + resource.meta.client.download_file(bucket, key, filename) + return filename + + diff --git a/pcli.py b/pcli.py new file mode 100755 index 0000000..ba3cc78 --- /dev/null +++ b/pcli.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +""" +Command-line Tool for Working with PAWS library +""" +import sys + +import click +import paws +from paws import s3 + +@click.version_option(paws.__version__) +@click.group() +def cli(): + """PAWS Tool""" + +@cli.command("download") +@click.option("--bucket", help="Name of S3 Bucket") +@click.option("--key", help="Name of S3 Key") +@click.option("--filename", help="Name of file") +def download(bucket, key, filename): + """Downloads an S3 file + ./paws-cli.py --bucket gdelt-open-data --key events/1979.csv --filename 1979.csv + """ + + if not bucket and not key and not filename: + click.echo("--bucket and --key and --filename are required") + sys.exit(1) + click.echo("Downloading s3 file with: bucket-{bucket},key{key},filename{filename}".\ + format(bucket=bucket, key=key, filename=filename)) + res = s3.download(bucket, key,filename) + click.echo(res) + +if __name__ == "__main__": + cli() + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7db4059 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +awscli +boto3 +moto +pytest +pylint +sensible +jupyter +pytest-cov +pandas +nbval +click + + diff --git a/tests/test_paws_cli.py b/tests/test_paws_cli.py new file mode 100644 index 0000000..276cdaf --- /dev/null +++ b/tests/test_paws_cli.py @@ -0,0 +1,17 @@ +import pytest +import click +from click.testing import CliRunner + +from pcli import cli +from paws import __version__ + +@pytest.fixture +def runner(): + cli_runner = CliRunner() + yield cli_runner + + +def test_cli(runner): + result = runner.invoke(cli, ['--version']) + assert __version__ in result.output + diff --git a/tests/test_s3.py b/tests/test_s3.py new file mode 100644 index 0000000..0a52885 --- /dev/null +++ b/tests/test_s3.py @@ -0,0 +1,26 @@ +import pytest +import boto3 +from moto import mock_s3 +from paws.s3 import download + +@pytest.yield_fixture(scope="session") +def mock_boto(): + """Setup Mock Objects""" + + mock_s3().start() + output_str = 'Something' + resource = boto3.resource('s3') + resource.create_bucket(Bucket="gdelt-open-data") + resource.Bucket("gdelt-open-data").put_object(Key="events/1979.csv", + Body=output_str) + yield resource + mock_s3().stop() + +def test_download(mock_boto): + """Test s3 download function""" + + resource = mock_boto + res = download(resource=resource, bucket="gdelt-open-data", + key="events/1979.csv",filename="1979.csv") + assert res == "1979.csv" +