-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e1ec954
Showing
11 changed files
with
577 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Create the project's Python virtual environment under ~/.pragai-aws.
# Declared phony so a file or directory named "setup" can never make the
# target look up to date and silently skip the recipe.
.PHONY: setup
setup:
	python3 -m venv ~/.pragai-aws
|
||
# Install the Python dependencies listed in requirements.txt.
# Declared phony so a file or directory named "install" cannot mask the target.
.PHONY: install
install:
	pip install -r requirements.txt
|
||
# Run the unit tests with coverage for the paws package, then re-execute the
# notebooks through the nbval pytest plugin in lax mode.
#
# PYTHONPATH is given as a per-command environment prefix. The earlier
# "PYTHONPATH=. && pytest ..." form only assigned a shell variable, which is
# not passed to pytest unless PYTHONPATH was already exported by the caller.
.PHONY: test
test:
	PYTHONPATH=. pytest -vv --cov=paws tests/*.py
	PYTHONPATH=. pytest --nbval-lax notebooks/*.ipynb
|
||
|
||
# Lint the paws package with pylint; the refactor (R) and convention (C)
# message categories are disabled, leaving warnings, errors and fatals.
# Declared phony so a file or directory named "lint" cannot mask the target.
.PHONY: lint
lint:
	pylint --disable=R,C paws
|
||
# Aggregate target: build setup, install, test and lint, in that order.
# The target names must be prerequisites; written as a recipe line they would
# be executed as a shell command named "setup" and fail.
.PHONY: all
all: setup install test lint
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"2017-10-04 09:54:16,937 - Paws - INFO - Attempting download: gdelt-open-data, events/1979.csv, 1979.csv\n" | ||
] | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"DONE\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"#Add root checkout to path\n", | ||
"import sys\n", | ||
"sys.path.append(\"..\")\n", | ||
"import pandas as pd\n", | ||
"from paws.s3 import (boto_s3_resource, download)\n", | ||
"resource = boto_s3_resource()\n", | ||
"# csv_file = download(resource=resource, \n", | ||
"# bucket=\"gdelt-open-data\", \n", | ||
"# key=\"events/1979.csv\",\n", | ||
"# filename=\"1979.csv\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<style>\n", | ||
" .dataframe thead tr:only-child th {\n", | ||
" text-align: right;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe thead th {\n", | ||
" text-align: left;\n", | ||
" }\n", | ||
"\n", | ||
" .dataframe tbody tr th {\n", | ||
" vertical-align: top;\n", | ||
" }\n", | ||
"</style>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>0</th>\n", | ||
" <th>1</th>\n", | ||
" <th>2</th>\n", | ||
" <th>3</th>\n", | ||
" <th>4</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>count</th>\n", | ||
" <td>101</td>\n", | ||
" <td>63</td>\n", | ||
" <td>63</td>\n", | ||
" <td>59</td>\n", | ||
" <td>56</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>unique</th>\n", | ||
" <td>100</td>\n", | ||
" <td>34</td>\n", | ||
" <td>46</td>\n", | ||
" <td>36</td>\n", | ||
" <td>36</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>top</th>\n", | ||
" <td>0</td>\n", | ||
" <td>District of Columbia</td>\n", | ||
" <td>United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...</td>\n", | ||
" <td>District of Columbia</td>\n", | ||
" <td>United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5...</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>freq</th>\n", | ||
" <td>2</td>\n", | ||
" <td>6</td>\n", | ||
" <td>6</td>\n", | ||
" <td>7</td>\n", | ||
" <td>7</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" 0 1 \\\n", | ||
"count 101 63 \n", | ||
"unique 100 34 \n", | ||
"top 0 District of Columbia \n", | ||
"freq 2 6 \n", | ||
"\n", | ||
" 2 \\\n", | ||
"count 63 \n", | ||
"unique 46 \n", | ||
"top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", | ||
"freq 6 \n", | ||
"\n", | ||
" 3 \\\n", | ||
"count 59 \n", | ||
"unique 36 \n", | ||
"top District of Columbia \n", | ||
"freq 7 \n", | ||
"\n", | ||
" 4 \n", | ||
"count 56 \n", | ||
"unique 36 \n", | ||
"top United States\\tUS\\tUSDC\\t38.8951\\t-77.0364\\t5... \n", | ||
"freq 7 " | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"#Load the file, truncate it and save.\n", | ||
"# df = pd.read_csv(csv_file, names=range(5))\n", | ||
"# df = df.head(100)\n", | ||
"# df.to_csv(csv_file)\n", | ||
"df = pd.read_csv(\"1979.csv\", names=range(5))\n", | ||
"df.head()\n", | ||
"df.describe()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.