-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adicionar notebook, relatorio e csv do ano de 2012
- Loading branch information
1 parent
0df80f9
commit c0de8f1
Showing
3 changed files
with
103,426 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"9565483\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import dask as dask\n", | ||
"import dask.dataframe as dd\n", | ||
"\n", | ||
"\n", | ||
"# Columns to load from DM_ALUNO.CSV; every other column is skipped.\n", | ||
"# Each name appears exactly once (28 columns, matching the saved head() output).\n", | ||
"COLUMNS = [\n", | ||
"    'CO_COR_RACA_ALUNO',\n", | ||
"    'IN_SEXO_ALUNO',\n", | ||
"    'NU_ANO_ALUNO_NASC',\n", | ||
"    'IN_RESERVA_VAGAS',\n", | ||
"    'IN_FINANC_ESTUDANTIL',\n", | ||
"    'IN_RESERVA_ETNICO',\n", | ||
"    'IN_ING_VESTIBULAR',\n", | ||
"    'IN_ING_ENEM',\n", | ||
"    'IN_ING_OUTRO_TIPO_SELECAO',\n", | ||
"    'IN_ING_CONVENIO_PECG',\n", | ||
"    'IN_ING_OUTRA_FORMA',\n", | ||
"    'IN_RESERVA_DEFICIENCIA',\n", | ||
"    'IN_RESERVA_ENSINO_PUBLICO',\n", | ||
"    'IN_RESERVA_RENDA_FAMILIAR',\n", | ||
"    'IN_RESERVA_OUTROS',\n", | ||
"    'IN_FIN_REEMB_FIES',\n", | ||
"    'IN_FIN_REEMB_ESTADUAL',\n", | ||
"    'IN_FIN_REEMB_MUNICIPAL',\n", | ||
"    'IN_FIN_REEMB_PROG_IES',\n", | ||
"    'IN_FIN_REEMB_ENT_EXTERNA',\n", | ||
"    'IN_FIN_REEMB_OUTRA',\n", | ||
"    'IN_FIN_NAOREEMB_PROUNI_INTEGR',\n", | ||
"    'IN_FIN_NAOREEMB_PROUNI_PARCIAL',\n", | ||
"    'IN_FIN_NAOREEMB_ESTADUAL',\n", | ||
"    'IN_FIN_NAOREEMB_MUNICIPAL',\n", | ||
"    'IN_FIN_NAOREEMB_PROG_IES',\n", | ||
"    'IN_FIN_NAOREEMB_ENT_EXTERNA',\n", | ||
"    'ANO_INGRESSO',\n", | ||
"]\n", | ||
"\n", | ||
"# Open the pipe-delimited, ISO-8859-1 encoded CSV as a Dask DataFrame.\n", | ||
"# assume_missing=True makes Dask read integer columns without an explicit dtype\n", | ||
"# as floats so that missing values fit (hence the 0.0 / 1.0 / NaN values below).\n", | ||
"dataframe = dd.read_csv(\n", | ||
"    'DM_ALUNO.CSV',\n", | ||
"    delimiter='|',\n", | ||
"    encoding='ISO-8859-1',\n", | ||
"    assume_missing=True,\n", | ||
"    usecols=COLUMNS,\n", | ||
")\n", | ||
"\n", | ||
"# Counting rows forces a full pass over the file, so it is left disabled;\n", | ||
"# the last saved run printed 9565483.\n", | ||
"# print(len(dataframe))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 21, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"95658\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Draw a 1% random sample from the full Dask DataFrame.\n", | ||
"# A fixed random_state makes the same rows come back after a kernel restart,\n", | ||
"# so the report and the exported CSV can be regenerated consistently.\n", | ||
"sample = dataframe.sample(frac=0.01, random_state=42)\n", | ||
"# Counting rows triggers a full computation, so it is left disabled;\n", | ||
"# the last saved (unseeded) run printed 95658.\n", | ||
"# print(len(sample))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
" CO_COR_RACA_ALUNO IN_SEXO_ALUNO NU_ANO_ALUNO_NASC IN_RESERVA_VAGAS \\\n", | ||
"50527 6.0 1.0 1972.0 0.0 \n", | ||
"22416 0.0 1.0 1994.0 0.0 \n", | ||
"54050 1.0 0.0 1982.0 0.0 \n", | ||
"37050 0.0 1.0 1986.0 0.0 \n", | ||
"76654 1.0 0.0 1987.0 0.0 \n", | ||
"\n", | ||
" IN_FINANC_ESTUDANTIL IN_ING_VESTIBULAR IN_ING_ENEM \\\n", | ||
"50527 0.0 1.0 0.0 \n", | ||
"22416 1.0 1.0 1.0 \n", | ||
"54050 0.0 1.0 0.0 \n", | ||
"37050 1.0 1.0 0.0 \n", | ||
"76654 0.0 1.0 0.0 \n", | ||
"\n", | ||
" IN_ING_OUTRO_TIPO_SELECAO IN_ING_CONVENIO_PECG IN_ING_OUTRA_FORMA \\\n", | ||
"50527 0.0 0.0 0.0 \n", | ||
"22416 0.0 0.0 0.0 \n", | ||
"54050 0.0 0.0 0.0 \n", | ||
"37050 0.0 0.0 0.0 \n", | ||
"76654 0.0 NaN 0.0 \n", | ||
"\n", | ||
" ... IN_FIN_REEMB_PROG_IES IN_FIN_REEMB_ENT_EXTERNA \\\n", | ||
"50527 ... NaN NaN \n", | ||
"22416 ... 0.0 0.0 \n", | ||
"54050 ... NaN NaN \n", | ||
"37050 ... 0.0 0.0 \n", | ||
"76654 ... NaN NaN \n", | ||
"\n", | ||
" IN_FIN_REEMB_OUTRA IN_FIN_NAOREEMB_PROUNI_INTEGR \\\n", | ||
"50527 NaN NaN \n", | ||
"22416 0.0 0.0 \n", | ||
"54050 NaN NaN \n", | ||
"37050 0.0 0.0 \n", | ||
"76654 NaN NaN \n", | ||
"\n", | ||
" IN_FIN_NAOREEMB_PROUNI_PARCIAL IN_FIN_NAOREEMB_ESTADUAL \\\n", | ||
"50527 NaN NaN \n", | ||
"22416 0.0 0.0 \n", | ||
"54050 NaN NaN \n", | ||
"37050 0.0 0.0 \n", | ||
"76654 NaN NaN \n", | ||
"\n", | ||
" IN_FIN_NAOREEMB_MUNICIPAL IN_FIN_NAOREEMB_PROG_IES \\\n", | ||
"50527 NaN NaN \n", | ||
"22416 1.0 1.0 \n", | ||
"54050 NaN NaN \n", | ||
"37050 0.0 1.0 \n", | ||
"76654 NaN NaN \n", | ||
"\n", | ||
" IN_FIN_NAOREEMB_ENT_EXTERNA ANO_INGRESSO \n", | ||
"50527 NaN 2012.0 \n", | ||
"22416 0.0 2012.0 \n", | ||
"54050 NaN 2011.0 \n", | ||
"37050 0.0 2008.0 \n", | ||
"76654 NaN 2010.0 \n", | ||
"\n", | ||
"[5 rows x 28 columns]\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Preview the first rows of the sample as plain text.\n", | ||
"preview = sample.head()\n", | ||
"print(preview)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import pandas_profiling as pf" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 19, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Materialise the Dask sample as a pandas DataFrame, then build its profiling report.\n", | ||
"sample_pdf = sample.compute()\n", | ||
"report_2012 = pf.ProfileReport(sample_pdf)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 20, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Write the profiling report to an HTML file (path is relative to the working directory).\n", | ||
"report_path = \"report_2012.html\"\n", | ||
"report_2012.to_file(outputfile=report_path);" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 24, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "TypeError", | ||
"evalue": "to_csv() got an unexpected keyword argument 'delimiter'", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[0;32m<ipython-input-24-e56bb1a5934b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#export sample to csv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msample\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'sample_2012.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdelimiter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'|'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'ISO-8859-1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | ||
"\u001b[0;31mTypeError\u001b[0m: to_csv() got an unexpected keyword argument 'delimiter'" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# Materialise the sample and save it as CSV with the pandas defaults\n", | ||
"# (comma-separated, index written as the first column).\n", | ||
"# NOTE(review): the TypeError stored in this cell's output came from an earlier\n", | ||
"# attempt that passed `delimiter=`; pandas' to_csv names that option `sep`.\n", | ||
"sample_pdf = sample.compute()\n", | ||
"sample_pdf.to_csv('sample_2012.csv');" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.