From 3763880002d8cb4fe65b37742738cc4cf68fcc4d Mon Sep 17 00:00:00 2001 From: Chaitanya Date: Sun, 17 Mar 2024 01:42:43 +0530 Subject: [PATCH] Pandas lecture using Pyscript --- _sources/index_en.rst | 1 + _sources/index_es.rst | 1 + _sources/lectures/TWP28/TWP28.rst | 198 ++++++++++++++++++++++++++ _sources/lectures/TWP28/TWP28_en.rst | 199 +++++++++++++++++++++++++++ 4 files changed, 399 insertions(+) create mode 100644 _sources/lectures/TWP28/TWP28.rst create mode 100644 _sources/lectures/TWP28/TWP28_en.rst diff --git a/_sources/index_en.rst b/_sources/index_en.rst index 07582b56c2..1b3fbfe800 100644 --- a/_sources/index_en.rst +++ b/_sources/index_en.rst @@ -18,6 +18,7 @@ Contents: lectures/TWP20/toctree_en lectures/TWP23/toctree_en lectures/TWP25/toctree_en + lectures/TWP28/TWP28_en lectures/TWP30/toctree_en lectures/TWP33/toctree_en lectures/TWP35/toctree_en diff --git a/_sources/index_es.rst b/_sources/index_es.rst index ecf1551c24..1fb189adef 100644 --- a/_sources/index_es.rst +++ b/_sources/index_es.rst @@ -18,6 +18,7 @@ Contenidos: lectures/TWP20/toctree lectures/TWP23/toctree lectures/TWP25/toctree + lectures/TWP28/TWP28 lectures/TWP30/toctree lectures/TWP33/toctree lectures/TWP35/toctree diff --git a/_sources/lectures/TWP28/TWP28.rst b/_sources/lectures/TWP28/TWP28.rst new file mode 100644 index 0000000000..ff6c3d8dc2 --- /dev/null +++ b/_sources/lectures/TWP28/TWP28.rst @@ -0,0 +1,198 @@ +=============== +Poder de Pandas +=============== + +.. image:: ../img/TWP10_001.jpeg + :height: 14.925cm + :width: 9.258cm + :align: center + :alt: + +Introducción +------------ +Esta conferencia trata sobre una de las herramientas más poderosas en el ecosistema de Python para análisis de datos: Pandas. En esta conferencia, nos embarcaremos en un viaje para explorar los entresijos de Pandas, comprendiendo sus capacidades para manejar, manipular y analizar datos de manera efectiva. 
+ +Entendiendo los Conceptos Básicos de Pandas +-------------------------------------------- +- Pandas, construido sobre NumPy, proporciona estructuras de datos y funciones para trabajar con datos estructurados. +- Componentes clave: Series (arreglo unidimensional etiquetado) y DataFrame (estructura de datos etiquetada bidimensional). +- Importando Pandas y cargando datos: ``import pandas as pd`` y ``pd.read_csv()``. + +Análisis Exploratorio de Datos (EDA) con Pandas +----------------------------------------------- +- Verificando las dimensiones de los datos: ``df.shape``. +- Obteniendo estadísticas resumidas: ``df.describe()``. +- Examinando tipos de datos y valores faltantes: ``df.info()``. + +Limpieza y Transformación de Datos +---------------------------------- +- Renombrando columnas para mayor claridad: ``df.rename(columns={'old_name': 'new_name'}, inplace=True)``. +- Manejando datos faltantes: ``df.dropna()``, ``df.fillna()``. +- Conversión de tipos de datos: ``df.astype()``. + +Manipulación y Agregación de Datos +----------------------------------- +- Seleccionando columnas y filas: ``df['column_name']``, ``df.loc[]``, ``df.iloc[]``. +- Filtrando datos: ``df.query()``. +- Agrupación y agregación de datos: ``df.groupby().agg()``. + +Visualización de Datos con Pandas +--------------------------------- +- Utilizando la integración de Matplotlib y Seaborn para visualizaciones. +- Gráficos básicos: ``df.plot()``. +- Gráficos de barras, histogramas, diagramas de caja: ``df.plot(kind='bar')``, ``df.plot(kind='hist')``, ``df.plot(kind='box')``. + +Técnicas Avanzadas de Análisis de Datos +---------------------------------------- +- Análisis de series temporales: Manejo de datos de fecha y hora con Pandas. +- Fusión y unión de conjuntos de datos: ``pd.merge()``, ``pd.concat()``. +- Manejo de duplicados: ``df.drop_duplicates()``. 
+ +Aplicaciones del Mundo Real y Estudios de Caso +---------------------------------------------- +- Análisis de datos de atención médica: Exploración de tiempos de espera de pacientes, distribución de servicios y tendencias geográficas. +- Análisis de datos financieros: Análisis del mercado de valores, gestión de carteras. +- Análisis de datos en redes sociales: Análisis de sentimientos, detección de tendencias. + +Mejores Prácticas y Optimización del Rendimiento +------------------------------------------------ +- Carga y almacenamiento eficientes de datos: Utilización de fragmentación, optimización de tipos de datos. +- Operaciones vectorizadas: Aprovechamiento de operaciones vectorizadas de Pandas para cálculos más rápidos. +- Gestión de memoria: Reducción del uso de memoria para conjuntos de datos grandes. + +Ejemplo +------- + +- Aquí hay algo que puedes desarrollar usando Pandas. Este ejemplo utiliza datos de productos de helado de Beijing. Los datos se leen de un archivo CSV y luego se visualizan usando matplotlib. El usuario puede seleccionar un sabor de helado y el gráfico mostrará la calificación del sabor seleccionado. + +.. 
code:: python + + import js + import pandas as pd + import matplotlib.pyplot as plt + + from pyodide.http import open_url + from pyodide.ffi import create_proxy + + url = ( + "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv" + ) + ice_data = pd.read_csv(open_url(url)) + + current_selected = [] + flavour_elements = js.document.getElementsByName("flavour") + + def plot(data): + plt.rcParams["figure.figsize"] = (22,20) + fig, ax = plt.subplots() + bars = ax.barh(data["name"], data["rating"], height=0.7) + ax.bar_label(bars) + plt.title("Rating of ice cream flavours of your choice") + display(fig, target="graph-area", append=False) + + def select_flavour(event): + for ele in flavour_elements: + if ele.checked: + current_selected = ele.value + break + if current_selected == "ALL": + plot(ice_data) + else: + filter = ice_data.apply(lambda x: ele.value in x["ingredients"], axis=1) + plot(ice_data[filter]) + + ele_proxy = create_proxy(select_flavour) + + for ele in flavour_elements: + if ele.value == "ALL": + ele.checked = True + current_selected = ele.value + ele.addEventListener("change", ele_proxy) + + plot(ice_data) + + +.. raw:: html + +
+ + + Selector de Helado + + + + + + + + packages = ["matplotlib", "pandas"] + + + + + import js + import pandas as pd + import matplotlib.pyplot as plt + + from pyodide.http import open_url + from pyodide.ffi import create_proxy + + url = ( + "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv" + ) + ice_data = pd.read_csv(open_url(url)) + + current_selected = [] + flavour_elements = js.document.getElementsByName("flavour") + + def plot(data): + plt.rcParams["figure.figsize"] = (22,20) + fig, ax = plt.subplots() + bars = ax.barh(data["name"], data["rating"], height=0.7) + ax.bar_label(bars) + plt.title("Rating of ice cream flavours of your choice") + display(fig, target="graph-area", append=False) + + def select_flavour(event): + for ele in flavour_elements: + if ele.checked: + current_selected = ele.value + break + if current_selected == "ALL": + plot(ice_data) + else: + filter = ice_data.apply(lambda x: ele.value in x["ingredients"], axis=1) + plot(ice_data[filter]) + + ele_proxy = create_proxy(select_flavour) + + for ele in flavour_elements: + if ele.value == "ALL": + ele.checked = True + current_selected = ele.value + ele.addEventListener("change", ele_proxy) + + plot(ice_data) + + + +
+ Seleccione su sabor de 🍨:
+ + + + + + + + + + + + +
+ +
+ + + diff --git a/_sources/lectures/TWP28/TWP28_en.rst b/_sources/lectures/TWP28/TWP28_en.rst new file mode 100644 index 0000000000..e1ebbf6204 --- /dev/null +++ b/_sources/lectures/TWP28/TWP28_en.rst @@ -0,0 +1,199 @@ +=============== +Power of Pandas +=============== + +.. image:: ../img/TWP10_001.jpeg + :height: 14.925cm + :width: 9.258cm + :align: center + :alt: + +Introduction +------------ +This lecture is on one of the most powerful tools in the Python ecosystem for data analysis - Pandas. In this lecture, we'll embark on a journey to explore the ins and outs of Pandas, understanding its capabilities in handling, manipulating, and analyzing data effectively. + +Understanding Pandas Basics +--------------------------- +- Pandas, built on top of NumPy, provides data structures and functions to work with structured data. +- Key components: Series (1-dimensional labeled array) and DataFrame (2-dimensional labeled data structure). +- Importing Pandas and loading data: ``import pandas as pd`` and ``pd.read_csv()``. + +Exploratory Data Analysis (EDA) with Pandas +------------------------------------------- +- Checking data dimensions: ``df.shape``. +- Getting summary statistics: ``df.describe()``. +- Examining data types and missing values: ``df.info()``. + +Data Cleaning and Transformation +-------------------------------- +- Renaming columns for clarity: ``df.rename(columns={'old_name': 'new_name'}, inplace=True)``. +- Handling missing data: ``df.dropna()``, ``df.fillna()``. +- Data type conversion: ``df.astype()``. + +Data Manipulation and Aggregation +--------------------------------- +- Selecting columns and rows: ``df['column_name']``, ``df.loc[]``, ``df.iloc[]``. +- Filtering data: ``df.query()``. +- Grouping and aggregating data: ``df.groupby().agg()``. + +Data Visualization with Pandas +------------------------------ +- Utilizing Matplotlib and Seaborn integration for visualizations. +- Basic plots: ``df.plot()``. 
+- Bar plots, histograms, box plots: ``df.plot(kind='bar')``, ``df.plot(kind='hist')``, ``df.plot(kind='box')``. + +Advanced Data Analysis Techniques +--------------------------------- +- Time series analysis: Handling datetime data with Pandas. +- Merging and joining datasets: ``pd.merge()``, ``pd.concat()``. +- Handling duplicates: ``df.drop_duplicates()``. + +Real-world Applications and Case Studies +---------------------------------------- +- Analyzing healthcare data: Exploring patient wait times, service distribution, and geographical trends. +- Financial data analysis: Stock market analysis, portfolio management. +- Social media data analysis: Sentiment analysis, trend detection. + +Best Practices and Performance Optimization +------------------------------------------- +- Efficient data loading and storage: Utilizing chunking, optimizing data types. +- Vectorized operations: Leveraging Pandas' vectorized operations for faster computations. +- Memory management: Reducing memory usage for large datasets. + +Example +------- + +- Here is something you can develop using Pandas. This example uses the data of ice cream products from Beijing. The data is read from a CSV file and then visualized using matplotlib. The user can select a flavour of ice cream and the graph will display the rating of the selected flavour. + +.. 
code:: python + + import js + import pandas as pd + import matplotlib.pyplot as plt + + from pyodide.http import open_url + from pyodide.ffi import create_proxy + + url = ( + "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv" + ) + ice_data = pd.read_csv(open_url(url)) + + current_selected = [] + flavour_elements = js.document.getElementsByName("flavour") + + def plot(data): + plt.rcParams["figure.figsize"] = (22,20) + fig, ax = plt.subplots() + bars = ax.barh(data["name"], data["rating"], height=0.7) + ax.bar_label(bars) + plt.title("Rating of ice cream flavours of your choice") + display(fig, target="graph-area", append=False) + + def select_flavour(event): + for ele in flavour_elements: + if ele.checked: + current_selected = ele.value + break + if current_selected == "ALL": + plot(ice_data) + else: + filter = ice_data.apply(lambda x: ele.value in x["ingredients"], axis=1) + plot(ice_data[filter]) + + ele_proxy = create_proxy(select_flavour) + + for ele in flavour_elements: + if ele.value == "ALL": + ele.checked = True + current_selected = ele.value + ele.addEventListener("change", ele_proxy) + + plot(ice_data) + + +.. raw:: html + +
+ + + Ice Cream Picker + + + + + + + + packages = ["matplotlib", "pandas"] + + + + + import js + import pandas as pd + import matplotlib.pyplot as plt + + from pyodide.http import open_url + from pyodide.ffi import create_proxy + + url = ( + "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv" + ) + ice_data = pd.read_csv(open_url(url)) + + current_selected = [] + flavour_elements = js.document.getElementsByName("flavour") + + def plot(data): + plt.rcParams["figure.figsize"] = (22,20) + fig, ax = plt.subplots() + bars = ax.barh(data["name"], data["rating"], height=0.7) + ax.bar_label(bars) + plt.title("Rating of ice cream flavours of your choice") + display(fig, target="graph-area", append=False) + + def select_flavour(event): + for ele in flavour_elements: + if ele.checked: + current_selected = ele.value + break + if current_selected == "ALL": + plot(ice_data) + else: + filter = ice_data.apply(lambda x: ele.value in x["ingredients"], axis=1) + plot(ice_data[filter]) + + ele_proxy = create_proxy(select_flavour) + + for ele in flavour_elements: + if ele.value == "ALL": + ele.checked = True + current_selected = ele.value + ele.addEventListener("change", ele_proxy) + + plot(ice_data) + + + +
+ Select your 🍨 flavour:
+ + + + + + + + + + + + +
+ +
+ + + +