From 3763880002d8cb4fe65b37742738cc4cf68fcc4d Mon Sep 17 00:00:00 2001
From: Chaitanya <ct765@snu.edu.in>
Date: Sun, 17 Mar 2024 01:42:43 +0530
Subject: [PATCH] Pandas lecture using Pyscript

---
 _sources/index_en.rst                |   1 +
 _sources/index_es.rst                |   1 +
 _sources/lectures/TWP28/TWP28.rst    | 198 ++++++++++++++++++++++++++
 _sources/lectures/TWP28/TWP28_en.rst | 199 +++++++++++++++++++++++++++
 4 files changed, 399 insertions(+)
 create mode 100644 _sources/lectures/TWP28/TWP28.rst
 create mode 100644 _sources/lectures/TWP28/TWP28_en.rst

diff --git a/_sources/index_en.rst b/_sources/index_en.rst
index 07582b56c2..1b3fbfe800 100644
--- a/_sources/index_en.rst
+++ b/_sources/index_en.rst
@@ -18,6 +18,7 @@ Contents:
    lectures/TWP20/toctree_en
    lectures/TWP23/toctree_en
    lectures/TWP25/toctree_en
+   lectures/TWP28/TWP28_en
    lectures/TWP30/toctree_en
    lectures/TWP33/toctree_en
    lectures/TWP35/toctree_en
diff --git a/_sources/index_es.rst b/_sources/index_es.rst
index ecf1551c24..1fb189adef 100644
--- a/_sources/index_es.rst
+++ b/_sources/index_es.rst
@@ -18,6 +18,7 @@ Contenidos:
    lectures/TWP20/toctree
    lectures/TWP23/toctree
    lectures/TWP25/toctree
+   lectures/TWP28/TWP28
    lectures/TWP30/toctree
    lectures/TWP33/toctree
    lectures/TWP35/toctree
diff --git a/_sources/lectures/TWP28/TWP28.rst b/_sources/lectures/TWP28/TWP28.rst
new file mode 100644
index 0000000000..ff6c3d8dc2
--- /dev/null
+++ b/_sources/lectures/TWP28/TWP28.rst
@@ -0,0 +1,198 @@
+===============
+Poder de Pandas
+===============
+
+.. image:: ../img/TWP10_001.jpeg
+    :height: 14.925cm
+    :width: 9.258cm
+    :align: center
+    :alt:
+    
+Introducción
+------------
+Esta conferencia trata sobre una de las herramientas más poderosas en el ecosistema de Python para análisis de datos: Pandas. En esta conferencia, nos embarcaremos en un viaje para explorar los entresijos de Pandas, comprendiendo sus capacidades para manejar, manipular y analizar datos de manera efectiva.
+
+Entendiendo los Conceptos Básicos de Pandas
+--------------------------------------------
+- Pandas, construido sobre NumPy, proporciona estructuras de datos y funciones para trabajar con datos estructurados.
+- Componentes clave: Serie (arreglo unidimensional etiquetado) y DataFrame (estructura de datos etiquetada bidimensional).
+- Importando Pandas y cargando datos: ``import pandas as pd`` y ``pd.read_csv()``.
+
+Análisis Exploratorio de Datos (EDA) con Pandas
+-----------------------------------------------
+- Verificando las dimensiones de los datos: ``df.shape``.
+- Obteniendo estadísticas resumidas: ``df.describe()``.
+- Examinando tipos de datos y valores faltantes: ``df.info()``.
+
+Limpieza y Transformación de Datos
+----------------------------------
+- Renombrando columnas para mayor claridad: ``df.rename(columns={'old_name': 'new_name'}, inplace=True)``.
+- Manejando datos faltantes: ``df.dropna()``, ``df.fillna()``.
+- Conversión de tipos de datos: ``df.astype()``.
+
+Manipulación y Agregación de Datos
+-----------------------------------
+- Seleccionando columnas y filas: ``df['column_name']``, ``df.loc[]``, ``df.iloc[]``.
+- Filtrando datos: ``df.query()``.
+- Agrupación y agregación de datos: ``df.groupby().agg()``.
+
+Visualización de Datos con Pandas
+---------------------------------
+- Gráficos mediante la integración de Pandas con Matplotlib; Seaborn también acepta DataFrames directamente.
+- Gráficos básicos: ``df.plot()``.
+- Gráficos de barras, histogramas, diagramas de caja: ``df.plot(kind='bar')``, ``df.plot(kind='hist')``, ``df.plot(kind='box')``.
+
+Técnicas Avanzadas de Análisis de Datos
+----------------------------------------
+- Análisis de series temporales: Manejo de datos de fecha y hora con Pandas.
+- Fusión y unión de conjuntos de datos: ``pd.merge()``, ``pd.concat()``.
+- Manejo de duplicados: ``df.drop_duplicates()``.
+
+Aplicaciones del Mundo Real y Estudios de Caso
+----------------------------------------------
+- Análisis de datos de atención médica: Exploración de tiempos de espera de pacientes, distribución de servicios y tendencias geográficas.
+- Análisis de datos financieros: Análisis del mercado de valores, gestión de carteras.
+- Análisis de datos en redes sociales: Análisis de sentimientos, detección de tendencias.
+
+Mejores Prácticas y Optimización del Rendimiento
+------------------------------------------------
+- Carga y almacenamiento eficientes de datos: Utilización de fragmentación, optimización de tipos de datos.
+- Operaciones vectorizadas: Aprovechamiento de operaciones vectorizadas de Pandas para cálculos más rápidos.
+- Gestión de memoria: Reducción del uso de memoria para conjuntos de datos grandes.
+
+Ejemplo
+-------
+
+- Aquí hay algo que puedes desarrollar usando Pandas. Este ejemplo utiliza datos de productos de helado de Beijing. Los datos se leen de un archivo CSV y luego se visualizan usando Matplotlib. El usuario puede seleccionar un sabor de helado y el gráfico mostrará las calificaciones de los productos que contienen el sabor seleccionado.
+
+.. code:: python
+
+    import js
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+    from pyodide.http import open_url
+    from pyodide.ffi import create_proxy
+
+    # Product table; the script relies on its "name", "rating" and "ingredients" columns.
+    url = "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv"
+    ice_data = pd.read_csv(open_url(url))
+
+    current_selected = "ALL"  # value of the checked radio button
+    flavour_elements = js.document.getElementsByName("flavour")
+
+    def plot(data):
+        """Draw a horizontal bar chart of rating per product name into #graph-area."""
+        fig, ax = plt.subplots(figsize=(22, 20))  # size this figure only, not global rcParams
+        bars = ax.barh(data["name"], data["rating"], height=0.7)
+        ax.bar_label(bars)
+        ax.set_title("Rating of ice cream flavours of your choice")
+        display(fig, target="graph-area", append=False)
+        plt.close(fig)  # free the figure; otherwise one stays open per selection
+
+    def select_flavour(event):
+        """Redraw the chart for the radio button that is currently checked."""
+        global current_selected  # without this the assignment below creates a local
+        current_selected = next((e.value for e in flavour_elements if e.checked), current_selected)
+        if current_selected == "ALL":
+            plot(ice_data)
+        else:
+            # Vectorized substring match; na=False skips rows with no ingredients.
+            mask = ice_data["ingredients"].str.contains(current_selected, regex=False, na=False)
+            plot(ice_data[mask])
+
+    ele_proxy = create_proxy(select_flavour)
+
+    for ele in flavour_elements:
+        if ele.value == "ALL":
+            ele.checked = True
+            current_selected = ele.value
+        ele.addEventListener("change", ele_proxy)
+
+    plot(ice_data)
+
+
+.. raw:: html
+
+    <br>
+    <html>
+      <head>
+        <title>Selector de Helado</title>
+        <meta charset="utf-8">
+        <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
+        <script defer src="https://pyscript.net/latest/pyscript.js"></script>
+      </head>
+      <body>
+
+        <py-config>
+          packages = ["matplotlib", "pandas"]
+        </py-config>
+
+        <py-script>
+
+        import js
+        import pandas as pd
+        import matplotlib.pyplot as plt
+
+        from pyodide.http import open_url
+        from pyodide.ffi import create_proxy
+
+        # Product table; the script relies on its "name", "rating" and "ingredients" columns.
+        url = "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv"
+        ice_data = pd.read_csv(open_url(url))
+
+        current_selected = "ALL"  # value of the checked radio button
+        flavour_elements = js.document.getElementsByName("flavour")
+
+        def plot(data):
+            """Draw a horizontal bar chart of rating per product name into #graph-area."""
+            fig, ax = plt.subplots(figsize=(22, 20))  # size this figure only, not global rcParams
+            bars = ax.barh(data["name"], data["rating"], height=0.7)
+            ax.bar_label(bars)
+            ax.set_title("Rating of ice cream flavours of your choice")
+            display(fig, target="graph-area", append=False)
+            plt.close(fig)  # free the figure; otherwise one stays open per selection
+
+        def select_flavour(event):
+            """Redraw the chart for the radio button that is currently checked."""
+            global current_selected  # without this the assignment below creates a local
+            current_selected = next((e.value for e in flavour_elements if e.checked), current_selected)
+            if current_selected == "ALL":
+                plot(ice_data)
+            else:
+                # Vectorized substring match; na=False skips rows with no ingredients.
+                mask = ice_data["ingredients"].str.contains(current_selected, regex=False, na=False)
+                plot(ice_data[mask])
+
+        ele_proxy = create_proxy(select_flavour)
+
+        for ele in flavour_elements:
+            if ele.value == "ALL":
+                ele.checked = True
+                current_selected = ele.value
+            ele.addEventListener("change", ele_proxy)
+
+        plot(ice_data)
+
+        </py-script>
+
+        <div id="input" style="margin-left: 250px;">
+            Seleccione su sabor de 🍨: <br/>
+            <input type="radio" id="all" name="flavour" value="ALL">
+            <label for="all"> Todos 🍧</label>
+            <input type="radio" id="chocolate" name="flavour" value="COCOA">
+            <label for="chocolate"> Chocolate 🍫</label>
+            <input type="radio" id="cherrie" name="flavour" value="CHERRIES">
+            <label for="cherrie"> Cerezas 🍒</label>
+            <input type="radio" id="berries" name="flavour" value="BERRY">
+            <label for="berries"> Bayas 🍓</label>
+            <input type="radio" id="cheese" name="flavour" value="CHEESE">
+            <label for="cheese"> Queso 🧀</label>
+            <input type="radio" id="peanut" name="flavour" value="PEANUT">
+            <label for="peanut"> Cacahuate 🥜</label>
+        </div>
+
+        <div id="graph-area" style="width: 1000px; height: 600px;"></div>
+
+      </body>
+    </html>
diff --git a/_sources/lectures/TWP28/TWP28_en.rst b/_sources/lectures/TWP28/TWP28_en.rst
new file mode 100644
index 0000000000..e1ebbf6204
--- /dev/null
+++ b/_sources/lectures/TWP28/TWP28_en.rst
@@ -0,0 +1,199 @@
+===============
+Power of Pandas
+===============
+
+.. image:: ../img/TWP10_001.jpeg
+    :height: 14.925cm
+    :width: 9.258cm
+    :align: center
+    :alt:
+    
+Introduction
+------------
+This lecture is on one of the most powerful tools in the Python ecosystem for data analysis - Pandas. In this lecture, we'll embark on a journey to explore the ins and outs of Pandas, understanding its capabilities in handling, manipulating, and analyzing data effectively.
+
+Understanding Pandas Basics
+---------------------------
+- Pandas, built on top of NumPy, provides data structures and functions to work with structured data.
+- Key components: Series (1-dimensional labeled array) and DataFrame (2-dimensional labeled data structure).
+- Importing Pandas and loading data: ``import pandas as pd`` and ``pd.read_csv()``.
+
+Exploratory Data Analysis (EDA) with Pandas
+-------------------------------------------
+- Checking data dimensions: ``df.shape``.
+- Getting summary statistics: ``df.describe()``.
+- Examining data types and missing values: ``df.info()``.
+
+Data Cleaning and Transformation
+--------------------------------
+- Renaming columns for clarity: ``df.rename(columns={'old_name': 'new_name'}, inplace=True)``.
+- Handling missing data: ``df.dropna()``, ``df.fillna()``.
+- Data type conversion: ``df.astype()``.
+
+Data Manipulation and Aggregation
+---------------------------------
+- Selecting columns and rows: ``df['column_name']``, ``df.loc[]``, ``df.iloc[]``.
+- Filtering data: ``df.query()``.
+- Grouping and aggregating data: ``df.groupby().agg()``.
+
+Data Visualization with Pandas
+------------------------------
+- Plotting through Pandas' built-in Matplotlib integration; Seaborn also accepts DataFrames directly.
+- Basic plots: ``df.plot()``.
+- Bar plots, histograms, box plots: ``df.plot(kind='bar')``, ``df.plot(kind='hist')``, ``df.plot(kind='box')``.
+
+Advanced Data Analysis Techniques
+---------------------------------
+- Time series analysis: Handling datetime data with Pandas.
+- Merging and joining datasets: ``pd.merge()``, ``pd.concat()``.
+- Handling duplicates: ``df.drop_duplicates()``.
+
+Real-world Applications and Case Studies
+----------------------------------------
+- Analyzing healthcare data: Exploring patient wait times, service distribution, and geographical trends.
+- Financial data analysis: Stock market analysis, portfolio management.
+- Social media data analysis: Sentiment analysis, trend detection.
+
+Best Practices and Performance Optimization
+-------------------------------------------
+- Efficient data loading and storage: Utilizing chunking, optimizing data types.
+- Vectorized operations: Leveraging Pandas' vectorized operations for faster computations.
+- Memory management: Reducing memory usage for large datasets.
+
+Example
+-------
+
+- Here is something you can develop using Pandas. This example uses the data of ice cream products from Beijing. The data is read from a CSV file and then visualized using Matplotlib. The user can select a flavour of ice cream and the graph will display the ratings of the products that contain the selected flavour.
+
+.. code:: python
+
+    import js
+    import pandas as pd
+    import matplotlib.pyplot as plt
+
+    from pyodide.http import open_url
+    from pyodide.ffi import create_proxy
+
+    # Product table; the script relies on its "name", "rating" and "ingredients" columns.
+    url = "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv"
+    ice_data = pd.read_csv(open_url(url))
+
+    current_selected = "ALL"  # value of the checked radio button
+    flavour_elements = js.document.getElementsByName("flavour")
+
+    def plot(data):
+        """Draw a horizontal bar chart of rating per product name into #graph-area."""
+        fig, ax = plt.subplots(figsize=(22, 20))  # size this figure only, not global rcParams
+        bars = ax.barh(data["name"], data["rating"], height=0.7)
+        ax.bar_label(bars)
+        ax.set_title("Rating of ice cream flavours of your choice")
+        display(fig, target="graph-area", append=False)
+        plt.close(fig)  # free the figure; otherwise one stays open per selection
+
+    def select_flavour(event):
+        """Redraw the chart for the radio button that is currently checked."""
+        global current_selected  # without this the assignment below creates a local
+        current_selected = next((e.value for e in flavour_elements if e.checked), current_selected)
+        if current_selected == "ALL":
+            plot(ice_data)
+        else:
+            # Vectorized substring match; na=False skips rows with no ingredients.
+            mask = ice_data["ingredients"].str.contains(current_selected, regex=False, na=False)
+            plot(ice_data[mask])
+
+    ele_proxy = create_proxy(select_flavour)
+
+    for ele in flavour_elements:
+        if ele.value == "ALL":
+            ele.checked = True
+            current_selected = ele.value
+        ele.addEventListener("change", ele_proxy)
+
+    plot(ice_data)
+
+
+.. raw:: html
+
+    <br>
+    <html>
+      <head>
+        <title>Ice Cream Picker</title>
+        <meta charset="utf-8">
+        <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
+        <script defer src="https://pyscript.net/latest/pyscript.js"></script>
+      </head>
+      <body>
+
+        <py-config>
+          packages = ["matplotlib", "pandas"]
+        </py-config>
+
+        <py-script>
+
+        import js
+        import pandas as pd
+        import matplotlib.pyplot as plt
+
+        from pyodide.http import open_url
+        from pyodide.ffi import create_proxy
+
+        # Product table; the script relies on its "name", "rating" and "ingredients" columns.
+        url = "https://raw.githubusercontent.com/Cheukting/pyscript-ice-cream/main/bj-products.csv"
+        ice_data = pd.read_csv(open_url(url))
+
+        current_selected = "ALL"  # value of the checked radio button
+        flavour_elements = js.document.getElementsByName("flavour")
+
+        def plot(data):
+            """Draw a horizontal bar chart of rating per product name into #graph-area."""
+            fig, ax = plt.subplots(figsize=(22, 20))  # size this figure only, not global rcParams
+            bars = ax.barh(data["name"], data["rating"], height=0.7)
+            ax.bar_label(bars)
+            ax.set_title("Rating of ice cream flavours of your choice")
+            display(fig, target="graph-area", append=False)
+            plt.close(fig)  # free the figure; otherwise one stays open per selection
+
+        def select_flavour(event):
+            """Redraw the chart for the radio button that is currently checked."""
+            global current_selected  # without this the assignment below creates a local
+            current_selected = next((e.value for e in flavour_elements if e.checked), current_selected)
+            if current_selected == "ALL":
+                plot(ice_data)
+            else:
+                # Vectorized substring match; na=False skips rows with no ingredients.
+                mask = ice_data["ingredients"].str.contains(current_selected, regex=False, na=False)
+                plot(ice_data[mask])
+
+        ele_proxy = create_proxy(select_flavour)
+
+        for ele in flavour_elements:
+            if ele.value == "ALL":
+                ele.checked = True
+                current_selected = ele.value
+            ele.addEventListener("change", ele_proxy)
+
+        plot(ice_data)
+
+        </py-script>
+
+        <div id="input" style="margin-left: 250px;">
+            Select your 🍨 flavour: <br/>
+            <input type="radio" id="all" name="flavour" value="ALL">
+            <label for="all"> All 🍧</label>
+            <input type="radio" id="chocolate" name="flavour" value="COCOA">
+            <label for="chocolate"> Chocolate 🍫</label>
+            <input type="radio" id="cherrie" name="flavour" value="CHERRIES">
+            <label for="cherrie"> Cherries 🍒</label>
+            <input type="radio" id="berries" name="flavour" value="BERRY">
+            <label for="berries"> Berries 🍓</label>
+            <input type="radio" id="cheese" name="flavour" value="CHEESE">
+            <label for="cheese"> Cheese 🧀</label>
+            <input type="radio" id="peanut" name="flavour" value="PEANUT">
+            <label for="peanut"> Peanut 🥜</label>
+        </div>
+
+        <div id="graph-area" style="width: 1000px; height: 600px;"></div>
+
+      </body>
+    </html>
+