sustainability-lab · Tanvi-Jain01 · Jun 30, 2023 · Jun 30, 2023 · Jun 30, 2023 · Jun 30, 2023
diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
@@ -1,4 +1,11 @@
-def googleMaps(df, lat, long, pollutant, dataLoc):
+import folium
+import webbrowser
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+def google_maps(df:pd.DataFrame, lat:str, long:str, pollutant:str, date:str, markersize:int,zoom:int):
     """Plots a geographical plot.
 
     Plots a folium plot of longitude and latitude points 
@@ -15,67 +22,32 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
     long: str
         Name of column in df of where longitude points are
     pollutant: str
-        Name of pollutant 
-    dataLoc: str
-        Name of df column where pollutanat values are stored
+        Name of the column in df where the pollutant values are stored.
+    date: str
+        Date to visualize; compared for equality with the 'date' column of df.
+    markersize: int
+        Factor multiplied by each pollutant value to get the marker radius.
+    zoom: int
+        Initial zoom level of the map (passed to folium as zoom_start).
 
     """
-    import folium
-    import webbrowser
-    import pandas as pd
-    import matplotlib.pyplot as plt
-    import numpy as np
-    import pandas as pd
+
+    df1 = df[df['date'] == date]
 
-    latitude = 37.0902
-    longitude = -95.7129
-    Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)
+    lat= df1[lat].values[0] 
+    long=df1[long].values[0] 
+    my_map4 = folium.Map(location = [lat, long], zoom_start = zoom)
 
-    # =============================================================================
-    # df = pd.read_csv('interpolData.csv')
-    # =============================================================================
+    for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
+        folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
 
-    some_value = pollutant
-    df = df.loc[df["Parameter Name"] == some_value]
-
-    some_value = "2018-05-07"
-    df = df.loc[df["Date Local"] == some_value]
-
-    df = df.sample(frac=1)
-
-    # df_train, df_test = train_test_split(df, test_size=0.2)
-    df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
-    colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
-
-    for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
-        df[lat],
-        df[long],
-        df["Arithmetic Mean Q"],
-        df[dataLoc],
-        df["City Name"],
-        df["AQI"],
-    ):
-        folium.CircleMarker(
-            [lat, lon],
-            radius=0.15 * AQI,
-            popup=(
-                "City: "
-                + str(city).capitalize()
-                + "<br>"
-                #'Bike score: ' + str(bike) + '<br>'
-                "Arithmetic_Mean level: "
-                + str(Arithmetic_Mean)
-                + "%"
-            ),
-            color="b",
-            key_on=Arithmetic_Mean_Q,
-            threshold_scale=[0, 1, 2, 3],
-            fill_color=colordict[Arithmetic_Mean_Q],
-            fill=True,
-            fill_opacity=0.7,
-        ).add_to(Arithmetic_Mean_map)
-    Arithmetic_Mean_map.save("mymap.html")
+    my_map4.save("googleMaps.html")
+    print('your map has been saved')
+    return my_map4
 
 
+#Example:
 # df = pd.read_csv('interpolData.csv')
-# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
+# Call the function and display the map in Jupyter Notebook
+# map_obj = google_maps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
+# map_obj
diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py
@@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
     import matplotlib.cm as cm
     from math import pi
 
-    pm10 = df.pm10
-    o3 = df.o3
-    ws = df.ws
-    wd = df.wd
-    nox = df.nox
-    no2 = df.no2
-
+
     #########################################
     # converts wind data to randians
-    df = pd.DataFrame({"speed": ws, "direction": wd})
-    df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
-    df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
+    #df1 = pd.DataFrame({"speed": ws, "direction": wd})
+    df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
+    df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)   
     fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
     x0, x1 = ax.get_xlim()
     y0, y1 = ax.get_ylim()
-    ax.set_aspect("equal")
-    _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
+    #ax.set_aspect("equal")
+    _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
+    plt.show()
+
 
     ####################################
     # simple seaborn plot that shows how given variables relate with one another
-    if x == "nox":
-        x = nox
-    elif x == "no2":
-        x = no2
-    elif x == "o3":
-        x = o3
-    elif x == "pm10":
-        x = pm10
-    if y == "nox":
-        y = nox
-    elif y == "no2":
-        y = no2
-    elif y == "o3":
-        y = o3
-    elif y == "pm10":
-        y = pm10
-
-    sns.jointplot(x=x, y=y, kind="hex")
-
+    sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
+    plt.xlabel(x)
+    plt.ylabel(y)
     plt.show()
-
-
+    
+    
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
 # scatterPlot(df,'nox','no2')

diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
@@ -1,28 +1,48 @@
-def selectByDate(df, year):
-    """ 
-    Utility function to cut given dataframe by the year 
-    and find the average value of each day 
+import pandas as pd
+import numpy as np
+
+def select_by(df:pd.Dataframe, year:str, group:list=None, time_period:str='day'):
+    """
+    Utility function to cut a given dataframe by year and find the average value
+    of each day, month, or year. Optionally, data can be grouped by specified columns.
 
     Parameters
     ----------
     df: data frame
-        a data frame containing a date field
+        A data frame containing a date field and optional grouping columns.
     year: type string
-        a year to select to cut data
+        A year to select and filter the data.
+    group: list, optional
+        A list of columns to group the data by. Default is None (no grouping).
+    time_period: {'day', 'month', 'year'}, optional
+        The time period to compute the average value. Default is 'day'.
+
+    Returns
+    -------
+    data frame
+        A data frame with the average value of each day, month, or year.
+        If group is specified, the data will be grouped accordingly.
     """
-    import pandas as pd
-    import numpy as np
-
-    df.index = pd.to_datetime(df.date)
-    df = df.drop("date", axis=1)
-    df_n = df[year].resample("1D").mean()
-    df_n = df_n.fillna(method="ffill")
-    df_n["month"] = df_n.index.month
-    df_n.index.dayofweek
-    print(df_n)
+
+    df['date'] = pd.to_datetime(df['date'])
+    df_year = df[df['date'].dt.year == int(year)]
+
+    if group:
+        df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True)
+        return df_grouped
+
+    if time_period == 'month':
+        df_month = df_year.resample('M', on='date').mean(numeric_only=True)
+        return df_month
+    elif time_period == 'year':
+        df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True)
+        return df_yearly
+
+    df_day = df_year.resample('D', on='date').mean(numeric_only=True)
+    return df_day
 
 
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
-# selectByDate(df,'2003')
+# select_by(df, '2022', group=['latitude', 'longitude', 'station'], time_period='month')
 # =============================================================================
diff --git a/vayu/summary_plot.py b/vayu/summary_plot.py
@@ -0,0 +1,130 @@
+import datetime as dt
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import numpy as np
+import pandas as pd
+from numpy import array
+import matplotlib.patches as mpatches
+import seaborn as sns
+from matplotlib.pyplot import figure
+
+def summary_plot(df: pd.DataFrame):
+    """ Plots a summary of the given data frame. Draws a line plot, a
+        histogram and a pie chart for each column (pollutant) and prints
+        statistics: min, max, missing count, mean, median and 95th percentile.
+
+        Parameters
+        ----------
+        df: data frame
+            data frame to be summarised. Must contain a date field
+            and at least one other parameter 
+    """
+
+    # Initialize variables
+    pollutants = ["pm10", "pm25", "sox", "co", "o3", "nox", "pb", "nh3"]
+    categories = ["s", "m", "h"]
+
+    counts = {pollutant: {category: 0 for category in categories} for pollutant in pollutants}
+
+
+    df.index = pd.to_datetime(df.date)
+    df = df.drop("date", axis=1)
+    df_all = df.resample("1D")
+    df_all = df.copy()
+    df_all = df_all.fillna(method="ffill")
+    #print(df_all.columns)
+
+    # Calculate counts for each pollutant category
+    for pollutant in pollutants:
+        if pollutant in df_all.columns:
+            column_data = df_all[pollutant]
+            #print(df_all)
+            for _, data in column_data.iteritems():
+                if pollutant in ["pm10", "pm25"]:
+                    if data < 100:
+                        counts[pollutant]["s"] += 1
+                    elif data < 250:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "co":
+                    if data < 2:
+                        counts[pollutant]["s"] += 1
+                    elif data < 10:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "sox":
+                    if data <= 80:
+                        counts[pollutant]["s"] += 1
+                    elif data <= 380:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "o3":
+                    if data < 100:
+                        counts[pollutant]["s"] += 1
+                    elif data < 168:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "nox":
+                    if data < 80:
+                        counts[pollutant]["s"] += 1
+                    elif data < 180:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "pb":
+                    if data <= 1:
+                        counts[pollutant]["s"] += 1
+                    elif data <= 2:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+                elif pollutant == "nh3":
+                    if data <= 400:
+                        counts[pollutant]["s"] += 1
+                    elif data <= 800:
+                        counts[pollutant]["m"] += 1
+                    else:
+                        counts[pollutant]["h"] += 1
+
+
+
+    # Plot line, histogram, and pie charts for each pollutant
+    fig, axes = plt.subplots(len(df_all.columns), 3, figsize=(25,25))
+
+    for i, pollutant in enumerate(df_all.columns):
+        ax_line = axes[i, 0]
+        ax_hist = axes[i, 1]
+        ax_pie = axes[i, 2]
+
+        df_all[pollutant].plot.line(ax=ax_line, color="gold")
+        ax_line.axes.get_xaxis().set_visible(False)
+        ax_line.yaxis.set_label_position("left")
+        ax_line.set_ylabel(pollutant, fontsize=30, bbox=dict(facecolor="whitesmoke"))
+
+        ax_hist.hist(df_all[pollutant], bins=50, color="green")
+
+        labels = ["Safe", "Moderate", "High"]
+        sizes = [counts[pollutant][category] for category in categories]
+        explode = [0, 0, 1]
+
+        ax_pie.pie(sizes, explode=explode, labels=labels, autopct="%1.1f%%", shadow=False, startangle=90)
+        ax_pie.axis("equal")
+
+        ax_pie.set_xlabel("Statistics")
+
+        print(f"{pollutant}\nmin = {df_all[pollutant].min():.2f}\nmax = {df_all[pollutant].max():.2f}\nmissing = {df_all[pollutant].isna().sum()}\nmean = {df_all[pollutant].mean():.2f}\nmedian = {df_all[pollutant].median():.2f}\n95th percentile = {df_all[pollutant].quantile(0.95):.2f}\n")
+
+    plt.savefig("summary_plot.png", dpi=300, format="png")
+    plt.show()
+    print("your plots has also been saved")
+    plt.close()
+
+
+# =============================================================================
+# df = pd.read_csv('mydata.csv')
+# summary_plot(df)
+# =============================================================================