From 6fd9b7cfec4b426832fabd694d862abc86ffb860 Mon Sep 17 00:00:00 2001 From: Zac Deziel Date: Thu, 5 Sep 2024 11:45:25 -0700 Subject: [PATCH] Port demo notebook to R (#31) * Port demo notebook to R * Update based on review Use httr2 instead of httr. Optimize geodataframe creation. Add custom breaks colormap. * Match classification of R notebook --- notebooks/space2stats_api_demo.ipynb | 210 +++++++++++++-------------- notebooks/space2stats_api_demo_R.Rmd | 126 ++++++++++++++++ 2 files changed, 226 insertions(+), 110 deletions(-) create mode 100644 notebooks/space2stats_api_demo_R.Rmd diff --git a/notebooks/space2stats_api_demo.ipynb b/notebooks/space2stats_api_demo.ipynb index 971688a..4765991 100644 --- a/notebooks/space2stats_api_demo.ipynb +++ b/notebooks/space2stats_api_demo.ipynb @@ -8,20 +8,18 @@ "source": [ "from typing import Dict\n", "\n", + "import numpy as np\n", "import requests\n", "import pandas as pd\n", "import geopandas as gpd\n", "from shapely.geometry import shape\n", - "\n", - "from lonboard.colormap import apply_continuous_cmap\n", "from lonboard import Map, ScatterplotLayer\n", - "from palettable.cartocolors.sequential import BurgYl_2\n", "from geojson_pydantic import Feature, Polygon" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -32,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -54,68 +52,40 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "AOIModel = Feature[Polygon, Dict]\n", "\n", - "# kenya\n", + "# ~kenya\n", "aoi = {\n", - " \"type\": \"Feature\",\n", - " \"geometry\": {\n", + " \"type\": \"Feature\",\n", + " \"geometry\": {\n", " \"type\": \"Polygon\",\n", " \"coordinates\": [\n", - " [\n", - " [41.85508309264397, -1.68325],\n", - " [40.98105, -2.49979],\n", - " [40.993, -3.444],\n", - " [41.58513, -3.91909],\n", - " [40.88477, -4.95913],\n", - " [39.55938425876585, -4.437641590288629],\n", - " [39.25451, -3.42206],\n", - " [37.7669, -3.67712],\n", - " [37.69869, -3.09699],\n", - " [34.07262, -1.05982],\n", - " [33.90371119710453, -0.95],\n", - " [33.893568969666944, 0.109813537861896],\n", - " [34.18, 0.515],\n", - " [34.6721, 1.17694],\n", - " [35.03599, 1.90584],\n", - " [34.59607, 3.05374],\n", - " [34.47913, 3.5556],\n", - " [35.298007118232946, 4.77696566346189],\n", - " [35.817447662353516, 4.77696566346189],\n", - " [36.159078632855646, 4.447864127672769],\n", - " [36.85509323800812, 4.447864127672769],\n", - " [38.120915, 3.598605],\n", - " [38.43697, 3.58851],\n", - " [38.67114, 3.61607],\n", - " [38.89251, 3.50074],\n", - " [39.55938425876585, 3.42206],\n", - " [39.85494, 3.83879],\n", - " [40.76848, 4.25702],\n", - " [41.1718, 3.91909],\n", - " [41.85508309264397, 2.97959],\n", - " [41.58513, 2.09],\n", - " [40.993, 1.657],\n", - " [40.98105, 1.002],\n", - " [41.85508309264397, -1.68325]\n", - " ]\n", + " [\n", + " [33.78593974945852, 5.115816884114494],\n", + " [33.78593974945852, -4.725410543134203],\n", + " [41.94362577283266, -4.725410543134203],\n", + " [41.94362577283266, 5.115816884114494],\n", + " [33.78593974945852, 5.115816884114494]\n", + " ]\n", " ]\n", - " },\n", - " \"properties\": {\n", - " \"name\": \"Kenya\"\n", - " }\n", + " },\n", + " \"properties\": {\n", + " \"name\": \"Updated AOI\"\n", " }\n", - "\n", + "}\n", + " \n", + " \n", "\n", "feat = AOIModel(**aoi)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -138,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -170,33 +140,33 @@ " \n", " \n", " 0\n", - " 866a4a48fffffff\n", - " POINT (35.76352 2.99589)\n", - " 399.860905\n", + " 866a4a017ffffff\n", + " POINT (37.02598 -1.85051)\n", + " 676.912804\n", " \n", " \n", " 1\n", - " 866a4a497ffffff\n", - " POINT (40.58048 -3.79365)\n", - " 582.555159\n", + " 866a4a01fffffff\n", + " POINT (38.62960 -2.14241)\n", + " 347.182722\n", " \n", " \n", " 2\n", - " 866a4a49fffffff\n", - " POINT (41.10421 3.37873)\n", - " 749.911237\n", + " 866a4a04fffffff\n", + " POINT (36.51100 -0.42121)\n", + " 285.943490\n", " \n", " \n", " 3\n", - " 866a4a4d7ffffff\n", - " POINT (37.26153 3.74581)\n", - " 863.888290\n", + " 866a4a057ffffff\n", + " POINT (35.44853 -1.11991)\n", + " 279.463912\n", " \n", " \n", " 4\n", - " 866a5820fffffff\n", - " POINT (40.01148 1.53124)\n", - " 525.085147\n", + " 866a4a05fffffff\n", + " POINT (41.78961 -0.19877)\n", + " 235.258474\n", " \n", " \n", " ...\n", @@ -205,58 +175,58 @@ " ...\n", " \n", " \n", - " 16212\n", - " 867b5dd77ffffff\n", - " POINT (34.94474 1.24558)\n", - " -36.000000\n", + " 25801\n", + " 86969ed97ffffff\n", + " POINT (39.45061 -2.85307)\n", + " 196.448391\n", " \n", " \n", - " 16213\n", - " 867b5dd87ffffff\n", - " POINT (40.95343 -1.83280)\n", - " -36.000000\n", + " 25802\n", + " 86969ed9fffffff\n", + " POINT (35.00971 1.49503)\n", + " 296.740893\n", " \n", " \n", - " 16214\n", - " 867b5dd8fffffff\n", - " POINT (35.20290 -0.29666)\n", - " -36.000000\n", + " 25803\n", + " 86969eda7ffffff\n", + " POINT (35.16252 4.03827)\n", + " 154.818453\n", " \n", " \n", - " 16215\n", - " 867b5dd9fffffff\n", - " POINT (41.28333 -1.08552)\n", - " -36.000000\n", + " 25804\n", + " 86969edafffffff\n", + " POINT (37.66441 4.11738)\n", + " 157.875350\n", " \n", " \n", - " 16216\n", - " 867b5ddafffffff\n", - " POINT (36.63048 1.35038)\n", - " -36.000000\n", + " 25805\n", + " 86969edb7ffffff\n", + " POINT (35.12293 -4.05223)\n", + " 236.127331\n", " \n", " \n", "\n", - "

16217 rows × 3 columns

\n", + "

25806 rows × 3 columns

\n", "" ], "text/plain": [ " hex_id geometry sum_pop_2020\n", - "0 866a4a48fffffff POINT (35.76352 2.99589) 399.860905\n", - "1 866a4a497ffffff POINT (40.58048 -3.79365) 582.555159\n", - "2 866a4a49fffffff POINT (41.10421 3.37873) 749.911237\n", - "3 866a4a4d7ffffff POINT (37.26153 3.74581) 863.888290\n", - "4 866a5820fffffff POINT (40.01148 1.53124) 525.085147\n", + "0 866a4a017ffffff POINT (37.02598 -1.85051) 676.912804\n", + "1 866a4a01fffffff POINT (38.62960 -2.14241) 347.182722\n", + "2 866a4a04fffffff POINT (36.51100 -0.42121) 285.943490\n", + "3 866a4a057ffffff POINT (35.44853 -1.11991) 279.463912\n", + "4 866a4a05fffffff POINT (41.78961 -0.19877) 235.258474\n", "... ... ... ...\n", - "16212 867b5dd77ffffff POINT (34.94474 1.24558) -36.000000\n", - "16213 867b5dd87ffffff POINT (40.95343 -1.83280) -36.000000\n", - "16214 867b5dd8fffffff POINT (35.20290 -0.29666) -36.000000\n", - "16215 867b5dd9fffffff POINT (41.28333 -1.08552) -36.000000\n", - "16216 867b5ddafffffff POINT (36.63048 1.35038) -36.000000\n", + "25801 86969ed97ffffff POINT (39.45061 -2.85307) 196.448391\n", + "25802 86969ed9fffffff POINT (35.00971 1.49503) 296.740893\n", + "25803 86969eda7ffffff POINT (35.16252 4.03827) 154.818453\n", + "25804 86969edafffffff POINT (37.66441 4.11738) 157.875350\n", + "25805 86969edb7ffffff POINT (35.12293 -4.05223) 236.127331\n", "\n", - "[16217 rows x 3 columns]" + "[25806 rows x 3 columns]" ] }, - "execution_count": 20, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -269,37 +239,57 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "00bbfea95ae440d3a73ebb161e3142ab", + "model_id": "25303c5410c44eda985a7b80299b95a3", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Map(layers=[ScatterplotLayer(get_fill_color=\n", + "Map(layers=[ScatterplotLayer(get_fill_color=\n", "[\n", " [\n", " 2…" ] }, - "execution_count": 21, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "max = gdf[\"sum_pop_2020\"].max()\n", - "min = gdf[\"sum_pop_2020\"].min()\n", - "normalized_sum_pop_2020 = (gdf[\"sum_pop_2020\"] - min) / (max - min)\n", - "normalized_sum_pop_2020\n", + "# Define custom breaks and corresponding RGBA colors\n", + "breaks = [0, 1, 1000, 10000, 50000, 100000, 200000, gdf[\"sum_pop_2020\"].max()]\n", + "colors = np.array([\n", + " [211, 211, 211, 255], # Light gray for 0\n", + " [255, 255, 0, 255], # Yellow for 1-1000\n", + " [255, 165, 0, 255], # Orange for 1000-10000\n", + " [255, 0, 0, 255], # Red for 10000-50000\n", + " [128, 0, 128, 255], # Purple for 50000-100000\n", + " [0, 0, 255, 255], # Blue for 100000-200000\n", + " [0, 0, 139, 255], # Dark blue for 200000+\n", + "])\n", + "\n", + "# Function to assign colors based on custom bins\n", + "def assign_color(value, breaks, colors):\n", + " for i in range(len(breaks) - 1):\n", + " if breaks[i] <= value < breaks[i + 1]:\n", + " return colors[i]\n", + " return colors[-1] # In case value exceeds all breaks\n", + "\n", + "# Map sum_pop_2020 values to colors using the custom function\n", + "gdf['color'] = gdf[\"sum_pop_2020\"].apply(lambda x: assign_color(x, breaks, colors))\n", + "colors = np.uint8(gdf['color'].tolist())\n", + "\n", + "# Create the scatterplot layer with the assigned colors\n", + "layer = ScatterplotLayer.from_geopandas(gdf, get_radius=2000, get_fill_color=colors)\n", "\n", - "layer = ScatterplotLayer.from_geopandas(gdf, get_radius=2000, get_fill_color=apply_continuous_cmap(normalized_sum_pop_2020, BurgYl_2, alpha=0.7))\n", "m = Map(layer)\n", - "m\n" + "m" ] } ], diff --git a/notebooks/space2stats_api_demo_R.Rmd b/notebooks/space2stats_api_demo_R.Rmd new file mode 100644 index 0000000..6b83c82 --- /dev/null +++ b/notebooks/space2stats_api_demo_R.Rmd @@ -0,0 +1,126 @@ +--- +title: "Space2Stats API Demo in R" +output: html_notebook +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +library(httr2) +library(jsonlite) +library(sf) +library(dplyr) +library(leaflet) +library(viridis) +``` + +## Set Up API Endpoints + +```{r} +base_url <- "https://space2stats.ds.io" +fields_endpoint <- paste0(base_url, "/fields") +summary_endpoint <- paste0(base_url, "/summary") +``` + +## Fetch Available Fields + +```{r} +# Set up the request to fetch available fields +req <- request(base_url) |> + req_url_path_append("fields") # Append the correct endpoint + +# Perform the request and get the response +resp <- req |> req_perform() + +# Check the status code +if (resp_status(resp) != 200) { + stop("Failed to get fields: ", resp_body_string(resp)) +} + +# Parse the response body as JSON +available_fields <- resp |> resp_body_json() + +# Print the available fields in a simplified format +print("Available Fields:") +print(unlist(available_fields)) +``` + +## Define Area of Interest (AOI) + +```{r} +# Define Area of Interest (AOI) with NULL for properties to ensure it's treated as a valid dictionary +aoi <- list( + type = "Feature", + properties = NULL, # Empty properties + geometry = list( + type = "Polygon", + coordinates = list( + list( + c(33.78593974945852, 5.115816884114494), + c(33.78593974945852, -4.725410543134203), + c(41.94362577283266, -4.725410543134203), + c(41.94362577283266, 5.115816884114494), + c(33.78593974945852, 5.115816884114494) + ) + ) + ) +) +``` + +## Request Summary Data + +```{r} +request_payload <- list( + aoi = aoi, + spatial_join_method = "centroid", + fields = list("sum_pop_2020"), + geometry = "point" +) + +# Set up the base URL and create the request +req <- request(base_url) |> + req_url_path_append("summary") |> + req_body_json(request_payload) + +# Perform the request and get the response +resp <- req |> req_perform() + +# Turn response into a data frame +summary_data <- resp |> resp_body_string() |> fromJSON(flatten = TRUE) + +# Extract coordinates and convert to a spatial data frame (sf object) +summary_data$x <- sapply(summary_data$geometry.coordinates, function(x) unlist(x)[1]) +summary_data$y <- sapply(summary_data$geometry.coordinates, function(x) unlist(x)[2]) + +# Convert to sf, drop extra geometry fields +gdf <- st_as_sf(summary_data, coords = c("x", "y"), crs = 4326)[, c(1, 2, 5)] +``` + +## Visualization + +```{r} + +# Replace NA values in sum_pop_2020 with 0 +gdf$sum_pop_2020[is.na(gdf$sum_pop_2020)] <- 0 + +# Create a custom binned color palette with non-uniform breaks +# For example: 0 (distinct color), 1-200000 (gradient), 200001+ (another color) +breaks <- c(0, 1, 1000, 10000, 50000, 100000, 200000, max(gdf$sum_pop_2020)) + +custom_pal <- colorBin(palette = c("lightgray", "yellow", "orange", "red", "purple", "blue"), + domain = gdf$sum_pop_2020, bins = breaks) + +# Create the leaflet map with custom binned coloring +leaflet(gdf) %>% + addTiles() %>% # Add default OpenStreetMap map tiles + addCircleMarkers( + radius = 3, # Adjust size as needed + color = ~custom_pal(sum_pop_2020), + stroke = FALSE, fillOpacity = 0.7, + popup = ~paste("Hex ID:", hex_id, "
", "Population 2020:", sum_pop_2020) # Add a popup with details + ) %>% + addLegend( + pal = custom_pal, values = gdf$sum_pop_2020, title = "Population 2020 (Custom Binned Scale)", + opacity = 1 + ) %>% + setView(lng = 37.5, lat = 0, zoom = 6) # Center the map based on AOI +```