Feature Request: Add NDVI and NDBI to ETL pipeline #1016

Open
nlebovits opened this issue Nov 23, 2024 · 1 comment

Comments

@nlebovits
Collaborator

Is your feature request related to a problem? Please describe.
We've discussed adding satellite image-derived NDVI and NDBI to our ETL pipeline to facilitate the identification of vacant land and vacant buildings.

Describe the solution you'd like
Using the pystac API and Sentinel-2 imagery, calculate NDVI and NDBI from a composite of the 3 most recent Sentinel-2 images (with a revisit time of roughly 10 days, this should correspond to about 1 month of coverage), using a bounding box for the borders of Philadelphia. Then, extract those NDVI and NDBI raster values at a point on the surface (not the centroid) of each polygon in our full dataset of the 500k+ properties in Philadelphia (the one created in new_etl via main.py, not the script.py pipeline).

Additional context
Here's Python code that should get you started:

# Import required libraries
import odc.stac
import pandas as pd
import planetary_computer
import pystac_client
import xarray as xr
import hvplot.xarray
import panel as pn
from shapely.geometry import box
import geopandas as gpd

# Enable Panel for interactive visualizations
pn.extension()

# Step 1: Connect to Planetary Computer Catalog
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,  # Automatically signs requests
)

# List available collections
all_collections = [i.id for i in catalog.get_collections()]
sentinel_collections = [collection for collection in all_collections if "sentinel" in collection]
print("Available Sentinel Collections:", sentinel_collections)

# Step 2: Query Sentinel-2 L2A with a cloud cover filter
bbox = [-75.2803, 39.8670, -74.9557, 40.1379]  # Philadelphia bounding box
datetime = "2024-06-01/2024-08-31"  # Summer 2024
cloudy_less_than = 10  # Percent cloud cover threshold

search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=bbox,
    datetime=datetime,
    query={"eo:cloud_cover": {"lt": cloudy_less_than}}
)
items = search.item_collection()

# Print Results
print(f"Returned {len(items)} Items:")
if items:
    for item in items:
        print(item.id)
else:
    print("No items found with the current filter.")

# Inspect the first returned item
selected_item = items[0]
print(selected_item)

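# Band common names map to Sentinel-2 bands: red=B04, green=B03, blue=B02, nir=B08, swir16=B11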
bands_of_interest = ["red", "green", "blue", "nir", "swir16"]

# Load all selected items (tiles) into a list of datasets
datasets = []
for item in items:
    ds_tile = odc.stac.stac_load(
        [item],
        bands=bands_of_interest,
        bbox=bbox,
        chunks={}  # Enable Dask for memory efficiency
    )
    datasets.append(ds_tile)

# Merge all datasets into one composite dataset
ds_merged = xr.concat(datasets, dim="time")

# Create a composite by taking the median across the `time` dimension
ds_composite = ds_merged.median(dim="time")

# note that this chunk may take about a minute to run
da = ds_composite.to_array(dim="band").compute()

# Carry over the item metadata and record the scene's CRS (a UTM zone)
da.attrs = selected_item.properties
da.attrs["crs"] = f"epsg:{selected_item.properties['proj:epsg']}"
print(da.attrs["crs"])

import matplotlib.pyplot as plt
import numpy as np

# Extract the individual RGB bands for a quick visual check
red = da.sel(band="red").values
green = da.sel(band="green").values
blue = da.sel(band="blue").values

# Normalize bands, handling NaN values
red_norm = red / np.nanmax(red)
green_norm = green / np.nanmax(green)
blue_norm = blue / np.nanmax(blue)

# Stack into an RGB image
rgb_image = np.dstack((red_norm, green_norm, blue_norm))

# Plot the image
plt.figure(figsize=(10, 10))
plt.imshow(rgb_image)
plt.title("RGB Composite")
plt.axis("off")
plt.show()

red = da.sel(band="red").values
nir = da.sel(band="nir").values

print("Red Band Range:", red.min(), red.max())
print("NIR Band Range:", nir.min(), nir.max())

if red.max() > 1:
    red = red / 10000.0
if nir.max() > 1:
    nir = nir / 10000.0

# Calculate NDVI; suppress warnings where nir + red == 0
with np.errstate(divide="ignore", invalid="ignore"):
    ndvi = (nir - red) / (nir + red)

# Replace NaN pixels (e.g. divide-by-zero) with a -9999 nodata placeholder
ndvi = np.nan_to_num(ndvi, nan=-9999)

plt.figure(figsize=(10, 8))
plt.imshow(
    ndvi,
    cmap="RdYlGn",
    vmin=-1, vmax=1,  # keep the -9999 nodata value from blowing out the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDVI")
plt.title("NDVI Map")
plt.xlabel("Easting (m)")  # coordinates are in the scene's UTM CRS, not lon/lat
plt.ylabel("Northing (m)")
plt.show()

swir = da.sel(band="swir16").values

# Normalize bands if needed (NaN-safe check; nir was already rescaled above)
if np.nanmax(swir) > 1:
    swir = swir / 10000.0
if np.nanmax(nir) > 1:
    nir = nir / 10000.0

# Calculate NDBI and replace NaN pixels with the -9999 nodata placeholder
with np.errstate(divide="ignore", invalid="ignore"):
    ndbi = (swir - nir) / (swir + nir)
ndbi = np.nan_to_num(ndbi, nan=-9999)

# Plot NDBI
plt.figure(figsize=(10, 8))
plt.imshow(
    ndbi,
    cmap="gray",
    vmin=-1, vmax=1,  # keep the -9999 nodata value from blowing out the color scale
    extent=[da["x"].values.min(), da["x"].values.max(),
            da["y"].values.min(), da["y"].values.max()],
)
plt.colorbar(label="NDBI")
plt.title("NDBI (Built-up Index)")
plt.xlabel("Easting (m)")
plt.ylabel("Northing (m)")
plt.show()
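
To go from the rasters above to per-property values, here's a minimal, untested sketch of the extraction step described in the solution section: sampling NDVI and NDBI at a representative point on the surface of each parcel polygon (geopandas' representative_point() always falls inside the polygon, unlike a centroid, which can land outside an irregular or multipart parcel). The properties_gdf name and the read step are placeholders for the 500k+ property dataset built in new_etl via main.py; everything else continues the script above and reuses its ndvi, ndbi, and da objects and imports.

# Sketch: sample NDVI/NDBI at a point guaranteed to fall inside each polygon
def sample_index_at_points(index_da, gdf, raster_crs):
    """Sample a 2D (y, x) index raster at a representative point of each polygon."""
    points = gdf.geometry.representative_point()  # always on the polygon surface, unlike centroid
    points = points.to_crs(raster_crs)            # reproject to the raster's UTM CRS
    xs = xr.DataArray(points.x.values, dims="points")
    ys = xr.DataArray(points.y.values, dims="points")
    # Vectorized nearest-neighbor lookup along the shared "points" dimension
    return index_da.sel(x=xs, y=ys, method="nearest").values

# Wrap the NumPy index arrays back into DataArrays that carry the x/y coordinates
ndvi_da = xr.DataArray(ndvi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))
ndbi_da = xr.DataArray(ndbi, coords={"y": da["y"], "x": da["x"]}, dims=("y", "x"))

# Placeholder: load the 500k+ property polygons (the GeoDataFrame must have a CRS set)
# properties_gdf = gpd.read_file("...")
# properties_gdf["ndvi"] = sample_index_at_points(ndvi_da, properties_gdf, da.attrs["crs"])
# properties_gdf["ndbi"] = sample_index_at_points(ndbi_da, properties_gdf, da.attrs["crs"])
# Note: -9999 marks pixels that were NaN in the composite and should be treated as nodata.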

This issue has been marked as stale because it has been open for 30 days with no activity.

github-actions bot added the stale label on Dec 24, 2024