feat(building-comparison): add metadata table #791

Open
wants to merge 11 commits into base: main
12 changes: 11 additions & 1 deletion CHANGELOG.md
@@ -1,10 +1,20 @@
# Changelog

## Current Main

### Other Changes

- road-comparison: covered Microsoft Roads are now calculated with the filter `highway=* and type:way` ([#791])
- comparison-indicators: now request metadata from the database ([#791])


[#791]: https://github.com/GIScience/ohsome-quality-api/pull/791

## Release 1.3.0

### Breaking Changes

- major-roads-length: rename to `roads` and chnage filter to `highway=* and type:way` ([#786])
- major-roads-length: rename to `roads` and change filter to `highway=* and type:way` ([#786])

### New Features

12 changes: 6 additions & 6 deletions ohsome_quality_api/geodatabase/client.py
@@ -70,14 +70,14 @@ async def get_shdi(bpoly: Feature | FeatureCollection) -> list[Record]:


# TODO: Check calls of the function
async def get_reference_coverage(table_name: str) -> Feature:
async def get_reference_coverage(table_name: str, coverage_type: str) -> Feature:
"""Get reference coverage for a bounding polygon."""
file_path = os.path.join(WORKING_DIR, "select_coverage.sql")
with open(file_path, "r") as file:
query = file.read()
query = file.read().replace("{coverage_type}", coverage_type)
async with get_connection() as conn:
result = await conn.fetch(query.format(table_name=table_name))
return Feature(geometry=geojson.loads(result[0]["geom"]))
result = await conn.fetchrow(query, table_name)
return Feature(geometry=geojson.loads(result["geom"]))


async def get_intersection_area(bpoly: Feature, table_name: str) -> float:
@@ -91,7 +91,7 @@ async def get_intersection_area(bpoly: Feature, table_name: str) -> float:
query = file.read()
geom = str(bpoly.geometry)
async with get_connection() as conn:
result = await conn.fetch(query.format(table_name=table_name), geom)
result = await conn.fetch(query, geom, table_name)
if result:
return result[0]["area_ratio"]
else:
@@ -105,7 +105,7 @@ async def get_intersection_geom(bpoly: Feature, table_name: str) -> Feature:
query = file.read()
geom = str(bpoly.geometry)
async with get_connection() as conn:
result = await conn.fetch(query.format(table_name=table_name), geom)
result = await conn.fetch(query, geom, table_name)
if result:
return Feature(geometry=geojson.loads(result[0]["geom"]))
else:
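Taken together, these changes stop formatting table names into SQL strings: only the coverage column is templated, while the dataset name travels as a bind parameter (the `table_name` parameter now effectively receives a `dataset_name_snake_case` value). A minimal usage sketch, assuming a configured database and a hypothetical dataset key `eubucco`:

```python
import asyncio

from geojson import Feature, Polygon

from ohsome_quality_api.geodatabase import client as db_client


async def main() -> None:
    # Hypothetical area of interest; "eubucco" stands in for a dataset_name_snake_case value.
    bpoly = Feature(
        geometry=Polygon([[(8.6, 49.3), (8.8, 49.3), (8.8, 49.5), (8.6, 49.5), (8.6, 49.3)]])
    )
    coverage = await db_client.get_reference_coverage("eubucco", "coverage_simple")
    ratio = await db_client.get_intersection_area(bpoly, "eubucco")
    print(coverage, ratio)


asyncio.run(main())
```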
8 changes: 6 additions & 2 deletions ohsome_quality_api/geodatabase/select_coverage.sql
@@ -1,4 +1,8 @@
SELECT
ST_AsGeoJSON (ST_Transform (geom, 4326)) as geom
ST_AsGeoJSON (ST_Transform ({coverage_type}, 4326)) as geom
FROM
{table_name};
comparison_indicators_metadata
WHERE
dataset_name_snake_case like $1;
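Two mechanisms meet in this query: `{coverage_type}` selects a column (`coverage_simple` or `coverage_inversed`), and PostgreSQL cannot bind identifiers, so it is substituted on the Python side before execution, while the dataset name is bound as `$1`. With no wildcard in the bound value, `LIKE` behaves like plain equality here. A short sketch of the preparation step (the file path and dataset key are illustrative):

```python
import os

# Illustrative path; client.py resolves the file relative to its WORKING_DIR.
sql_path = os.path.join("ohsome_quality_api", "geodatabase", "select_coverage.sql")
with open(sql_path, "r") as file:
    # Template substitution for the identifier, bind parameter for the value.
    query = file.read().replace("{coverage_type}", "coverage_simple")

# Executed later as: record = await conn.fetchrow(query, "eubucco")
```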


16 changes: 11 additions & 5 deletions ohsome_quality_api/geodatabase/select_intersection.sql
@@ -1,13 +1,19 @@
WITH bpoly AS (
SELECT
ST_Setsrid (ST_GeomFromGeoJSON ($1), 4326) AS geom
ST_SetSRID(ST_GeomFromGeoJSON($1), 4326) AS geom
),
selected_coverage AS (
SELECT coverage_simple AS coverage
FROM comparison_indicators_metadata
WHERE dataset_name_snake_case LIKE $2
)
SELECT
-- ratio of area within coverage (empty if outside, between 0-1 if intersection)
ST_Area (ST_Intersection (bpoly.geom, coverage.geom)) / ST_Area (bpoly.geom) as area_ratio,
ST_AsGeoJSON (ST_Intersection (bpoly.geom, coverage.geom)) AS geom
ST_Area(ST_Intersection(bpoly.geom, selected_coverage.coverage)) / ST_Area(bpoly.geom) AS area_ratio,
ST_AsGeoJSON(ST_Intersection(bpoly.geom, selected_coverage.coverage)) AS geom
FROM
bpoly,
{table_name} coverage
selected_coverage
WHERE
ST_Intersects (bpoly.geom, coverage.geom)
ST_Intersects(bpoly.geom, selected_coverage.coverage);
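The rewritten query takes the area-of-interest GeoJSON as `$1` and the dataset name as `$2`, matching `conn.fetch(query, geom, table_name)` in `client.py`. Both areas are computed on EPSG:4326 geometries, so the square-degree units cancel and `area_ratio` is a plain fraction between 0 and 1. A small sketch of driving the query directly (connection and dataset key are assumptions):

```python
# Sketch: bind order mirrors conn.fetch(query, geom, table_name) in client.py;
# "eubucco" stands in for a dataset_name_snake_case value.
async def intersection_ratio(conn, query: str, geojson_geom: str) -> float:
    records = await conn.fetch(query, geojson_geom, "eubucco")
    # An empty result means the polygon lies entirely outside the dataset's coverage.
    return records[0]["area_ratio"] if records else 0.0
```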

@@ -41,6 +41,9 @@ def __init__(self, topic: Topic, feature: Feature, attribute_key: str) -> None:
self.absolute_value_1 = None
self.absolute_value_2 = None

async def init(self) -> None:
pass

async def preprocess(self) -> None:
attribute = get_attribute(self.topic.key, self.attribute_key)
# Get attribute filter
8 changes: 8 additions & 0 deletions ohsome_quality_api/indicators/base.py
@@ -123,6 +123,14 @@ async def coverage(cls, inverse=False) -> list[Feature]:
else:
return [Feature(Polygon(coordinates=[]))]

@abstractmethod
async def init(self) -> None:
"""Initialize the indicator.

This method should be used to asynchronously load data required by the indicator, for example metadata from the database.
"""
pass

@abstractmethod
async def preprocess(self) -> None:
"""Get fetch and preprocess data.
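The new abstract `init()` in `base.py` adds an asynchronous setup step to the indicator lifecycle. The call site is not part of this diff, so the sequence below is a hypothetical sketch of how an indicator is presumably driven after this change (the class name `BuildingComparison` is assumed):

```python
from geojson import Feature

from ohsome_quality_api.indicators.building_comparison.indicator import BuildingComparison


async def run_indicator(topic, feature: Feature) -> BuildingComparison:
    indicator = BuildingComparison(topic=topic, feature=feature)
    await indicator.init()        # new step: e.g. load dataset metadata from the database
    await indicator.preprocess()  # fetch OSM and reference building areas
    indicator.calculate()
    indicator.create_figure()
    return indicator
```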
25 changes: 0 additions & 25 deletions ohsome_quality_api/indicators/building_comparison/datasets.yaml

This file was deleted.

94 changes: 67 additions & 27 deletions ohsome_quality_api/indicators/building_comparison/indicator.py
@@ -5,7 +5,6 @@
import geojson
import plotly.graph_objects as pgo
import psycopg
import yaml
from async_lru import alru_cache
from dateutil import parser
from geojson import Feature
@@ -42,51 +41,58 @@ def __init__(
self.area_cov: dict[str, float | None] = {}
self.ratio: dict[str, float | None] = {}
# self.data_ref: list = load_reference_datasets() # reference datasets
for key, val in load_datasets_metadata().items():
self.data_ref[key] = val
self.area_osm[key] = None # osm building area
self.area_ref[key] = None # reference building area [sqkm]
self.area_cov[key] = None # covered area [%]
self.ratio[key] = None

@classmethod
async def coverage(cls, inverse=False) -> list[Feature]:
# TODO: could also return a Feature Collection
features = []
datasets = load_datasets_metadata()
datasets = await load_datasets_metadata()
for val in datasets.values():
if inverse:
table = val["coverage"]["inversed"]
coverage_type = "coverage_inversed"
else:
table = val["coverage"]["simple"]
feature = await db_client.get_reference_coverage(table)
feature.properties.update({"refernce_dataset": val["name"]})
coverage_type = "coverage_simple"
feature = await db_client.get_reference_coverage(
val["dataset_name_snake_case"], coverage_type
)
feature.properties.update(
{"refernce_dataset": val["dataset_name_snake_case"]}
)
features.append(feature)
return features

@classmethod
def attribution(cls) -> str:
return get_attribution(["OSM", "EUBUCCO", "Microsoft Buildings"])

async def init(self) -> None:
datasets_metadata = await load_datasets_metadata()
for key, val in datasets_metadata.items():
self.data_ref[key] = val
self.area_osm[key] = None # osm building area
self.area_ref[key] = None # reference building area [sqkm]
self.area_cov[key] = None # covered area [%]
self.ratio[key] = None

async def preprocess(self) -> None:
for key, val in self.data_ref.items():
# get coverage [%]
self.area_cov[key] = await db_client.get_intersection_area(
self.feature,
val["coverage"]["simple"],
val["dataset_name_snake_case"],
)
if self.check_major_edge_cases(key) != "":
continue

# clip input geom with coverage of reference dataset
feature = await db_client.get_intersection_geom(
self.feature,
val["coverage"]["simple"],
val["dataset_name_snake_case"],
)

# get reference building area
result = await get_reference_building_area(
geojson.dumps(feature), val["table_name"]
geojson.dumps(feature), val["dataset_name_snake_case"]
)
self.area_ref[key] = result / (1000 * 1000)

@@ -115,7 +121,7 @@ def calculate(self) -> None:
description = template.substitute(
ratio=round(self.ratio[key] * 100, 2),
coverage=round(self.area_cov[key] * 100, 2),
dataset=self.data_ref[key]["name"],
dataset=self.data_ref[key]["dataset_name_snake_case"],
)
result_description = " ".join((result_description, edge_case, description))

@@ -169,12 +175,12 @@ def create_figure(self) -> None:
for key, dataset in self.data_ref.items():
if None in (self.area_ref[key], self.area_osm[key]):
continue
ref_x.append(dataset["name"])
ref_x.append(dataset["dataset_name_snake_case"])
ref_y.append(round(self.area_ref[key], 2))
ref_data.append(dataset)
osm_x.append(dataset["name"])
osm_x.append(dataset["dataset_name_snake_case"])
osm_y.append(round(self.area_osm[key], 2))
ref_hover.append(f"{dataset['name']} ({dataset['date']})")
ref_hover.append(f"{dataset['table_name']} ({dataset['date']})")
osm_hover.append(f"OSM ({self.result.timestamp_osm:%b %d, %Y})")
ref_color.append(Color[dataset["color"]].value)
osm_area.append(round(self.area_osm[key], 2))
@@ -267,11 +273,11 @@ def check_minor_edge_cases(self, dataset: str) -> str:

def format_sources(self):
sources = []
for dataset in self.data_ref.values():
if dataset["link"] is not None:
sources.append(f"<a href='{dataset['link']}'>" f"{dataset['name']}</a>")
for dataset_key, dataset_value in self.data_ref.items():
if dataset_value["link"] is not None:
sources.append(f"<a href='{dataset_value['link']}'>{dataset_key}</a>")
else:
sources.append(f"{dataset}")
sources.append(dataset_key)
result = ", ".join(sources)
return result

@@ -300,7 +306,41 @@ async def get_reference_building_area(feature_str: str, table_name: str) -> float:
return res[0] or 0.0


def load_datasets_metadata() -> dict:
file_path = os.path.join(os.path.dirname(__file__), "datasets.yaml")
with open(file_path, "r") as f:
return yaml.safe_load(f)
async def load_datasets_metadata() -> dict:
"""Load dataset metadata from the database."""
dns = "postgres://{user}:{password}@{host}:{port}/{database}".format(
host=get_config_value("postgres_host"),
port=get_config_value("postgres_port"),
database=get_config_value("postgres_db"),
user=get_config_value("postgres_user"),
password=get_config_value("postgres_password"),
)

dataset_metadata = {}

async with await psycopg.AsyncConnection.connect(dsn) as con:
async with con.cursor() as cur:
await cur.execute(
"SELECT * "
"FROM comparison_indicators_metadata "
"WHERE indicator = 'building_comparison';"
)
async for row in cur:
dataset_name = row[0]
dataset_name_snake_case = row[1]
link = row[2]
date = row[3].strftime("%Y-%m-%d") # Convert date object to string
description = row[4]
color = row[5]
table_name = row[6]
dataset_metadata[dataset_name] = {
"dataset_name": dataset_name,
"dataset_name_snake_case": dataset_name_snake_case,
"link": link,
"date": date,
"description": description,
"color": color,
"table_name": table_name,
}

return dataset_metadata
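Because rows are unpacked by position, this function implies a column order for `comparison_indicators_metadata` that is not defined anywhere in this diff. A sketch of an order-independent variant using psycopg's `dict_row` row factory; the column names are inferred from the mapping above and from the `WHERE` clause, so treat them as assumptions:

```python
import psycopg
from psycopg.rows import dict_row

# Assumed columns (inferred, not defined in this PR): dataset_name,
# dataset_name_snake_case, link, date, description, color, table_name, indicator.


async def load_datasets_metadata_by_name(dsn: str) -> dict:
    """Order-independent variant; note that 'date' stays a datetime.date here."""
    async with await psycopg.AsyncConnection.connect(dsn) as con:
        async with con.cursor(row_factory=dict_row) as cur:
            await cur.execute(
                "SELECT * FROM comparison_indicators_metadata "
                "WHERE indicator = 'building_comparison';"
            )
            rows = await cur.fetchall()
    return {row["dataset_name"]: row for row in rows}
```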
@@ -78,6 +78,9 @@ def threshhold_yellow(cls):
"""Above or equal to this value label should be yellow."""
return 0.2

async def init(self) -> None:
pass

async def preprocess(self) -> None:
# Get hex-cells
hex_cells: FeatureCollection = await get_hex_cells(self.feature)
3 changes: 3 additions & 0 deletions ohsome_quality_api/indicators/currentness/indicator.py
@@ -60,6 +60,9 @@ def __init__(
self.bin_in_between: Bin
self.bin_out_of_date: Bin

async def init(self) -> None:
pass

async def preprocess(self):
"""Fetch all latest contributions in monthly buckets since 2008

3 changes: 3 additions & 0 deletions ohsome_quality_api/indicators/density/indicator.py
@@ -29,6 +29,9 @@ def green_threshold_function(self, area):
def yellow_threshold_function(self, area):
return self.threshold_red * area

async def init(self) -> None:
pass

async def preprocess(self) -> None:
query_results_count = await ohsome_client.query(self.topic, self.feature)
self.area_sqkm = calculate_area(self.feature) / (1000 * 1000)
3 changes: 3 additions & 0 deletions ohsome_quality_api/indicators/mapping_saturation/indicator.py
@@ -63,6 +63,9 @@ def __init__(
self.best_fit: models.BaseStatModel | None = None
self.fitted_models: list[models.BaseStatModel] = []

async def init(self) -> None:
pass

async def preprocess(self) -> None:
query_results = await ohsome_client.query(
self.topic,
3 changes: 3 additions & 0 deletions ohsome_quality_api/indicators/minimal/indicator.py
@@ -14,6 +14,9 @@ def __init__(self, topic: Topic, feature: Feature) -> None:
super().__init__(topic=topic, feature=feature)
self.count = 0

async def init(self) -> None:
pass

async def preprocess(self) -> None:
query_results = await ohsome_client.query(self.topic, self.feature)
self.count = query_results["result"][0]["value"]
13 changes: 0 additions & 13 deletions ohsome_quality_api/indicators/road_comparison/datasets.yaml

This file was deleted.
