Skip to content

Commit

Permalink
Merge pull request #162 from ParkenDD/fix-geo-distance
Browse files Browse the repository at this point in the history
fix calculating geo distance, output it at cli client and csv
  • Loading branch information
the-infinity authored Jun 14, 2024
2 parents 36885e4 + 040bbbb commit e296ced
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 20 deletions.
1 change: 1 addition & 0 deletions scripts/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def save_duplicates(duplicates_file_path: Path, items: list[dict], append: bool
'id',
'duplicate_id',
'status',
'distance',
'source_id',
'source_uid',
'lat',
Expand Down
39 changes: 19 additions & 20 deletions webapp/services/matching_service/matching_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
Use of this source code is governed by an MIT-style license that can be found in the LICENSE.txt.
"""

import math
from dataclasses import asdict, dataclass
from decimal import Decimal
from math import acos, cos, sin
from typing import Optional

from parkapi_sources.models.enums import ParkAndRideType, ParkingSiteType
from pyproj import Geod

from webapp.common.logging.models import LogMessageType
from webapp.models import ParkingSite
Expand All @@ -26,6 +27,7 @@ class DuplicatedParkingSite:
source_uid: str
lat: Decimal
lon: Decimal
distance: float
name: str
capacity: int
api_url: str
Expand All @@ -43,6 +45,7 @@ def to_dict(self) -> dict:

class MatchingService(BaseService):
parking_site_repository: ParkingSiteRepository
geo_distance_service = Geod(ellps='WGS84')

def __init__(self, *args, parking_site_repository: ParkingSiteRepository, **kwargs):
super().__init__(*args, **kwargs)
Expand All @@ -53,7 +56,7 @@ def generate_duplicates(
existing_matches: list[tuple[int, int]],
match_radius: Optional[int] = None,
) -> list[DuplicatedParkingSite]:
matches: list[tuple[ParkingSiteLocation, ParkingSiteLocation]] = []
matches: list[tuple[ParkingSiteLocation, ParkingSiteLocation, float]] = []
if match_radius is None:
match_radius: int = self.config_helper.get('MATCH_RADIUS', 100)

Expand All @@ -73,27 +76,19 @@ def generate_duplicates(
continue

# If distance is over match radius: ignore possible match
try:
if self.distance(parking_site_locations[i], parking_site_locations[j]) > match_radius:
continue
# Ignore (and log) invalid data at distance calculations (eg 'math domain error')
except ValueError:
self.logger.warning(
LogMessageType.DUPLICATE_HANDLING,
f'Cannot calculate distance between {parking_site_locations[i]} and {parking_site_locations[j]}',
)
distance = self.distance(parking_site_locations[i], parking_site_locations[j])
if math.isnan(distance) or distance > match_radius:
continue

matches.append((parking_site_locations[i], parking_site_locations[j]))
matches.append((parking_site_locations[i], parking_site_locations[j], distance))

duplicates: list[DuplicatedParkingSite] = []
parking_site_ids: list[int] = list(set([match[0].id for match in matches] + [match[1].id for match in matches]))
parking_sites = self.parking_site_repository.fetch_parking_site_by_ids(parking_site_ids)
parking_sites_by_id: dict[int, ParkingSite] = {parking_site.id: parking_site for parking_site in parking_sites}

for match in matches:
duplicates.append(self.parking_site_to_duplicate(parking_sites_by_id[match[0].id], match[1].id))
duplicates.append(self.parking_site_to_duplicate(parking_sites_by_id[match[1].id], match[0].id))
duplicates.append(self.parking_site_to_duplicate(parking_sites_by_id[match[0].id], match[1].id, match[2]))
duplicates.append(self.parking_site_to_duplicate(parking_sites_by_id[match[1].id], match[0].id, match[2]))

return duplicates

Expand All @@ -104,7 +99,7 @@ def apply_duplicates(self, duplicates: list[tuple[int, int]]):
duplicate_parking_site.duplicate_of_parking_site_id = parking_site.id
self.parking_site_repository.save_parking_site(duplicate_parking_site)

def parking_site_to_duplicate(self, parking_site: ParkingSite, duplicate_id: int) -> DuplicatedParkingSite:
def parking_site_to_duplicate(self, parking_site: ParkingSite, duplicate_id: int, distance: float) -> DuplicatedParkingSite:
return DuplicatedParkingSite(
id=parking_site.id,
duplicate_id=duplicate_id,
Expand All @@ -113,6 +108,7 @@ def parking_site_to_duplicate(self, parking_site: ParkingSite, duplicate_id: int
source_uid=parking_site.source.uid,
lat=parking_site.lat,
lon=parking_site.lon,
distance=distance,
address=parking_site.address,
capacity=parking_site.capacity,
name=parking_site.name,
Expand All @@ -125,8 +121,11 @@ def parking_site_to_duplicate(self, parking_site: ParkingSite, duplicate_id: int
opening_hours=parking_site.opening_hours,
)

@staticmethod
def distance(location_1: ParkingSiteLocation, location_2: ParkingSiteLocation) -> float:
return 6371010 * acos(
sin(location_1.lat) * sin(location_2.lat) + cos(location_1.lat) * cos(location_2.lat) * cos(location_1.lon - location_2.lon),
def distance(self, location_1: ParkingSiteLocation, location_2: ParkingSiteLocation) -> float:
    """
    Return the geodesic distance between two parking site locations.

    The calculation is delegated to ``self.geo_distance_service.inv`` and the third element of its result is
    returned as the distance (metres, according to the pyproj ``Geod.inv`` documentation).

    ``Geod.inv`` takes its positional arguments longitude-first (``lons1, lats1, lons2, lats2``). The coordinates
    are therefore passed as lon/lat pairs; passing latitude first swaps the axes and yields wrong distances.

    :param location_1: first location; its ``lon`` and ``lat`` attributes are converted with ``float()``
    :param location_2: second location; its ``lon`` and ``lat`` attributes are converted with ``float()``
    :return: distance between both locations as reported by the geodesic service
    """
    # The two azimuth values returned by inv() are not needed for duplicate matching.
    _, _, distance = self.geo_distance_service.inv(
        float(location_1.lon),
        float(location_1.lat),
        float(location_2.lon),
        float(location_2.lat),
    )
    return distance

0 comments on commit e296ced

Please sign in to comment.