Skip to content

Commit

Permalink
Merge pull request #18 from dpriskorn/switch_to_missing_in_last_updated
Browse files Browse the repository at this point in the history
Switch to using the "missing in" property instead
  • Loading branch information
dpriskorn committed Jul 13, 2023
2 parents fd62d9e + 304d6b7 commit 16615ac
Show file tree
Hide file tree
Showing 24 changed files with 977 additions and 597 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/lint_python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Lint, type-check, audit and test the project on every push and
# pull request that targets master.
name: lint_python
on:
  push:
    branches: ["master"]
  pull_request:
    branches: ["master"]
jobs:
  # Fast-failing lint job; the heavier job below only runs if this passes.
  ruff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Pin ruff to the version locked in pyproject.toml: newer ruff
      # releases no longer accept the `--format` flag used below, so an
      # unpinned install would break this job.
      - run: pip install --user ruff==0.0.278
      - run: ruff --format=github --target-version=py37 .

  lint_python:
    needs: ruff
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # poetry has to be installed before setup-python so that
      # `cache: 'poetry'` can locate it.
      - name: Install poetry
        run: pipx install poetry
      - uses: actions/setup-python@v4
        with:
          python-version: 3.x
          cache: 'poetry'
      - run: pip install --upgrade pip wheel
      - run: poetry install --with=dev
      - run: poetry run black --check .
      - run: poetry run codespell src/ tests/ *.md *.py # --ignore-words-list="" --skip="*.css,*.js,*.lock"
      - run: mkdir --parents --verbose .mypy_cache
      - run: poetry run mypy --ignore-missing-imports --install-types --non-interactive .
      - run: poetry run safety check
      # The code imports `config`; CI uses the sample configuration.
      - run: cp config.sample.py config.py
      - run: poetry run pytest .
35 changes: 31 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ repos:
entry: black .
types: [python]

- id: ruff
name: ruff
language: system
entry: ruff
args:
# Tell ruff to fix sorting of imports
- "--fix"
- "--format=github"
- "--target-version=py37"
types: [python]

- id: codespell
name: codespell
language: system
Expand Down Expand Up @@ -43,11 +54,27 @@ repos:
"--statistics"]
types: [ python ]

# https://jaredkhan.com/blog/mypy-pre-commit
- id: mypy
name: mypy
entry: mypy
language: python
# use your preferred Python version
# language_version: python3.7
# additional_dependencies: ["mypy==0.790"]
types: [python]
# use require_serial so that script
# is only called once per commit
require_serial: true
# exclude: shape.py|compareshape.py
# Print the number of files as a sanity-check
# verbose: true

# - id: pytest
# name: pytest
# language: system
# entry: pytest
# #args:
## - "--durations=10"
## - "--ignore=tests/test_wcdimportbot.py"
# types: [ python ]
# args:
# # - "--durations=10"
# - "-x"
# pass_filenames: false
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ If yes the tool presents the match for approval to the user.
If no match is found via OSM Wikidata Link the tool
proceeds to lookup the label of the route in the Waymarked Trails database.
If the user cannot decide whether they match, they are provided with links to make further investigation easier.
If the user chooses "no match" then a no-value statement with the current date is uploaded to Wikidata.
If the user chooses "no match" then a no-value statement with the current date is uploaded to Wikidata.
If a no-value statement already exists, a point-in-time qualifier will be appended.
If the user accepts a match it is uploaded to Wikidata at once.

## Installation and setup
Expand Down
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.enrich_hiking_trails import EnrichHikingTrails
from src.models.enrich_hiking_trails import EnrichHikingTrails

eht = EnrichHikingTrails()
eht.add_osm_property_to_items()
10 changes: 8 additions & 2 deletions config.sample.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import logging

loglevel = logging.INFO
upload_to_wikidata = False
validate_before_upload = True
upload_to_wikidata = True
request_timeout = 10
user_name = ""
bot_password = "" # nosec
user_name_only = "input your user name here"
user_agent = f"hiking_trail_matcher, see https://github.com/dpriskorn/hiking_trail_matcher/ User:{user_name_only}"
user_agent = (
f"hiking_trail_matcher, "
f"see https://github.com/dpriskorn/"
f"hiking_trail_matcher/ User:{user_name_only}"
)

# This controls which hiking trails to fetch and work on
language_code = "en"
Expand Down
1,066 changes: 634 additions & 432 deletions poetry.lock

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
[tool.poetry]
name = "hiking-trail-matcher"
version = "0.1.0"
description = ""
authors = ["Your Name <[email protected]>"]
description = "Script that helps link together hiking trails in Wikidata and OpenStreetMap based on user validation that they are the same."
authors = ["Dennis Priskorn"]
license = "GPLv3+"
readme = "README.md"
packages = [{include = "hiking_trail_matcher"}]
# packages = [{include = "app.py"}]

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
Expand All @@ -15,6 +15,7 @@ rich = "^13.3.1"
questionary = "^1.10.0"
python-dateutil = "^2.8.2"
wikibaseintegrator = "^0.12.3"
pydash = "^7.0.4"


[tool.poetry.group.dev.dependencies]
Expand All @@ -28,6 +29,10 @@ mypy = "^1.0.0"
pre-commit = "^3.0.4"
pytest = "^7.2.1"
pyupgrade = "^3.3.1"
types-python-dateutil = "^2.8.19.13"
types-requests = "^2.31.0.1"
ruff = "^0.0.278"
safety = "^2.3.5"

[build-system]
requires = ["poetry-core"]
Expand Down
5 changes: 5 additions & 0 deletions src/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,15 @@ class Property(Enum):
POINT_IN_TIME = "P585"
RETRIEVED = "P813"
STATED_IN = "P248"
NOT_FOUND_IN = "P9660"
LAST_UPDATE = "P5017"
BASED_ON_HEURISTIC = "P887"


class ItemEnum(Enum):
    """Item identifiers (``Q…`` ids) that the project refers to by name.

    NOTE(review): presumably Wikidata QIDs, given the ``Q`` prefix and the
    project's Wikidata focus -- confirm against the items themselves.
    """

    # Item for OpenStreetMap.
    OPENSTREETMAP = "Q936"
    # Item naming the Waymarked Trails API lookup.
    LOOKUP_IN_WAYMARKED_TRAILS_API = "Q119970009"
    # Item naming validation by a user.
    USER_VALIDATION = "Q119970060"


class OsmIdSource(Enum):
Expand Down
14 changes: 14 additions & 0 deletions src/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class SummaryError(Exception):
    """Project-specific error type.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it; ``BaseException`` is reserved
    for interpreter-exit signals such as ``SystemExit``.

    NOTE(review): presumably concerns edit summaries -- no raise site is
    visible in this part of the code; confirm.
    """


class MissingInformationError(Exception):
    """Project-specific error type.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it; ``BaseException`` is reserved
    for interpreter-exit signals such as ``SystemExit``.

    NOTE(review): presumably raised when a required piece of data has not
    been set -- no raise site is visible in this part of the code; confirm.
    """


class DebugExit(BaseException):
    """Exit signal for debugging sessions.

    NOTE(review): kept on ``BaseException`` -- presumably on purpose, so
    that it is not swallowed by ``except Exception`` handlers (like
    ``SystemExit``); no raise site is visible here, confirm.
    """


class NoItemError(Exception):
    """Raised when an operation needs an item that has not been set.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it; ``BaseException`` is reserved
    for interpreter-exit signals such as ``SystemExit``.
    """
Empty file added src/models/__init__.py
Empty file.
55 changes: 29 additions & 26 deletions src/enrich_hiking_trails.py → src/models/enrich_hiking_trails.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
import logging
from typing import Dict, Optional, Any, List
from typing import Any, Dict, List, Optional

import requests # type: ignore
from pydantic import validate_arguments
from wikibaseintegrator import WikibaseIntegrator, wbi_config # type: ignore
from wikibaseintegrator.datatypes import ExternalID, Item, Time # type: ignore
from wikibaseintegrator.wbi_enums import ( # type: ignore
WikibaseDatePrecision,
WikibaseSnakType,
)
from wikibaseintegrator import WikibaseIntegrator # type: ignore
from wikibaseintegrator.wbi_helpers import execute_sparql_query # type: ignore
from wikibaseintegrator.wbi_login import Login # type: ignore

import config
from src.console import console
from src.enums import OsmIdSource, Status
from src.project_base_model import ProjectBaseModel
from src.trail_item import TrailItem
from src.exceptions import MissingInformationError, NoItemError
from src.models.project_base_model import ProjectBaseModel
from src.models.trail_item import TrailItem

logging.basicConfig(level=config.loglevel)
logger = logging.getLogger(__name__)


class MissingInformationError(BaseException):
pass


class EnrichHikingTrails(ProjectBaseModel):
rdf_entity_prefix = "http://www.wikidata.org/entity/"
wbi: Optional[WikibaseIntegrator]
Expand All @@ -43,17 +34,18 @@ def __get_hiking_trails_missing_osm_id__(self) -> None:
self.__extract_item_ids__()

def __get_sparql_result__(self):
# For now we limit to swedish trails
"""Get all trails in the specified country and
with labels in the specified language"""
self.setup_wbi()
# We hardcode swedish for now
self.sparql_result = execute_sparql_query(
f"""
SELECT DISTINCT ?item ?itemLabel WHERE {{
?item wdt:P31 wd:Q2143825;
wdt:P17 wd:{config.country_qid}.
minus{{?item wdt:P402 []}}
# Fetch labels for easier debugging
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{config.language_code}". }}
# Fetch labels also
SERVICE wikibase:label
{{ bd:serviceParam wikibase:language "{config.language_code}". }}
}}
"""
)
Expand All @@ -62,7 +54,8 @@ def __get_sparql_result__(self):
def __extract_wcdqs_json_entity_id__(
self, data: Dict, sparql_variable: str = "item"
) -> str:
"""We default to "item" as sparql value because it is customary in the Wikibase ecosystem"""
"""We default to "item" as sparql value because
it is customary in the Wikibase ecosystem"""
return str(data[sparql_variable]["value"].replace(self.rdf_entity_prefix, ""))

@validate_arguments
Expand All @@ -71,7 +64,9 @@ def __extract_item_ids__(self) -> None:
self.item_ids = []
if self.sparql_result:
for binding in self.sparql_result["results"]["bindings"]:
self.item_ids.append(self.__extract_wcdqs_json_entity_id__(data=binding))
self.item_ids.append(
self.__extract_wcdqs_json_entity_id__(data=binding)
)
console.print(f"Got {self.number_of_items} from WDQS")

def add_osm_property_to_items(self):
Expand All @@ -96,7 +91,7 @@ def __lookup_in_osm_wikidata_link__(trail_item: TrailItem) -> TrailItem:
trail_item.__ask_user_to_approve_match_from_osm_wikidata_link__()
else:
if trail_item.osm_wikidata_link_return.no_match:
console.print(f"Got no match from OSM Wikidata Link API")
console.print("Got no match from OSM Wikidata Link API")
# Return mutated object
return trail_item

Expand All @@ -106,10 +101,13 @@ def __lookup_in_waymarked_trails__(trail_item: TrailItem) -> None:
# if trail_item.questionary_return.quit:
# break
if trail_item.questionary_return.could_not_decide:
if not trail_item.item:
raise NoItemError()
console.print(
f"Try looking at {trail_item.waymarked_hiking_trails_search_url} "
f"and see if any fit with {trail_item.wikidata_url}"
f"and see if any fit with {trail_item.item.get_entity_url()}"
)
# TODO help the user match again
else:
trail_item.osm_id_source = OsmIdSource.QUESTIONNAIRE
trail_item.enrich_wikidata()
Expand All @@ -132,17 +130,22 @@ def __iterate_items__(self):
console.print(f"Working on {count}/{self.number_of_items}")
trail_item = TrailItem(qid=qid, wbi=self.wbi)
if trail_item.time_to_check_again():
logger.debug("It's time to check or no relation id is present")
logger.debug("It's time to check")
trail_item = self.__lookup_in_osm_wikidata_link__(trail_item=trail_item)
if (
trail_item.osm_wikidata_link_match_prompt_return == Status.DECLINED
or trail_item.osm_wikidata_link_return.no_match is True
trail_item.osm_wikidata_link_match_prompt_return == Status.DECLINED
or trail_item.osm_wikidata_link_return.no_match is True
):
logger.info("Falling back to Waymarked Trails API")
# TODO annotate the results here by downloading their
# geometry from Overpass API and checking whether
# each of them is in the right
# 1) country 2) region 3) municipality
self.__lookup_in_waymarked_trails__(trail_item=trail_item)
else:
logger.info(
f"Skipping item with recent no-value statement, see {trail_item.wikidata_url}"
f"Skipping item with recent last update statement, "
f"see {trail_item.item.get_entity_url()}"
)
count += 1
logger.debug("end of loop")
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from src.project_base_model import ProjectBaseModel
from src.models.project_base_model import ProjectBaseModel


class QuestionaryReturn(ProjectBaseModel):
Expand Down
2 changes: 1 addition & 1 deletion src/subroute.py → src/models/subroute.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
class Subroute(BaseModel):
name: str = ""
id: int
ref: str = ""
ref: str = ""
Loading

0 comments on commit 16615ac

Please sign in to comment.