Skip to content

Commit

Permalink
Big bag of work in progress in one big commit :/
Browse files Browse the repository at this point in the history
  • Loading branch information
GeoWill committed Jun 12, 2021
1 parent a25155f commit 693ffb8
Show file tree
Hide file tree
Showing 12 changed files with 432 additions and 64 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ python-dateutil = "*"
retry = "*"
black = "*"
rich = "*"
arcgis2geojson = "*"

[dev-packages]
pytest-mypy-plugins = "*"
Expand Down
102 changes: 38 additions & 64 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lgsf/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self):
"councillors",
"templates",
"metadata",
"polling_stations"
# 'parties',
# "scrapers",
# 'reconcilers',
Expand Down
Empty file.
101 changes: 101 additions & 0 deletions lgsf/polling_stations/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from rich.progress import Progress

from lgsf.commands.base import PerCouncilCommandBase
from lgsf.path_utils import load_scraper, load_council_info



class Command(PerCouncilCommandBase):
    """Run the polling-station scraper for each selected council.

    Iterates the councils chosen via the base class's selection options,
    loads each council's scraper module, and runs it unless it is
    disabled, already run recently (--refresh), or filtered out by tags.
    Prints a completed/missing/failed/skipped summary at the end.
    """

    command_name = "polling_stations"

    def add_arguments(self, parser):
        """Register command-specific CLI flags on *parser* (an argparse parser)."""
        parser.add_argument(
            "--check-only",
            action="store_true",
            help="Just check for updated pages, don't scrape anything",
        )
        parser.add_argument(
            "--list-missing",
            action="store_true",
            help="Print missing councils",
        )
        parser.add_argument(
            "--list-disabled",
            action="store_true",
            help="Print disabled councils",
        )

    def _run_single(self, scraper, progress, summary):
        """Run one council's scraper, tallying the outcome in *summary*.

        KeyboardInterrupt always propagates so the user can abort the
        whole run; any other exception is recorded as a failure (and
        re-raised when --verbose is set) so one broken council does not
        stop the remaining ones.
        """
        try:
            progress.console.print(scraper.options["council"])
            scraper.run()
            summary["completed"] += 1
        except KeyboardInterrupt:
            raise
        except Exception:
            # Narrowed from a bare `except:` so SystemExit and other
            # BaseExceptions still propagate.
            if self.options.get("verbose"):
                raise
            summary["failed"] += 1
            progress.console.print(
                "Error running {}, see {} for more".format(
                    self.options["council"], scraper._error_file_name()
                ),
                style="red",
            )

    def handle(self, options):
        """Entry point: run scrapers for every council in the selection."""
        self.options = options
        if options["list_missing"]:
            self.output_missing()

        if options["list_disabled"]:
            self.output_disabled()

        self.output_status()

        self.normalise_codes()
        to_run = self.councils_to_run()
        summary = {
            "completed": 0,
            "missing scraper": 0,
            "failed": 0,
            "skipped": 0,
        }
        with Progress() as progress:
            tasks = {
                "total": progress.add_task(description="Total", total=len(to_run)),
            }

            while not progress.finished:
                for council in to_run:
                    self.options["council"] = council
                    self.options["council_info"] = load_council_info(council)
                    scraper_cls = load_scraper(council, self.command_name)
                    if not scraper_cls:
                        summary["missing scraper"] += 1
                        # Must still advance the task bar: the original
                        # `continue` skipped the update, so the
                        # `while not progress.finished` loop never
                        # terminated when any council had no scraper.
                        progress.update(tasks["total"], advance=1)
                        continue
                    with scraper_cls(self.options, progress.console) as scraper:
                        should_run = True
                        if scraper.disabled:
                            should_run = False

                        if should_run and options["refresh"]:
                            if scraper.run_since():
                                should_run = False

                        if should_run and options["tags"]:
                            required_tags = set(options["tags"].split(","))
                            scraper_tags = set(scraper.get_tags)
                            if not required_tags.issubset(scraper_tags):
                                should_run = False

                        if should_run:
                            self._run_single(scraper, progress, summary)
                        else:
                            summary["skipped"] += 1

                    progress.update(tasks["total"], advance=1)
        self.console.print(summary)
26 changes: 26 additions & 0 deletions lgsf/polling_stations/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import json

class PollingStationsList:
    """Wrapper around a collection of scraped polling station records."""

    def __init__(self, stations):
        # Raw station records, kept exactly as the scraper produced them.
        self.stations = stations

    def as_file_name(self):
        """Base file name (without extension) used when saving this data."""
        return "stations"

    def as_json(self):
        """Serialise the stations as pretty-printed JSON."""
        return json.dumps(self.stations, indent=4)


class PollingDistrictsList:
    """Wrapper around a collection of scraped polling district records."""

    def __init__(self, districts):
        # Raw district records, kept exactly as the scraper produced them.
        self.districts = districts

    def as_file_name(self):
        """Base file name (without extension) used when saving this data."""
        return "districts"

    def as_json(self):
        """Serialise the districts as pretty-printed JSON."""
        return json.dumps(self.districts, indent=4)
Empty file.
57 changes: 57 additions & 0 deletions lgsf/polling_stations/scrapers/arcgis_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
from arcgis2geojson import arcgis2geojson

from lgsf.polling_stations.models import PollingStationsList, PollingDistrictsList
from lgsf.scrapers.base import ScraperBase
from lgsf.polling_stations.scrapers.common import (
# BaseScraper,
# get_data_from_url,
# save,
summarise,
sync_db_to_github,
truncate, PollingStationScraperBase,
)

class ArcGisScraper(PollingStationScraperBase):
    """Scraper for councils that publish polling data via an ArcGIS REST API."""

    def make_geometry(self, feature):
        """Convert one ArcGIS feature to a canonical GeoJSON string.

        sort_keys gives a stable serialisation, so identical geometries
        always produce identical strings.
        """
        return json.dumps(arcgis2geojson(feature), sort_keys=True)

    def get_data(self, url):  # pragma: no cover
        """Fetch *url* and return a ``(raw_bytes, decoded_json)`` tuple."""
        response = self.get(url)
        data_str = response.content
        data = json.loads(data_str.decode(self.encoding))
        return (data_str, data)

    def process_feature(self, feature, fields=None):
        """Build a flat record dict from one ArcGIS feature.

        *fields* is the list of field descriptors from the ArcGIS
        response (each a dict with a ``name`` key). When omitted, only
        ``council_id`` and ``geometry`` are populated — the original
        code iterated ``fields`` unconditionally and raised TypeError
        when the ``None`` default was used.
        """
        record = {
            "council_id": self.council_id,
            "geometry": self.make_geometry(feature),
        }
        for field in fields or []:
            value = feature["attributes"][field["name"]]
            # Strip stray whitespace from string attributes only.
            record[field["name"]] = value.strip() if isinstance(value, str) else value
        return record

    def scrape(self, url, type="features"):
        """Download *url* and process every feature it contains.

        ``type`` is only used to label the progress output; the name is
        kept (despite shadowing the builtin) for backward compatibility
        with keyword callers.
        """
        data_str, data = self.get_data(url)
        print(f"found {len(data['features'])} {type}")

        # Field descriptors and feature list from the ArcGIS payload.
        fields = data["fields"]
        features = data["features"]

        # process_features is provided by PollingStationScraperBase.
        return self.process_features(features, fields)
Loading

0 comments on commit 693ffb8

Please sign in to comment.