WIP: Add Jupyter notebook analysis examples #1096

Draft · wants to merge 23 commits into base: develop
Changes from all commits · 23 commits
c834907
Store the connection string in the DataStore so we can use it to conn…
robintw Nov 29, 2021
70c99d1
Simple demo of connecting to Pepys, and using interact with geopandas…
robintw Nov 29, 2021
26b53e5
Add plots of speed vs time
robintw Nov 29, 2021
d5e4df6
introduce pandas requirement
IanMayo Dec 1, 2021
50208d6
introduce other missing dependency
IanMayo Dec 1, 2021
2db3366
More dependencies
IanMayo Dec 1, 2021
b6e2573
introduce more dependencies (maybe scikit pulls in some of these)
IanMayo Dec 1, 2021
3eadf9c
Add migration to extend length of platform identifier field
robintw Dec 2, 2021
c710e66
Add importer for AIS data
robintw Dec 2, 2021
056f02e
Merge branch 'jupyter-playground' of github.com:debrief/pepys-import …
robintw Dec 2, 2021
2156631
Merge branch 'extend-field-lengths' into jupyter-playground
robintw Dec 3, 2021
e9a30a9
Add new AIS importer
robintw Dec 14, 2021
737e504
Update to latest version of notebook
robintw Dec 14, 2021
7857e92
Split Jupyter demos out into separate notebooks
robintw Jan 31, 2022
dfdb2d6
Add other statistics on close vessels
robintw Jan 31, 2022
0e62796
Merge branch 'develop' of github.com:debrief/pepys-import into jupyte…
robintw Feb 2, 2022
d86fbbe
Update requirements, plus notebooks
robintw Feb 2, 2022
a09a108
Test installing .whl files for GDAL/Fiona
robintw Feb 2, 2022
a31ad55
Change dependency from jupyter to notebook to just get notebook not t…
robintw Feb 2, 2022
2e9bb11
Add option to Admin CLI to run Jupyter server
robintw Feb 2, 2022
d0c0f80
Update CI workflow to install whl files
robintw Feb 2, 2022
d542277
Undo whl install in wrong place, now do it in setup_windows
robintw Feb 2, 2022
9151464
Add ipywidgets to requirements so we can run the interactive UIs in t…
robintw Feb 4, 2022
7 changes: 6 additions & 1 deletion .github/workflows/setup_windows.ps1
@@ -108,4 +108,9 @@ catch {
$DLL = python .\.github\workflows\get_DLL.py | Out-String
Copy-Item $DLL.Trim() -Destination ".\lib\sqlite-python"

Write-Output "INFO: Copied SQLite pyd file"
Write-Output "INFO: Copied SQLite pyd file"


# Install whl files
python -m pip install .\bin\GDAL-3.4.1-cp39-cp39-win_amd64.whl
python -m pip install .\bin\Fiona-1.8.20-cp39-cp39-win_amd64.whl
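These pinned cp39 wheels are presumably vendored in .\bin because GDAL and Fiona did not publish official Windows wheels on PyPI at the time, so they have to be installed before the normal requirements install can resolve the new geospatial dependencies.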
2,966 changes: 2,966 additions & 0 deletions Jupyter Playground.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions create_deployment.ps1
@@ -156,6 +156,10 @@ catch {
}

try {
# Install GDAL and Fiona from wheels
.\python\python.exe -m pip install .\bin\GDAL-3.4.1-cp39-cp39-win_amd64.whl
.\python\python.exe -m pip install .\bin\Fiona-1.8.20-cp39-cp39-win_amd64.whl

# Do a standard pip install of the requirements and dev requirements, not warning us that scripts will be unavailable
.\python\python.exe -m pip install -r requirements.txt -r requirements_dev.txt --no-warn-script-location --no-cache-dir --use-deprecated=legacy-resolver

534 changes: 534 additions & 0 deletions demos/Interactive vessel attribute plots.ipynb

Large diffs are not rendered by default.

2,104 changes: 2,104 additions & 0 deletions demos/Interactive vessel location plots.ipynb

Large diffs are not rendered by default.

672 changes: 672 additions & 0 deletions demos/Vessels within distance.ipynb

Large diffs are not rendered by default.

130 changes: 130 additions & 0 deletions importers/ais_dstl_importer.py
@@ -0,0 +1,130 @@
from datetime import datetime

from pepys_import.core.formats import unit_registry
from pepys_import.core.formats.location import Location
from pepys_import.core.validators import constants
from pepys_import.file.highlighter.level import HighlightLevel
from pepys_import.file.highlighter.support.combine import combine_tokens
from pepys_import.file.importer import Importer
from pepys_import.utils.sqlalchemy_utils import get_lowest_privacy
from pepys_import.utils.unit_utils import convert_absolute_angle, convert_speed


class AIS_DSTL_Importer(Importer):
"""Imports AIS data from https://www.gov.uk/government/news/dstl-shares-new-open-source-framework-initiative"""

def __init__(self):
super().__init__(
name="AIS DSTL Format Importer",
validation_level=constants.BASIC_LEVEL,
short_name="AIS DSTL Importer",
default_privacy="Public",
datafile_type="AIS",
)
self.text_label = None

self.set_highlighting_level(HighlightLevel.NONE)

def can_load_this_type(self, suffix):
return suffix.upper() == ".CSV"

def can_load_this_filename(self, filename):
return True

def can_load_this_header(self, header):
return "Time,MMSI,Latitude_degrees,Longitude_degrees," in header

def can_load_this_file(self, file_contents):
return True

# @profile
def _load_this_line(self, data_store, line_number, line, datafile, change_id):
# Skip the header
if line_number == 1:
return

tokens = line.tokens(line.CSV_TOKENISER, ",")
if len(tokens) <= 1:
# the last line may be empty, don't worry
return

# separate token strings
datetime_token = tokens[0]
imo_id_token = tokens[1]
lat_degrees_token = tokens[2]
long_degrees_token = tokens[3]
course_token = tokens[4]
speed_token = tokens[5]

imo_id = imo_id_token.text
imo_id_token.record(self.name, "imo_id", imo_id)

timestamp = self.parse_timestamp(datetime_token.text)
if timestamp:
datetime_token.record(self.name, "timestamp", timestamp)
else:
# Skip line if invalid timestamp
self.errors.append(
{self.error_type: f"Line {line_number}. Error in timestamp parsing."}
)
return

if imo_id.strip() != "" and imo_id.strip() != "IMO0000000":
plat_name = imo_id
else:
plat_name = None

# and finally store it
platform = self.get_cached_platform(
data_store, platform_name=plat_name, change_id=change_id, unknown=True
)
sensor_type = data_store.add_to_sensor_types("Broadcast", change_id=change_id).name
privacy = get_lowest_privacy(data_store)
sensor = platform.get_sensor(
data_store=data_store,
sensor_name="AIS",
sensor_type=sensor_type,
privacy=privacy,
change_id=change_id,
)
state = datafile.create_state(data_store, platform, sensor, timestamp, self.short_name)

location = Location(errors=self.errors, error_type=self.error_type)
lat_success = location.set_latitude_decimal_degrees(lat_degrees_token.text)
lon_success = location.set_longitude_decimal_degrees(long_degrees_token.text)
if lat_success and lon_success:
state.location = location
combine_tokens(long_degrees_token, lat_degrees_token).record(
self.name, "location", state.location, "decimal degrees"
)

course_valid, course = convert_absolute_angle(
course_token.text, line_number, self.errors, self.error_type
)
if course_valid:
state.course = course
course_token.record(self.name, "course", course)

speed_valid, speed = convert_speed(
speed_token.text,
unit_registry.knots,
line_number,
self.errors,
self.error_type,
)
if speed_valid:
state.speed = speed
speed_token.record(self.name, "speed", speed)

datafile.flush_extracted_tokens()

@staticmethod
def parse_timestamp(datetime_str):
# Parses the following format
# 2016-01-11 08:32:49.765
try:
res = datetime.strptime(datetime_str, "%Y-%m-%d %H:%M:%S.%f")
except ValueError:
return False

return res
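
As a quick illustration of how this importer decides whether it can handle a file, and of the timestamp format it insists on, the sketch below exercises the pure-logic methods standalone. It assumes the repository root is on the Python path; the trailing column names in the sample header are guesses, since only the leading prefix is actually checked.

from importers.ais_dstl_importer import AIS_DSTL_Importer

importer = AIS_DSTL_Importer()

# Suffix check is case-insensitive; header check is a simple substring test
print(importer.can_load_this_type(".csv"))  # True
print(importer.can_load_this_header("Time,MMSI,Latitude_degrees,Longitude_degrees,Course,Speed"))  # True

# Timestamps must carry fractional seconds; anything else is rejected
print(AIS_DSTL_Importer.parse_timestamp("2016-01-11 08:32:49.765"))  # datetime(2016, 1, 11, 8, 32, 49, 765000)
print(AIS_DSTL_Importer.parse_timestamp("2016-01-11 08:32:49"))      # False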
159 changes: 159 additions & 0 deletions importers/ais_marine_cadastre_importer.py
@@ -0,0 +1,159 @@
from datetime import datetime

from pepys_import.core.formats import unit_registry
from pepys_import.core.formats.location import Location
from pepys_import.core.validators import constants
from pepys_import.file.highlighter.level import HighlightLevel
from pepys_import.file.highlighter.support.combine import combine_tokens
from pepys_import.file.importer import Importer
from pepys_import.utils.sqlalchemy_utils import get_lowest_privacy
from pepys_import.utils.unit_utils import convert_absolute_angle, convert_speed


class AIS_MarineCadastre_Importer(Importer):
"""Imports AIS data from https://marinecadastre.gov/ais/"""

def __init__(self):
super().__init__(
name="AIS Marine Cadastre Format Importer",
validation_level=constants.BASIC_LEVEL,
short_name="AIS Marine Cadastre Importer",
default_privacy="Public",
datafile_type="AIS",
)
self.text_label = None

self.set_highlighting_level(HighlightLevel.NONE)

def can_load_this_type(self, suffix):
return suffix.upper() == ".CSV"

def can_load_this_filename(self, filename):
return True

def can_load_this_header(self, header):
return "MMSI,BaseDateTime" in header

def can_load_this_file(self, file_contents):
return True

# @profile
def _load_this_line(self, data_store, line_number, line, datafile, change_id):
# Skip the header
if line_number == 1:
return

tokens = line.tokens(line.CSV_TOKENISER, ",")
if len(tokens) <= 1:
# the last line may be empty, don't worry
return
if len(tokens) < 17:
self.errors.append(
{self.error_type: f"Error on line {line_number}. Not enough tokens: {line}"}
)
return

# separate token strings
datetime_token = tokens[1]
lat_degrees_token = tokens[2]
long_degrees_token = tokens[3]
speed_token = tokens[4]
course_token = tokens[5]
heading_token = tokens[6]
name_token = tokens[7]
imo_id_token = tokens[8]

imo_id = imo_id_token.text
imo_id_token.record(self.name, "imo_id", imo_id)

vessel_name = name_token.text
name_token.record(self.name, "vessel name", vessel_name)

if len(datetime_token.text) != 19:
self.errors.append(
{
self.error_type: f"Error on line {line_number}. Datetime format '{datetime_token.text}' "
f"should be 19 characters long"
}
)
return

timestamp = self.parse_timestamp(datetime_token.text)
if timestamp:
datetime_token.record(self.name, "timestamp", timestamp)
else:
# Skip line if invalid timestamp
self.errors.append(
{self.error_type: f"Line {line_number}. Error in timestamp parsing."}
)
return

if imo_id.strip() != "" and imo_id.strip() != "IMO0000000":
plat_name = imo_id
else:
if vessel_name.strip() != "":
plat_name = vessel_name
else:
plat_name = None

# and finally store it
platform = self.get_cached_platform(
data_store, platform_name=plat_name, change_id=change_id, unknown=True
)
sensor_type = data_store.add_to_sensor_types("Broadcast", change_id=change_id).name
privacy = get_lowest_privacy(data_store)
sensor = platform.get_sensor(
data_store=data_store,
sensor_name="AIS",
sensor_type=sensor_type,
privacy=privacy,
change_id=change_id,
)
state = datafile.create_state(data_store, platform, sensor, timestamp, self.short_name)

location = Location(errors=self.errors, error_type=self.error_type)
lat_success = location.set_latitude_decimal_degrees(lat_degrees_token.text)
lon_success = location.set_longitude_decimal_degrees(long_degrees_token.text)
if lat_success and lon_success:
state.location = location
combine_tokens(long_degrees_token, lat_degrees_token).record(
self.name, "location", state.location, "decimal degrees"
)

heading_valid, heading = convert_absolute_angle(
heading_token.text, line_number, self.errors, self.error_type
)
if heading_valid:
state.heading = heading
heading_token.record(self.name, "heading", heading)

course_valid, course = convert_absolute_angle(
course_token.text, line_number, self.errors, self.error_type
)
if course_valid:
state.course = course
course_token.record(self.name, "course", course)

speed_valid, speed = convert_speed(
speed_token.text,
unit_registry.knots,
line_number,
self.errors,
self.error_type,
)
if speed_valid:
state.speed = speed
speed_token.record(self.name, "speed", speed)

datafile.flush_extracted_tokens()

@staticmethod
def parse_timestamp(datetime_str):
# Parses the following format
# 2020-01-01T00:00:00
try:
res = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S")
except ValueError:
return False

return res
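
The Marine Cadastre importer follows the same structure; the notable differences are the header it recognises ("MMSI,BaseDateTime"), the extra heading field, the fallback from IMO number to vessel name when choosing a platform name, and a timestamp format without fractional seconds. A brief sketch of that last difference, under the same import-path assumption as above:

from importers.ais_marine_cadastre_importer import AIS_MarineCadastre_Importer

# ISO-style timestamps with a "T" separator and no fractional seconds
print(AIS_MarineCadastre_Importer.parse_timestamp("2020-01-01T00:00:00"))  # datetime(2020, 1, 1, 0, 0)
print(AIS_MarineCadastre_Importer.parse_timestamp("2020-01-01 00:00:00"))  # False (wrong separator)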
6 changes: 6 additions & 0 deletions pepys_admin/admin_cli.py
@@ -1,4 +1,5 @@
import os
import subprocess
import sys
import webbrowser
from datetime import datetime
@@ -46,6 +47,7 @@ class AdminShell(BaseShell):
(8) Maintenance
(9) Maintain tasks
(10) View dashboard
(11) Run Jupyter Notebook server
(.) Exit
"""
prompt = "(pepys-admin) "
@@ -67,6 +69,7 @@ def __init__(self, data_store, csv_path=DIR_PATH):
"8": self.do_maintenance_gui,
"9": self.do_tasks_gui,
"10": self.do_view_dashboard,
"11": self.do_run_jupyter,
}

self.cfg = Config(os.path.join(ROOT_DIRECTORY, "alembic.ini"))
@@ -75,6 +78,9 @@ def __init__(self, data_store, csv_path=DIR_PATH):
self.cfg.attributes["database_type"] = data_store.db_type
self.cfg.attributes["connection"] = data_store.engine

def do_run_jupyter(self):
subprocess.run([sys.executable, "-m", "jupyter", "notebook"], cwd=os.path.expanduser("~"))

def do_view_dashboard(self):
if self.data_store.db_type == "sqlite":
print("The Pepys dashboard cannot be used with a SQLite database")
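The new handler runs Jupyter via sys.executable rather than a bare jupyter command, so the notebook server starts in the same Python environment as Pepys (presumably so the notebooks can import pepys_import directly), and it launches from the user's home directory rather than the Pepys install folder.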
9 changes: 5 additions & 4 deletions pepys_import/core/store/data_store.py
@@ -162,13 +162,14 @@ def __init__(
else:
self.in_memory_database = False

connection_string = "{}://{}:{}@{}:{}/{}".format(
self.connection_string = "{}://{}:{}@{}:{}/{}".format(
driver, db_username, db_password, db_host, db_port, db_name
)

try:
if db_type == "postgres":
self.engine = create_engine(
connection_string, echo=False, executemany_mode="batch", future=True
self.connection_string, echo=False, executemany_mode="batch", future=True
)

BasePostGIS.metadata.bind = self.engine
@@ -188,7 +189,7 @@
)
self.check_migration_version(POSTGRES_REVISIONS_IDS)
elif db_type == "sqlite":
self.engine = create_engine(connection_string, echo=False, future=True)
self.engine = create_engine(self.connection_string, echo=False, future=True)
# These 'listen' calls must be the first things run after the engine is created
# as they set up things to happen on the first connect (which will happen when
# check_migration_version is called below)
@@ -210,7 +211,7 @@
sys.exit(1)

# Try to connect to the engine to check if there is any problem
with handle_first_connection_error(connection_string):
with handle_first_connection_error(self.connection_string):
inspector = inspect(self.engine)
_ = inspector.get_table_names()

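
Storing the connection string on the DataStore, rather than in a local variable, is what lets the new notebooks hand the same database connection to other libraries such as GeoPandas. A minimal sketch of that intended use against a PostgreSQL back end; the constructor arguments, schema, table and geometry column names below are illustrative guesses, not values taken from this diff.

import geopandas as gpd
from sqlalchemy import create_engine

from pepys_import.core.store.data_store import DataStore

# Connect to an existing Pepys database (credentials are placeholders)
store = DataStore("pepys_user", "password", "localhost", 5432, "pepys", db_type="postgres")

# Reuse the exact connection string the DataStore built for its own engine
engine = create_engine(store.connection_string)

# Pull vessel positions straight into a GeoDataFrame for plotting or analysis
states = gpd.read_postgis('SELECT * FROM pepys."States"', con=engine, geom_col="location")
print(states.head())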