Merge pull request #54 from mobidata-bw/sharing-changes

Sharing Asset changes
mobidata-bw · Dec 19, 2023 · 1bf6a66 · 1bf6a66
2 parents ae74d7f + 7a50716
commit 1bf6a66
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 8 deletions.
diff --git a/pipeline/assets/sharing.py b/pipeline/assets/sharing.py
@@ -2,6 +2,7 @@
 
 import pandas as pd
 from dagster import (
+    AutoMaterializePolicy,
     DefaultScheduleStatus,
     DefaultSensorStatus,
     DynamicPartitionsDefinition,
@@ -22,7 +23,8 @@
     io_manager_key='pg_gpd_io_manager',
     compute_kind='Lamassu',
     group_name='sharing',
-    freshness_policy=FreshnessPolicy(maximum_lag_minutes=1),
+    freshness_policy=FreshnessPolicy(maximum_lag_minutes=60),
+    auto_materialize_policy=AutoMaterializePolicy.eager(),
 )
 def sharing_stations(context, lamassu: LamassuResource) -> pd.DataFrame:
     """

diff --git a/pipeline/resources/postgis_geopandas_io_manager.py b/pipeline/resources/postgis_geopandas_io_manager.py
@@ -171,8 +171,8 @@ def handle_output(self, context: OutputContext, obj: geopandas.GeoDataFrame):
         schema, table = self._get_schema_table(context.asset_key)
 
         if isinstance(obj, geopandas.GeoDataFrame):
-            len(obj)
             with connect_postgresql(config=self._config) as con:
+                self._create_schema_if_not_exists(schema, con)
                 if context.has_partition_key:
                     # Add an additional column to the frame (its name is still undecided;
                     # for now it is just "partition") and initialize it with partition_name.
@@ -189,9 +189,9 @@ def handle_output(self, context: OutputContext, obj: geopandas.GeoDataFrame):
                     # All data can be replaced (i.e. deleted before insertion).
                     # geopandas will take care of this.
                     if_exists_action = 'replace'
-
-                self._create_schema_if_not_exists(schema, con)
-                obj.to_postgis(con=con, name=table, schema=schema, if_exists=if_exists_action, chunksize=self.chunksize)
+                obj.to_postgis(
+                    con=con, name=table, index=True, schema=schema, if_exists=if_exists_action, chunksize=self.chunksize
+                )
                 context.add_output_metadata({'num_rows': len(obj), 'table_name': f'{schema}.{table}'})
         else:
             super().handle_output(context, obj)

diff --git a/pipeline/sources/lamassu.py b/pipeline/sources/lamassu.py
@@ -28,8 +28,9 @@
     'vehicle_id': pd.StringDtype(),
     'form_factor': pd.StringDtype(),
     'name': pd.StringDtype(),
-    'is_reserved': pd.BooleanDtype(),
     'propulsion_type': pd.StringDtype(),
+    'current_fuel_percent': pd.Float32Dtype(),
+    'current_range_meters': pd.Int32Dtype(),
     'max_range_meters': pd.Int32Dtype(),
     'rental_uris_android': pd.StringDtype(),
     'rental_uris_ios': pd.StringDtype(),
@@ -254,8 +255,9 @@ def _postprocess_columns_and_types(
         """
         df = df.reset_index()
         df['feed_id'] = feed_id
-        # convert seconds since epoch into datetime
-        df['last_reported'] = pd.to_datetime(df['last_reported'], unit='s', utc=True)
+        # convert seconds since epoch into datetime, if available (for vehicles, it's optional)
+        if 'last_reported' in df.columns:
+            df['last_reported'] = pd.to_datetime(df['last_reported'], unit='s', utc=True, errors='coerce')
         df_with_enforced_columns = Lamassu._enforce_columns(df, enforced_columns)
         return df_with_enforced_columns.set_index(index)