Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add autorefresh manifest for nasa fires firms import #991

Merged
merged 9 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions import-automation/executor/app/executor/import_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,12 +664,12 @@ def _run_with_timeout_async(args: List[str],
)

# Log output continuously until the command completes.
for line in process.stdout:
stdout.append(line)
logging.info(f'Process stdout: {line}')
for line in process.stderr:
stderr.append(line)
logging.info(f'Process stderr: {line}')
for line in process.stdout:
stdout.append(line)
logging.info(f'Process stdout: {line}')

end_time = time.time()

Expand Down
4 changes: 2 additions & 2 deletions import-automation/executor/app/executor/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ def _is_import_spec_valid(import_spec, repo_dir, import_dir):
f'import specification ({import_spec})')

absolute_script_paths = [
os.path.join(repo_dir, import_dir, path)
for path in import_spec.get('scripts', [])
os.path.join(repo_dir, import_dir, path.split(' ')[0])
for path in import_spec.get('scripts', []) if path
]
missing_paths = _filter_missing_paths(absolute_script_paths)
if missing_paths:
Expand Down
37 changes: 33 additions & 4 deletions scripts/earthengine/earthengine_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,21 @@
import sys
import time

# Workaround for collections.Callable needed for ee.Initialize()
import collections
import collections.abc
collections.Callable = collections.abc.Callable

from absl import app
from absl import flags
from absl import logging
from datetime import date
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
from google.auth import compute_engine

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand All @@ -69,6 +75,9 @@

from counters import Counters

flags.DEFINE_bool(
'ee_remote', False,
'Set to True to use service account auth when running remotely')
flags.DEFINE_string('ee_gcloud_project', 'datcom-import-automation-prod',
'Gcloud project with Earth Engine API enabled.')
flags.DEFINE_string('ee_dataset', '',
Expand Down Expand Up @@ -171,6 +180,8 @@
}

EE_DEFAULT_CONFIG = {
# Auth mode
'ee_remote': _FLAGS.ee_remote,
# GCloud project
'ee_gcloud_project': _FLAGS.ee_gcloud_project,
# Image loading settings.
Expand Down Expand Up @@ -596,7 +607,26 @@ def export_ee_image_to_gcs(ee_image: ee.Image, config: dict = {}) -> str:
return task


def ee_process(config) -> list:
def ee_init(config: dict):
    '''Authenticate and initialize the Earth Engine API client.

    Calls ee.Authenticate() and then ee.Initialize() with the credentials
    selected by the config and the configured gcloud project.

    Args:
      config: dict with the following parameters
        ee_remote: bool, if True uses compute engine credentials with the
          Earth Engine scope; otherwise the 'persistent' credentials are used.
        ee_gcloud_project: Project to use with EE API.
    '''
    # NOTE(review): ee.Authenticate() runs even when ee_remote is set;
    # confirm it is required and non-interactive in that mode.
    ee.Authenticate()

    if config.get('ee_remote'):
        # Remote runs: credentials restricted to the Earth Engine scope.
        ee_credentials = compute_engine.Credentials(
            scopes=["https://www.googleapis.com/auth/earthengine"])
    else:
        # Local runs: fall back to the locally stored credentials.
        ee_credentials = 'persistent'

    ee_project = config.get('ee_gcloud_project')
    ee.Initialize(credentials=ee_credentials, project=ee_project)


def ee_process(config: dict) -> list:
'''Generate earth engine images and export to GCS.
Called should wait for the task to complete.
Args:
Expand All @@ -609,8 +639,7 @@ def ee_process(config) -> list:
if config['ee_wait_task'] is True, else a list of tasks launched.
'''
ee_tasks = []
ee.Authenticate()
ee.Initialize(project=config.get('ee_gcloud_project'))
ee_init(config)
config['ee_image_count'] = config.get('ee_image_count', 1)
time_period = config.get('time_period', 'P1M')
cur_date = utils.date_format_by_time_period(utils.date_today(), time_period)
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/events_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@

_FLAGS = flags.FLAGS

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from absl import logging
from google.cloud import bigquery

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from absl import logging

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_earthengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from absl import logging

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from absl import logging

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_raster_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from absl import logging

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/pipeline_stage_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from absl import logging

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/process_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

_FLAGS = flags.FLAGS

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/raster_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
import rasterio
import s2sphere

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
2 changes: 1 addition & 1 deletion scripts/earthengine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from s2sphere import Cell, CellId, LatLng
from shapely.geometry import Polygon

_SCRIPTS_DIR = os.path.dirname(__file__)
_SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPTS_DIR)
sys.path.append(os.path.dirname(_SCRIPTS_DIR))
sys.path.append(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)))
Expand Down
34 changes: 34 additions & 0 deletions scripts/fires/firms/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Global fire events using NASA FIRMS dataset
This folder contains configs to generate global fire events using the fire data
from [NASA Fire Information for Resource Management System
(FIRMS)](https://firms.modaps.eosdis.nasa.gov/).

The active and historical fires can be viewed on the [FIRMS Fire
Map](https://firms.modaps.eosdis.nasa.gov/).

The [NASA API](https://firms.modaps.eosdis.nasa.gov/api/area/) provides the
fires data as a CSV with the location and area of each fire. This is used to
generate fire events by merging regions with fire that are next to each other
within a time window.

The [events
pipeline](https://github.com/datacommonsorg/data/blob/master/scripts/earthengine/events_pipeline.py)
script is used with the `fire_events_pipeline_config.py` that downloads the
latest data from the source incrementally and generates fire events for the current
year.

To run the script, get an API key from
[NASA](https://firms.modaps.eosdis.nasa.gov/api/area/), add it to the config
`fire_events_pipeline_config.py`, update the GCS project and buckets in the
config or set the `output_file` to a local folder.
Then run the pipeline with the command:
```
pip install -r requirements.txt
python3 ../../earthengine/events_pipeline.py --pipeline_config=fire_events_pipeline_config.py
```

This generates the following output files:
- events.{csv,tmcf}: Data for each fire event
- events-svobs.{csv,tmcf}: StatVarObservations for area of each fire event
- place-svobs.{csv,tmcf}: StatVarObservations for area and count of fires
across places
7 changes: 7 additions & 0 deletions scripts/fires/firms/fire_event_svobs.tmcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Node: E:Events->E0
typeOf: dcs:StatVarObservation
observationAbout: C:Events->dcid
observationDate: C:Events->observationDate
variableMeasured: dcs:Area_FireEvent
value: C:Events->area
unit: dcs:SquareKilometer
12 changes: 12 additions & 0 deletions scripts/fires/firms/fire_events.tmcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Node: E:Events->E0
dcid: C:Events->dcid
typeOf: C:Events->typeOf
name: C:Events->name
startDate: C:Events->startDate
endDate: C:Events->endDate
observationPeriod: C:Events->observationPeriod
startLocation: C:Events->startLocation
affectedPlace: C:Events->affectedPlace
area: C:Events->area
observationDate: C:Events->observationDate
geoJsonCoordinates: C:Events->geoJsonCoordinates
Loading
Loading