Skip to content

Commit

Permalink
Add a way to flag ERDDAP datasets to be refreshed
Browse files Browse the repository at this point in the history
  • Loading branch information
kwilcox committed Aug 14, 2017
1 parent e6f3eb1 commit 292ea98
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 64 deletions.
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ VOLUME $GUTILS_CONFIG_DIRECTORY
ENV GUTILS_ERDDAP_CONTENT_PATH /gutils/erddap/content
VOLUME $GUTILS_ERDDAP_CONTENT_PATH

ENV GUTILS_ERDDAP_FLAG_PATH /gutils/erddap/flag
VOLUME $GUTILS_ERDDAP_FLAG_PATH

RUN mkdir -p /etc/my_init.d && \
mkdir -p /gutils
COPY docker/init/* /etc/my_init.d/
Expand Down
4 changes: 4 additions & 0 deletions example/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ services:
GUTILS_BINARY_DIRECTORY: /gutils/binary
GUTILS_CONFIG_DIRECTORY: /gutils/config
GUTILS_NETCDF_DIRECTORY: /gutils/netcdf
GUTILS_ERDDAP_CONTENT_PATH: /gutils/erddap/content
GUTILS_ERDDAP_FLAG_PATH: /gutils/erddap/flag
GUTILS_FTP_URL: ftp
GUTILS_FTP_USER: thisisme
GUTILS_FTP_PASS: thisismypass
Expand All @@ -30,6 +32,7 @@ services:
- "./netcdf:/gutils/netcdf"
- "./config:/gutils/config:ro"
- "./erddap/content:/gutils/erddap/content"
- "./erddap/flag:/gutils/erddap/flag"
depends_on:
- ftp

Expand All @@ -48,6 +51,7 @@ services:
image: axiom/docker-erddap
volumes:
- "./erddap/content/datasets.xml:/usr/local/tomcat/content/erddap/datasets.xml"
- "./erddap/flag:/erddapData/flag"
- "./netcdf:/gutils/netcdf:ro"
ports:
- "8080:8080"
Expand Down
159 changes: 95 additions & 64 deletions gutils/watch/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import argparse
import tempfile
from ftplib import FTP
from datetime import datetime
from collections import namedtuple

import netCDF4 as nc4
Expand Down Expand Up @@ -175,11 +176,21 @@ def main_to_ftp():
return 0


def lxml_elements_equal(e1, e2):
    """Return True when two XML elements describe the same content.

    Tag, attributes, text, tail and (recursively, in document order) every
    child are compared. A missing (``None``) text/tail and one that holds
    only whitespace are treated as the same value, so an element built from
    a freshly rendered string compares equal to the same element read back
    from an indented file. Text that contains anything other than
    whitespace is still compared exactly.

    :param e1: first element (any object exposing the ElementTree element API)
    :param e2: second element
    :returns: ``True`` if the elements are equivalent, ``False`` otherwise
    """
    def significant(value):
        # None, '' and indentation-only strings all carry no content.
        if value is None or not value.strip():
            return ''
        return value

    if e1.tag != e2.tag:
        return False
    if significant(e1.text) != significant(e2.text):
        return False
    if significant(e1.tail) != significant(e2.tail):
        return False
    if e1.attrib != e2.attrib:
        return False
    if len(e1) != len(e2):
        return False
    # Child counts match at this point, so zip() cannot silently drop a child.
    return all(lxml_elements_equal(c1, c2) for c1, c2 in zip(e1, e2))


class Netcdf2ErddapProcessor(ProcessEvent):

def my_init(self, outputs_path, erddap_content_path, erddap_flag_path=None):
    """Store the directories this processor works with.

    :param outputs_path: directory holding the NetCDF output being watched
    :param erddap_content_path: directory that contains ERDDAP's datasets.xml
    :param erddap_flag_path: ERDDAP flag directory, or ``None`` to disable
        writing flag files
    """
    self.outputs_path = os.path.realpath(outputs_path)
    self.erddap_content_path = os.path.realpath(erddap_content_path)
    # The flag directory is optional (its CLI default comes from an
    # environment variable that may be unset); realpath(None) would raise.
    if erddap_flag_path is None:
        self.erddap_flag_path = None
    else:
        self.erddap_flag_path = os.path.realpath(erddap_flag_path)

def process_IN_CLOSE(self, event):
if self.valid_extension(event.name):
Expand All @@ -198,67 +209,80 @@ def valid_extension(self, name):
def create_and_update_content(self, event):
    """Add or refresh a deployment's dataset entry in datasets.xml and flag it.

    The deployment is named after the last component of ``event.path``. Its
    ``<dataset>`` block is rendered from the ``erddap_deployment.xml``
    template and appended to, or swapped into, a working copy of
    ``datasets.xml`` (created from the ``erddap_datasets.xml`` template when
    the file does not exist yet). The result then replaces the real file.
    When the existing entry is already identical nothing is rewritten.

    On every exit path, including the "identical" shortcut and failures, a
    flag file named after the deployment is created in
    ``self.erddap_flag_path`` if one is configured and the file is not
    already there.

    :param event: filesystem event; only ``event.path`` is read
    """
    def serialize(node):
        # Keep the output as bytes: writing the decoded text through a
        # text-mode file would re-encode it with the locale's encoding and
        # contradict the ISO-8859-1 XML declaration for non-ASCII content.
        return etree.tostring(
            node,
            encoding='ISO-8859-1',
            pretty_print=True,
            xml_declaration=True
        ) + b'\n'

    # Resolved before the try block so the flag-writing code in `finally`
    # can never hit an unbound name when an early step fails.
    deployment_name = os.path.basename(event.path)
    datasets_path = os.path.join(self.erddap_content_path, 'datasets.xml')

    tmp_handle, tmp_path = tempfile.mkstemp(prefix='gutils_errdap_', suffix='.xml')
    # Only the path is used below; release the descriptor immediately.
    os.close(tmp_handle)

    try:
        loader = PackageLoader('gutils', 'templates')
        jenv = Environment(loader=loader, autoescape=select_autoescape(['html', 'xml']))

        # Work on a copy of datasets.xml, or on a freshly rendered base file
        if os.path.isfile(datasets_path):
            shutil.copy(datasets_path, tmp_path)
        else:
            datasets_template_string = jenv.get_template('erddap_datasets.xml').render()
            with open(tmp_path, 'wb') as f:
                f.write(serialize(etree.fromstring(datasets_template_string)))

        xmlstring = jenv.get_template('erddap_deployment.xml').render(
            deployment_name=deployment_name,
            deployment_directory=event.path
        )
        deployment_xml_node = etree.fromstring(xmlstring)

        xmltree = etree.parse(tmp_path).getroot()
        find_dataset = etree.XPath("//erddapDatasets/dataset[@datasetID=$name]")

        # Find an existing datasetID within the datasets.xml file
        dnode = find_dataset(xmltree, name=deployment_name)
        if not dnode:
            # No datasetID found, create a new one
            xmltree.append(deployment_xml_node)
            L.info("Added Deployment: {}".format(deployment_name))
        elif lxml_elements_equal(dnode[0], deployment_xml_node):
            L.info("Not replacing identical deployment XML node")
            return
        else:
            # Update the existing datasetID with a new XML block
            xmltree.replace(dnode[0], deployment_xml_node)
            L.info("Replaced Deployment: {}".format(deployment_name))

        # Write the modified tree to a second temp file, then swap it in
        new_datasets_handle, new_datasets_path = tempfile.mkstemp(prefix='gutils_erddap_', suffix='.xml')
        try:
            # fdopen takes ownership of the descriptor and closes it
            with os.fdopen(new_datasets_handle, 'wb') as f:
                f.write(serialize(xmltree))
            shutil.move(new_datasets_path, datasets_path)
        except Exception:
            # Do not leave a half-written temp file behind
            if os.path.exists(new_datasets_path):
                os.remove(new_datasets_path)
            raise

    finally:
        try:
            # Write dataset update flag if it doesn't exist
            if self.erddap_flag_path is not None:
                flagfile = os.path.join(self.erddap_flag_path, deployment_name)
                if not os.path.isfile(flagfile):
                    with open(flagfile, 'w') as ff:
                        ff.write(datetime.utcnow().isoformat())
        finally:
            # Runs even when the flag could not be written
            if os.path.exists(tmp_path):
                os.remove(tmp_path)


def create_erddap_arg_parser():
Expand All @@ -278,6 +302,11 @@ def create_erddap_arg_parser():
help="Path to the ERDDAP content directory",
default=os.environ.get('GUTILS_ERDDAP_CONTENT_PATH')
)
parser.add_argument(
"--erddap_flag_path",
help="Path to the ERDDAP flag directory",
default=os.environ.get('GUTILS_ERDDAP_FLAG_PATH')
)
parser.add_argument(
"--daemonize",
help="To daemonize or not to daemonize",
Expand Down Expand Up @@ -315,18 +344,20 @@ def main_to_erddap():

processor = Netcdf2ErddapProcessor(
outputs_path=args.data_path,
erddap_content_path=args.erddap_content_path
erddap_content_path=args.erddap_content_path,
erddap_flag_path=args.erddap_flag_path
)
notifier = Notifier(wm, processor, read_freq=30) # Read every 30 seconds
# Enable coalescing of events. This merges event types of the same type on the same file
# together over the `read_freq` specified in the Notifier.
notifier.coalesce_events()

try:
L.info("Watching {} and Updating ERDDAP content at {}".format(
L.info("Watching {}, updating content at {} and flags at {}".format(
args.data_path,
args.erddap_content_path)
)
args.erddap_content_path,
args.erddap_flag_path
))
notifier.loop(daemonize=args.daemonize)
except NotifierError:
L.exception('Unable to start notifier loop')
Expand Down

0 comments on commit 292ea98

Please sign in to comment.