diff --git a/docs/assets/Sen2_tide_stats.png b/docs/assets/Sen2_tide_stats.png deleted file mode 100644 index 9705e6a..0000000 Binary files a/docs/assets/Sen2_tide_stats.png and /dev/null differ diff --git a/docs/assets/eo-tides-abstract.gif b/docs/assets/eo-tides-abstract.gif index a48df48..4c0439e 100644 Binary files a/docs/assets/eo-tides-abstract.gif and b/docs/assets/eo-tides-abstract.gif differ diff --git a/docs/paper/figures/joss_abstract.png b/docs/paper/figures/joss_abstract.png new file mode 100644 index 0000000..0cd994d Binary files /dev/null and b/docs/paper/figures/joss_abstract.png differ diff --git a/docs/paper/figures/joss_fig_list.png b/docs/paper/figures/joss_fig_list.png new file mode 100644 index 0000000..62bcfe5 Binary files /dev/null and b/docs/paper/figures/joss_fig_list.png differ diff --git a/docs/paper/figures/joss_fig_pixel.png b/docs/paper/figures/joss_fig_pixel.png new file mode 100644 index 0000000..f65a017 Binary files /dev/null and b/docs/paper/figures/joss_fig_pixel.png differ diff --git a/docs/paper/figures/joss_fig_stats.png b/docs/paper/figures/joss_fig_stats.png new file mode 100644 index 0000000..d533c5c Binary files /dev/null and b/docs/paper/figures/joss_fig_stats.png differ diff --git a/docs/paper/paper.bib b/docs/paper/paper.bib index c1730d0..d5e564b 100644 --- a/docs/paper/paper.bib +++ b/docs/paper/paper.bib @@ -1,19 +1,31 @@ -@misc{tyler_sutterley_2024, - author = {Tyler Sutterley}, - title = {tsutterley/{pyTMD}: v2.1.8}, - year = 2024, +@misc{pytmd, + author = {Sutterley, T.C. and Alley, K. and Brunt, K. 
and Howard, S. and Padman, L. and Siegfried, M.}, + title = {pyTMD: Python-based tidal prediction software}, + year = 2017, publisher = {Zenodo}, - doi = {10.5281/zenodo.10929240}, - url = {https://doi.org/10.5281/zenodo.10929240}, + doi = {10.5281/zenodo.5555395}, + url = {https://doi.org/10.5281/zenodo.5555395}, } -@software{The_pandas_development_team_pandas-dev_pandas_Pandas, - author = {Development Team, {Pandas}}, - doi = {10.5281/zenodo.3509134}, - license = {BSD-3-Clause}, - title = {pandas-dev/{pandas}: v2.2.3}, - url = {https://github.com/pandas-dev/pandas}, - publisher = {GitHub}, +@software{reback2020pandas, + author = {{pandas development team}}, + title = {pandas-dev/pandas: Pandas}, + month = feb, + year = 2020, + publisher = {Zenodo}, + version = {latest}, + doi = {10.5281/zenodo.3509134}, + url = {https://doi.org/10.5281/zenodo.3509134} +} + +@InProceedings{ mckinney-proc-scipy-2010, + author = { {W}es {M}c{K}inney }, + title = { {D}ata {S}tructures for {S}tatistical {C}omputing in {P}ython }, + booktitle = { {P}roceedings of the 9th {P}ython in {S}cience {C}onference }, + pages = { 56--61 }, + year = { 2010 }, + editor = { {S}t\'efan van der {W}alt and {J}arrod {M}illman }, + doi = { 10.25080/Majora-92bf1922-00a } } @article{Hoyer_xarray_N-D_labeled_2017, @@ -27,14 +39,44 @@ @article{Hoyer_xarray_N-D_labeled_2017 year = {2017} } -@misc{odc-geo, - author = {Development Team, ODC-geo }, - title = {opendatacube/{odc-geo}: v0.4.8}, +@misc{odcgeo, + author = {{odc-geo development team}}, + title = {opendatacube/{odc-geo}}, license = {Apache 2.0}, publisher = {GitHub}, + journal = {GitHub repository}, year = 2024, url = {https://github.com/opendatacube/odc-geo}, } + +@article{murray2012continental, + title={Continental scale mapping of tidal flats across East Asia using the Landsat archive}, + author={Murray, Nicholas J and Phinn, Stuart R and Clemens, Robert S and Roelfsema, Chris M and Fuller, Richard A}, + journal={Remote Sensing}, + volume={4}, + 
number={11}, + pages={3417--3426}, + year={2012}, + publisher={Molecular Diversity Preservation International (MDPI)} +} + +@article{sagar2017item, + title={Extracting the intertidal extent and topography of the Australian coastline from a 28 year time series of Landsat observations}, + author={Sagar, S. and Roberts, D. and Bala, B. and Lymburner, L.}, + journal={Remote Sensing of Environment}, + volume={195}, + pages={153--169}, + year={2017}, + publisher={Elsevier} +} + +@inproceedings{carrere2022new, + title={A new barotropic tide model for global ocean: FES2022}, + author={Carrere, Loren and Lyard, Florent and Cancet, Mathilde and Allain, Damien and Dabat, Mei-Ling and Fouchet, Ergane and Sahuc, Etienne and Faugere, Yannice and Dibarboure, Gerald and Picot, Nicolas}, + booktitle={2022 Ocean Surface Topography Science Team Meeting}, + pages={43}, + year={2022} +} @article{GESLAv3, author = {Haigh, Ivan D. and Marcos, Marta and Talke, Stefan A. and Woodworth, Philip L. and Hunter, John R. and Hague, Ben S. and Arns, Arne and Bradshaw, Elizabeth and Thompson, Philip}, @@ -51,6 +93,48 @@ @article{GESLAv3 year = {2023} } +@misc{krause2021dea, + title={{Digital Earth Australia} notebooks and tools repository}, + author={Krause, C. and Dunn, B. and Bishop-Taylor, R. and Adams, C. and Burton, C. and Alger, M. and Chua, S. and Phillips, C. and Newey, V. and Kouzoubov, K. and Leith, A. and Ayers, D. and Hicks, A.}, + year={2021}, + publisher={Commonwealth of Australia (Geoscience Australia)}, + url={https://doi.org/10.26186/145234}, + howpublished={\url{https://github.com/GeoscienceAustralia/dea-notebooks/}}, + doi={10.26186/145234} +} + +@misc{deaintertidal, + title={{Digital Earth Australia Intertidal}}, + author={Bishop-Taylor, R. and Phillips, C. and Newey, V. 
and Sagar, S.}, + year={2024}, + publisher={Commonwealth of Australia (Geoscience Australia)}, + url={https://dx.doi.org/10.26186/149403}, + doi={10.26186/149403} +} + +@article{Fitzpatrick2024, + doi = {10.21105/joss.06683}, + url = {https://doi.org/10.21105/joss.06683}, + year = {2024}, + publisher = {The Open Journal}, + volume = {9}, + number = {99}, + pages = {6683}, + author = {Sharon Fitzpatrick and Daniel Buscombe and Jonathan A. Warrick and Mark A. Lundine and Kilian Vos}, + title = {CoastSeg: an accessible and extendable hub for satellite-derived-shoreline (SDS) detection and mapping}, + journal = {Journal of Open Source Software} +} + +@article{eleveld2014estuarine, + title={Estuarine suspended particulate matter concentrations from sun-synchronous satellite remote sensing: Tidal and meteorological effects and biases}, + author={Eleveld, Marieke A and Van der Wal, Daphne and Van Kessel, Thijs}, + journal={Remote Sensing of Environment}, + volume={143}, + pages={204--215}, + year={2014}, + publisher={Elsevier} +} + @article{bishop2021mapping, title={Mapping {Australia}'s {Dynamic Coastline} at {Mean Sea Level} Using {Three Decades of Landsat Imagery}}, author={Bishop-Taylor, R. and Nanson, R. and Sagar, S. and Lymburner, L.}, @@ -85,4 +169,40 @@ @article{sagar2018composites publisher={MDPI}, doi={10.3390/rs10030480}, url={https://doi.org/10.3390/rs10030480} +} + +@article{vitousek2023future, + title={The future of coastal monitoring through satellite remote sensing}, + author={Vitousek, S. and Buscombe, D. and Vos, K. and Barnard, P. L. and Ritchie, A. C. and Warrick, J. A.}, + journal={Cambridge Prisms: Coastal Futures}, + volume={1}, + pages={e10}, + year={2023}, + publisher={Cambridge University Press}, + doi={10.1017/cft.2022.4}, + url={https://doi.org/10.1017/cft.2022.4} +} + +@article{turner2021satellite, + title={Satellite optical imagery in {Coastal Engineering}}, + author={Turner, I. L. and Harley, M. D. and Almar, R. and Bergsma, E. W. 
J.}, + journal={Coastal Engineering}, + volume={167}, + pages={103919}, + year={2021}, + publisher={Elsevier}, + doi={10.1016/j.coastaleng.2021.103919}, + url={https://doi.org/10.1016/j.coastaleng.2021.103919} +} + +@article{vos2019coastsat, + title={{CoastSat}: {A} {Google Earth Engine}-enabled {Python} toolkit to extract shorelines from publicly available satellite imagery}, + author={Vos, K. and Splinter, K. D. and Harley, M. D. and Simmons, J. A. and Turner, I. L.}, + journal={Environmental Modelling \& Software}, + volume={122}, + pages={104528}, + year={2019}, + doi={10.1016/j.envsoft.2019.104528}, + url={https://doi.org/10.1016/j.envsoft.2019.104528}, + publisher={Elsevier} } \ No newline at end of file diff --git a/docs/paper/paper.md b/docs/paper/paper.md index cd930ff..a18c6d7 100644 --- a/docs/paper/paper.md +++ b/docs/paper/paper.md @@ -4,20 +4,25 @@ tags: - Python - Earth observation - Tide modelling - - Parallelized - - Scalable + - Remote sensing + - Coastal + - Satellite data authors: - name: Robbi Bishop-Taylor corresponding: true orcid: 0000-0002-1533-2599 - equal-contrib: true # (This is how you can denote equal contributions between multiple authors) - affiliation: 1 - - name: Tyler Sutterley - affiliation: 2 - orcid: 0000-0002-6964-1194 + affiliation: 1 - name: Claire Phillips affiliation: 1 orcid: 0009-0003-9882-9131 + - name: Stephen Sagar + affiliation: 1 + orcid: 0000-0001-9568-9661 + - name: Vanessa Newey + affiliation: 1 + - name: Tyler Sutterley + affiliation: 2 + orcid: 0000-0002-6964-1194 affiliations: - name: Geoscience Australia, Australia index: 1 @@ -27,80 +32,87 @@ affiliations: ror: 03d17d270 date: 27 November 2024 bibliography: paper.bib - -# TODO add journal draft -# Optional fields if submitting to a AAS journal too, see this blog post: -# https://blog.joss.theoj.org/2018/12/a-new-collaboration-with-aas-publishing -# aas-doi: 10.3847/xxxxx <- update this with the DOI from AAS once you know it. 
-# aas-journal: Astrophysical Journal <- The name of the AAS journal. --- # Summary -Satellite Earth observation offers an unparalleled method to view and examine dynamic coastal environments over large temporal and spatial scales. The variable influence of tide in these regions provides another dimension to increase the utility of coastal Earth obseration data. `eo-tides` facilitates the attribution of tidal dimensions to satellite Earth observation data, the combination of which delivers a powerful reimagining of traditional multi-temporal Earth observation data analysis. Conventionally, satellite data dimensions consider the geographical 'where' and the temporal 'when' of data acquisition. The addition of tide height allows exploration of the 'where' in the local tide range (e.g. high or low tide) and 'when' in the tidal cycle (e.g. spring, neap, ebb or flow tides) that data was collected. This concept has been used to map the mean annual position of continental coastlines [@bishop2021mapping], generate national intertidal zone digital elevation models [@bishop2019NIDEM] and create synthetic imagery composites of coasts at low and high tide [@sagar2018composites]. +The `eo-tides` package provides powerful parallelized tools for integrating satellite Earth observation (EO) data with ocean tide modelling. The package provides a flexible Python-based API that facilitates the modelling and attribution of tide heights to a time-series of satellite images based on the spatial extent and acquisition time of each satellite observation (\autoref{fig:abstract}). + +`eo-tides` leverages advanced tide modelling functionality from the `pyTMD` tide prediction software [@pytmd], combining this fundamental tide modelling capability with EO spatial analysis tools from `odc-geo` [@odcgeo]. 
This allows tides to be modelled in parallel automatically using over 50 supported tide models, and returned in standardised `pandas` [@reback2020pandas; @mckinney-proc-scipy-2010] and `xarray` [@Hoyer_xarray_N-D_labeled_2017] data formats for further analysis. + +Tools from `eo-tides` are designed to be applied directly to petabytes of freely available satellite data loaded from the cloud using Open Data Cube's `odc-stac` or `datacube` packages (e.g. using [Digital Earth Australia](https://knowledge.dea.ga.gov.au/guides/setup/gis/stac/) or [Microsoft Planetary Computer's](https://planetarycomputer.microsoft.com/) SpatioTemporal Asset Catalogue). Additional functionality enables evaluating potential satellite-tide biases, and validating modelled tides using external tide gauge data — both important considerations for assessing the reliability and accuracy of coastal EO workflows. In combination, these open source tools support the efficient, scalable and robust analysis of coastal EO data for any time period or location globally. + +![An example of a typical `eo-tides` coastal EO workflow, with tide heights being modelled into every pixel in a spatio-temporal stack of satellite data (for example, from ESA's Sentinel-2 or NASA/USGS Landsat), then combined to derive insights into dynamic coastal environments.\label{fig:abstract}](figures/joss_abstract.png) # Statement of need -`eo-tides` is a powerful python based API that facilitates the modelling and attribution of global tide heights to satellite data for improved data utility and analysis in coastal and marine research. It leverages satellite data acquisition times, a wide range of global ocean tide models and a python based tide prediction software, `pyTMD` [@tyler_sutterley_2024]. 
`eo-tides` further adopts functionality from the `pandas` [@The_pandas_development_team_pandas-dev_pandas_Pandas], `xarray` [@Hoyer_xarray_N-D_labeled_2017] and `odc-geo` [@odc-geo] packages to deliver a suite of flexible and parallelized Earth observation (EO) based tide modelling tools that can be applied to petabytes of coastal satellite data for any time period or location globally. +Satellite remote sensing offers an unparalleled method to view and examine dynamic coastal environments over large temporal and spatial scales [@turner2021satellite; @vitousek2023future]. However, the variable and sometimes extreme influence of ocean tides in these regions can complicate analyses, making it difficult to separate the influence of changing tides from patterns of true coastal change over time [@vos2019coastsat]. This is a particularly significant challenge for continental- to global-scale coastal EO analyses, where failing to account for complex tide dynamics can lead to inaccurate or misleading insights into coastal processes observed by satellites. -## Modelling tides -At it's core, `eo_tides` upscales sophisticated ocean tide modelling from the `pyTMD` package [@tyler_sutterley_2024] to scales applicable to EO data e.g. 10m spatial pixel resolution with Sentinel-2 imagery. +Conversely, information about ocean tides can also provide unique environmental insights that can greatly enhance the utility of coastal EO data. Conventionally, satellite data dimensions consider the geographical "where" and the temporal "when" of data acquisition. The addition of tide height as a new analysis dimension allows data to be filtered, sorted and analysed with respect to tidal processes, delivering a powerful re-imagining of traditional multi-temporal EO data analysis [@sagar2017item]. For example, satellite data can be analysed to focus on specific ecologically-significant tidal stages (e.g. high, low tide, spring or neap tides) or on particular tidal processes (e.g. 
ebb or flow tides). -[TODO: Insert text here about the core functions of the `pyTMD` package with comment on the range of supported global tide models. Segue into how `eo-tides` enables the application of `pyTMD` modelling at EO appropriate scale] +This concept has been used to map tidally-corrected annual coastlines from Landsat satellite data at continental scale [@bishop2021mapping], generate maps of the extent and elevation of the intertidal zone [@murray2012continental; @sagar2017item; @bishop2019NIDEM], and create tidally-constrained imagery composites of the coastline at low and high tide [@sagar2018composites]. However, these approaches have been historically based on bespoke, closed-source or difficult to install tide modelling tools, limiting the reproducibility and portability of these techniques to new coastal EO applications. To support the next generation of coastal EO workflows, there is a pressing need for new open-source approaches for combining satellite data with tide modelling. -The tide modelling functionality in `eo-tides` can be used independently of EO data, e.g. for any application where you need to generate a time series of tide heights. However, it also underpins the more complex EO-related functions in the `eo-tides` package. Tide modelling functionality is further provided to support modelling of tidal phases at any location and time. This can be used to classify tides into high and low tide observations, or determine whether the tide was rising (i.e. flow tide) or falling (i.e. ebb tide) at any point in time. +The `eo-tides` package aims to address these challenges by providing a set of performant open-source Python tools for attributing satellite EO data with modelled ocean tides. This functionality is provided in five main analysis modules (`utils`, `model`, `eo`, `stats`, `validation`) which are described briefly below. 
-## Combining tides with satellite data -The attribution of corresponding tide heights to coastal EO data is the major focus of the `eo-tides` package. When combining tide heights with satellite data, `eo-tides` offers two approaches that differ in complexity and performance. A fast and efficient method for assigning tide heights to whole dataset timesteps is offered for small scale applications where tides are unlikely to vary across a study area. In contrast, for large scale, seamless coastal EO datasets, `eo-tides` offers an approach that models tides through both time and space, returning a tide height for every satellite pixel in every dataset timestep. +## Setting up tide models +A key barrier to utilising tide modelling in EO workflows is the complexity and difficulty of initially setting up global ocean tide models for analysis. To address this, the `eo_tides.utils` module contains useful tools for preparing tide model data files for use in `eo-tides`. This includes the `list_models` function that provides visual feedback on the tide models a user has available in their system, while highlighting the naming conventions and directory structures required by the underlying `pyTMD` tide prediction software (\autoref{fig:list}). -Using the former approach, the tide height at the geographic-centroid of the dataset is attributed to each timestep, representing the relative trend of the tide dynamics for the area of interest at that moment in time. Having tide height as a variable allows the selection and analysis of satellite data using information about tides. For example, in any area of interest, all available timesteps could be sorted by tide height, then the lowest and highest tide images in the time series are identifiable and able to be compared. 
+Running tide modelling using the default tide modelling data provided by external providers can be slow due to the large size of these files — especially for recent high-resolution models like FES2022 [@carrere2022new]. To improve tide modelling performance, it can be extremely useful to clip tide model files to a smaller region of interest (e.g. the extent of a country or coastal region). The `clip_models` function can be used to automatically clip all suitable NetCDF-format model data files to a user-supplied bounding box, potentially improving tide modelling performance by over an order of magnitude. -However, in reality, tides vary spatially – potentially by several metres in areas of complex tidal dynamics. This means that an individual satellite dataset can capture a range of tide conditions. The pixel-based, seamless tide height attribution approach is well suited for applications that require localised information on tides. For efficient processing, this approach first models tides into a low resolution grid surrounding each satellite image in the time series. This lower resolution data includes a buffer around the extent of the satellite data so that tides can be modelled seamlessly across analysis boundaries. Optionally, users can interpolate and re-project the low resolution tide data back into the resolution of the input satellite image, resulting in an individual tide height for every pixel in the dataset through time and space. +These tools are accompanied by comprehensive documentation explaining [how to set up several of the most commonly used global ocean tide models](https://geoscienceaustralia.github.io/eo-tides/setup/), including details on how to download or request access to model files, and how to uncompress and arrange the data on disk. -Further functionality in this approach allows users to calculate and return timesteps from the modelled tide height array to reveal important features of the input satellite time series. 
These include the minimum, maximum and median satellite-observed tide heights in the array which can be exploited for deeper analysis, such as coastline delineation [@bishop2021mapping]. +![An example output from `list_models`, providing a useful summary table which clearly identifies available tide models.\label{fig:list}](figures/joss_fig_list.png) -## Calculating tide statistics and satellite biases -Complex interactions between temporal tide dynamics and the regular overpass timing of orbiting sensors like Landsat, Sentinel-1 and Sentinel-2 mean that satellites often do not observe the entire tidal cycle. Biases in satellite coverage of the tidal cycle can mean that tidal extremes (e.g. the lowest or highest tides at a location) may either never be captured by satellites, or be over-represented in the satellite EO record. Local tide dynamics can cause these biases to vary greatly both through time and spatially [@bishop2019NIDEM], making it challenging to consistently analyse and compare coastal processes consistently - particularly for large-scale (e.g. regional or global) analyses. +## Modelling tides +The `eo_tides.model` module builds upon advanced tide modelling capability provided by the `pyTMD` tide prediction software [@pytmd]. -To ensure that coastal EO analyses are not inadvertently affected by tide biases, it is important to understand and compare how well the tides observed by satellites match the full range of modelled tides at a location. Statistical capabilities in `eo-tides` compare the subset of tides observed by satellite data against the full range of tides modelled at a regular interval through time across the entire time period covered by the satellite dataset. This comparison is used to calculate several useful statistics that summarise how well your satellite data captures real-world tidal conditions. 
These statistics include: +[TODO Tyler: Insert brief paragraph here about the core capability of the `pyTMD` package, with general background to the science used to predict tides and the range of supported global tide models] - 1. Spread: The proportion of the full modelled astronomical tidal range that was observed by satellites. A high value indicating good coverage of the tide range. - 2. Offset high: The proportion of the highest tides not observed by satellites at any time, as a proportion of the full modelled astronomical tidal range. A high value indicates that the satellite data is biased towards never capturing high tides. - 3. Offset low: The proportion of the lowest tides not observed by satellites at any time, as a proportion of the full modelled astronomical tidal range. A high value indicates that the satellite data is biased towards never capturing low tides. +[TODO Robbi: Insert brief paragraph here about how `eo-tides` wraps `pyTMD` functionality to model tides in parallel and return data in pandas/xarray format required for EO analysis] -An interrogation of satellite tide-bias in any area of interest using `eo-tides` statistical functionality will return a report and plot (*Figure 1*), adding insightful tide-based context to your coastal EO analysis. +Tide modelling functionality in the `model_tides` function is primarily intended to support more complex EO-related capability in the `eo_tides.eo` module. However, it can also be used independently of EO data, for example for any application that requires a time series of modelled tide heights. In addition to modelling tide heights, the `model_phases` function allows users to calculate tidal phases at any location and time. This can be used to classify tides into high and low tide observations, or determine whether the tide was rising (i.e. flow tide) or falling (i.e. ebb tide) at any point in time. 
-``` - Using tide modelling location: 122.21, -18.00 - Modelling tides with EOT20 +## Combining tides with satellite data +The `eo_tides.eo` module contains the package's core functionality, focusing on tools for attributing satellite data with modelled tide heights. +For tide attribution, `eo-tides` offers two approaches that differ in complexity and performance: `tag_tides` and `pixel_tides` (\autoref{tab:tide_stats}). - - Modelled astronomical tide range: 9.30 metres. - - Observed tide range: 6.29 metres. +The `tag_tides` function provides a fast and efficient method for small scale applications where tides are unlikely to vary across a study area. This approach allocates a single tide height to each satellite data timestep, based on the geographic-centroid of the dataset and the acquisition time of each image. Having tide height as a variable allows the selection and analysis of satellite data based on tides. For example, all available satellite observations for an area of interest could be sorted by tide height, or used to extract and compare the lowest and highest tide images in the time series. - - 68% of the modelled astronomical tide range was observed at this location. - - The highest 8% (0.77 metres) of the tide range was never observed. - - The lowest 24% (2.25 metres) of the tide range was never observed. +However, in reality tides vary spatially – potentially by many metres in areas of complex and extreme tidal dynamics. This means that an individual satellite image can capture a range of contrasting tide conditions. For larger scale coastal EO analysis, the `pixel_tides` function can be used to seamlessly model tides through both time and space, producing a three-dimensional "tide height" datacube that can be integrated with satellite data. For efficient processing, `pixel_tides` models tides into a customisable low resolution grid surrounding each satellite image in the time series. 
These modelled tides are then re-projected back into the original resolution of the input satellite image, returning a unique tide height for every individual satellite pixel through time (\autoref{fig:pixel}). - - Mean modelled astronomical tide height: -0.00 metres. - - Mean observed tide height: 0.69 metres. +Table: Comparison of the `tag_tides` and `pixel_tides` functions. \label{tab:tide_stats} - - The mean observed tide height was 0.69 metres higher than the mean modelled astronomical tide height. -``` -![](../assets/Sen2_tide_stats.png) -