Skip to content

Commit 7190507

Browse files
authored
Merge pull request #159 from TGSAI/readerExample
Statically build+link example with open data access
2 parents 2031129 + c9ef91d commit 7190507

File tree

7 files changed

+1000
-0
lines changed

7 files changed

+1000
-0
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
cmake_minimum_required(VERSION 3.24)
project(SeismicReader VERSION 1.0.0 LANGUAGES CXX)

# Set the C++ standard to C++17
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# MDIO installation directory (populated by bootstrap.sh).
# Resolved relative to this CMakeLists.txt so the example also configures when
# it is pulled in with add_subdirectory(); it is a cache entry so an existing
# installation can be selected with -DMDIO_INSTALL_DIR=<path>.
set(MDIO_INSTALL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/inst"
    CACHE PATH "Directory that bootstrap.sh installed MDIO into")

# Fail early with an actionable message instead of a wall of missing-header
# and missing-library errors at build time.
if(NOT IS_DIRECTORY "${MDIO_INSTALL_DIR}/include" OR NOT IS_DIRECTORY "${MDIO_INSTALL_DIR}/lib")
  message(FATAL_ERROR
    "MDIO installation not found in '${MDIO_INSTALL_DIR}'. "
    "Run ./bootstrap.sh first, or pass -DMDIO_INSTALL_DIR=<path>.")
endif()

# Add library directories
link_directories(
  "${MDIO_INSTALL_DIR}/lib"
  "${MDIO_INSTALL_DIR}/lib/drivers"
)

# CURL is built and installed by the MDIO installer; no need for separate find_package.

# Define MDIO linker flags - corrected format to match SCons configuration.
# Everything is handed to the linker through a single -Wl, option, so the
# string must not contain whitespace: the continuation lines below have to
# start in column 0.
set(MDIO_LINK_FLAGS
"-Wl,-rpath,${MDIO_INSTALL_DIR}/lib,-rpath,${MDIO_INSTALL_DIR}/lib/drivers,--whole-archive,-L${MDIO_INSTALL_DIR}/lib,-L${MDIO_INSTALL_DIR}/lib/drivers,\
-lnlohmann_json_schema_validator,\
-ltensorstore_driver_zarr_bzip2_compressor,\
-ltensorstore_driver_zarr_driver,\
-ltensorstore_driver_zarr_spec,\
-ltensorstore_driver_zarr_zlib_compressor,\
-ltensorstore_driver_zarr_zstd_compressor,\
-ltensorstore_driver_zarr_blosc_compressor,\
-ltensorstore_kvstore_gcs_http,\
-ltensorstore_kvstore_gcs_gcs_resource,\
-ltensorstore_kvstore_gcs_validate,\
-ltensorstore_kvstore_gcs_http_gcs_resource,\
-ltensorstore_kvstore_s3,\
-ltensorstore_kvstore_s3_aws_credentials_resource,\
-ltensorstore_kvstore_s3_credentials_default_credential_provider,\
-ltensorstore_kvstore_s3_credentials_environment_credential_provider,\
-ltensorstore_kvstore_s3_credentials_file_credential_provider,\
-ltensorstore_kvstore_s3_credentials_ec2_credential_provider,\
-ltensorstore_kvstore_s3_s3_metadata,\
-ltensorstore_kvstore_s3_s3_resource,\
-ltensorstore_driver_json,\
-ltensorstore_internal_cache_cache_pool_resource,\
-ltensorstore_internal_data_copy_concurrency_resource,\
-ltensorstore_kvstore_file,\
-ltensorstore_internal_file_io_concurrency_resource,\
-ltensorstore_internal_cache_kvs_backed_chunk_cache,\
-labsl,\
-lblosc,\
-ltensorstore,\
-lre2,\
-lriegeli,\
-ltinyxml2_tinyxml2,\
-lcurl,\
-lopenssl,\
--no-whole-archive,\
-lpthread,\
-lm"
)

# Debug: Print out the MDIO_LINK_FLAGS
message(STATUS "MDIO_LINK_FLAGS: ${MDIO_LINK_FLAGS}")

# Create the executable target. (Assumes main.cc exists next to this file.)
add_executable(read main.cc)

# Append the linker flags to the target's link flags.
set_target_properties(read PROPERTIES LINK_FLAGS "${MDIO_LINK_FLAGS}")

# Add compile definitions
target_compile_definitions(read PRIVATE HAVE_MDIO MAX_NUM_SLICES=32)

# Add MDIO and third-party include directories for target 'read'
# Collect all immediate subdirectories from the MDIO include directory.
file(GLOB CHILD_DIRS LIST_DIRECTORIES true "${MDIO_INSTALL_DIR}/include/*")

# Also include the top-level include directory so that headers like "mdio/mdio.h" are found.
list(INSERT CHILD_DIRS 0 "${MDIO_INSTALL_DIR}/include")

# Remove any unwanted directories (for example, the gtest-src directory).
# The pattern is anchored to the last path component, so only an entry whose
# own name contains "gtest-src" is dropped, not everything below such a parent.
list(FILTER CHILD_DIRS EXCLUDE REGEX "gtest-src[^/]*$")

# Append additional directories that the installer uses but might not be one-level deep.
list(APPEND CHILD_DIRS
  "${MDIO_INSTALL_DIR}/include/nlohmann_json-src/include"
  "${MDIO_INSTALL_DIR}/include/half-src/include"
)

target_include_directories(read PRIVATE ${CHILD_DIRS})

# Debug: Print out the include directories for target 'read'
get_target_property(READ_INCLUDES read INCLUDE_DIRECTORIES)
message(STATUS "Target 'read' include directories: ${READ_INCLUDES}")

examples/seismic_reader/README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Seismic data reader
2+
3+
The purpose of this example is to demonstrate one method of integrating the **MDIO** C++ library, in the form of a seismic data reader, into an existing project.
4+
5+
Due to the variety of target build systems and differing complexities of integrating the library with existing CMake projects, this example will also expose a way to build MDIO and its dependencies as a set of shared and linkable libraries and integrate it that way.
6+
7+
## Table of contents
8+
- [Concepts demonstrated](#concepts-demonstrated)
9+
- [Overview](#overview)
10+
- [Running](#running)
11+
- [Glossary](#glossary)
12+
13+
## Concepts demonstrated
14+
- Index-based slicing
15+
- Value-based slicing
16+
- Dimension coordinates
17+
- Coordinates
18+
- Caching
19+
20+
Chunk-aligned operations were briefly discussed but not examined in great detail for this example.
21+
22+
## Overview
23+
24+
This example assumes that you are able to build the main branch of **MDIO**.
25+
26+
This example will perform the following steps:
27+
28+
1. Clone the installer to this directory.
29+
2. Run the installer, building and installing several archives and shared files.
30+
3. Link the installed files in the `CMakeLists.txt`.
31+
4. Demonstrate opening a Dataset with a configurable cache.
32+
5. Demonstrate acquiring the corner points for the UTM grid on the Open Poseidon dataset from S3.
33+
- Calculates the latitude and longitude coordinates for the corner points.
34+
- Provides a web link to display the surface area on a map.
35+
6. Demonstrate acquiring the inline and crossline extents.
36+
7. Demonstrate an index-based slice for chunk-aligned, tracewise processing.
37+
- Calculates basic statistics for a small section of the dataset.
38+
- Tracks the highest (peak) and lowest (trough) amplitudes and their actual inline/crossline coordinate pairs.
39+
8. Demonstrate a value-based slice for more targeted analysis.
40+
- Correlates the cdp-x and cdp-y coordinate pair for the peak and trough values.
41+
- Provides a web link to display each one's location on a map.
42+
43+
## Running
44+
45+
```bash
46+
$ ./bootstrap.sh
47+
$ cd build
48+
$ cmake ..
49+
$ make -j
50+
```
51+
52+
The bootstrap shell script will build and install the mdio-cpp library as a set of shared and static objects.
53+
54+
It will then set up the build directory if it completes without error.
55+
56+
After the program has finished building it can be run with `./read`.
57+
58+
## Glossary
59+
- Dimension coordinate: A 1-dimensional Variable that describes a dimension of the dataset.
60+
- Coordinate: A Variable that helps describe data outside of its natural (logical) domain. May be greater than 1-dimensional.
61+
- Slicing: The act of subsetting a dataset along one or more dimension coordinates. A subset of a dataset is still considered a dataset.
62+
- Index-based slicing (*isel*): Subsetting a dataset based on the logical indices of its *dimension coordinate*(s).
63+
- Value-based slicing (*sel*): Subsetting a dataset based on the values contained by its *dimension coordinate*(s).
64+
- Chunk-aligned: Slicing the data along its logical chunk boundaries for efficient I/O performance.

examples/seismic_reader/bootstrap.sh

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
# bootstrap.sh
#
# This script bootstraps the MDIO installation.
# It clones the mdio-cpp-installer repository and installs MDIO
# into the current directory (in the "inst" folder), then creates the
# "build" directory next to it.
#
# The script is safe to re-run: an existing clone and an existing build
# directory are both reused.
#
# Instruction from the installer:
# $ ./install.sh [install_directory] [mdio_tag]
# (the mdio_tag is ignored)
# The hidden --curl flag is also used.

# Define installation directory as the "inst" folder in the current directory
INST_DIR="$(pwd)/inst"

echo "Installing MDIO in: $INST_DIR"

# Clone the installer repo if it does not already exist
if [ ! -d "mdio-cpp-installer" ]; then
  echo "Cloning mdio-cpp-installer repository..."
  git clone https://github.com/BrianMichell/mdio-cpp-installer.git || {
    echo "Failed to clone repository."; exit 1;
  }
fi

# Change into the installer directory
cd mdio-cpp-installer || {
  echo "Failed to change directory into mdio-cpp-installer."; exit 1;
}

# Make sure the installer script is executable
chmod +x install.sh

# Run the installer: Pass the installation directory, a dummy tag,
# and the hidden "--curl" flag.
echo "Running MDIO installer..."
./install.sh "$INST_DIR" dummy_tag --curl || {
  echo "MDIO installation failed."; exit 1;
}

echo "MDIO installation completed successfully."

# Return to the original directory
cd .. || exit 1

# -p: do not fail (and exit non-zero) when build/ is left over from an earlier run.
mkdir -p build

examples/seismic_reader/main.cc

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
// Copyright 2025 TGS
2+
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "main.hh"
16+
17+
int main() {
18+
const std::string path = "s3://tgs-opendata-poseidon/full_stack_agc.mdio";
19+
const uint64_t cache_size_bytes = 1024ULL * 1024ULL * 1024ULL * 5ULL; // Use a 5GiB cache.
20+
mdio::Future<mdio::Dataset> dsFut = OpenDataset(path, cache_size_bytes);
21+
22+
if (!dsFut.status().ok()) {
23+
std::cerr << "Failed to open dataset: " << dsFut.status().message() << std::endl;
24+
return 1;
25+
}
26+
27+
mdio::Dataset ds = dsFut.value();
28+
std::cout << "Dataset opened successfully" << std::endl;
29+
std::cout << ds << std::endl;
30+
31+
auto cdpsFut = GetUTMCoords(ds);
32+
if (!cdpsFut.status().ok()) {
33+
std::cerr << "Failed to get UTM coordinates: " << cdpsFut.status().message() << std::endl;
34+
return 1;
35+
}
36+
37+
auto cdps = cdpsFut.value();
38+
auto cdp_x = cdps.first.value();
39+
auto cdp_y = cdps.second.value();
40+
41+
auto cdp_x_extents = GetExtents<mdio::dtypes::float64_t>(cdp_x);
42+
auto cdp_y_extents = GetExtents<mdio::dtypes::float64_t>(cdp_y);
43+
44+
std::cout << "========CDP Coordinates=========" << std::endl;
45+
std::cout << "CDP X extents: " << cdp_x_extents << std::endl;
46+
std::cout << "CDP Y extents: " << cdp_y_extents << std::endl;
47+
utm::print_corners(cdp_x_extents, cdp_y_extents);
48+
std::cout << std::endl;
49+
// This displays the maximum area of the extents on a web map.
50+
// The surveys actual polygon is not computed in this example.
51+
utm::web_display(cdp_x_extents, cdp_y_extents);
52+
std::cout << std::endl;
53+
std::cout << "=================================" << std::endl << std::endl;
54+
55+
auto linesFut = GetInlineCrossline(ds);
56+
if (!linesFut.status().ok()) {
57+
std::cerr << "Failed to get inline and crossline coordinates: " << linesFut.status().message() << std::endl;
58+
return 1;
59+
}
60+
61+
auto lines = linesFut.value();
62+
auto il = lines.first.value();
63+
auto xl = lines.second.value();
64+
65+
auto il_extents = GetExtents<mdio::dtypes::uint16_t>(il);
66+
auto xl_extents = GetExtents<mdio::dtypes::uint16_t>(xl);
67+
68+
std::cout << "Inline extents: " << il_extents << std::endl;
69+
std::cout << "Crossline extents: " << xl_extents << std::endl;
70+
71+
auto statsResult = stats::CalculateVolumeStatistics<mdio::dtypes::float32_t, mdio::dtypes::uint16_t>(ds);
72+
if (!statsResult.status().ok()) {
73+
std::cerr << "Failed to get volume statistics: " << statsResult.status().message() << std::endl;
74+
return 1;
75+
}
76+
77+
auto stats = statsResult.value();
78+
std::cout << stats << std::endl;
79+
80+
// Now that we have the statistics, lets pinpoint where our peak and trough amplitudes are located on the world.
81+
mdio::ValueDescriptor<mdio::dtypes::uint16_t> il_peak = {"inline", stats.peak_amplitude_inline};
82+
mdio::ValueDescriptor<mdio::dtypes::uint16_t> xl_peak = {"crossline", stats.peak_amplitude_crossline};
83+
mdio::ValueDescriptor<mdio::dtypes::uint16_t> il_trough = {"inline", stats.trough_amplitude_inline};
84+
mdio::ValueDescriptor<mdio::dtypes::uint16_t> xl_trough = {"crossline", stats.trough_amplitude_crossline};
85+
86+
auto peakSlicedDatasetRes = ds.sel(il_peak, xl_peak);
87+
if (!peakSlicedDatasetRes.status().ok()) {
88+
std::cerr << "Failed to slice dataset: " << peakSlicedDatasetRes.status().message() << std::endl;
89+
return 1;
90+
}
91+
92+
auto peakSlicedDataset = peakSlicedDatasetRes.value();
93+
// We can print the dataset and see that the inline and crossline indices are not the same as their values.
94+
// std::cout << "Peak sliced dataset: " << peakSlicedDataset << std::endl;
95+
96+
// We can re-use the same function (and variable) to get the cdp coordinates of the peak amplitude now.
97+
cdpsFut = GetUTMCoords(peakSlicedDataset);
98+
if (!cdpsFut.status().ok()) {
99+
std::cerr << "Failed to get UTM coordinates of peak amplitude: " << cdpsFut.status().message() << std::endl;
100+
return 1;
101+
}
102+
103+
cdps = cdpsFut.value();
104+
cdp_x = cdps.first.value();
105+
cdp_y = cdps.second.value();
106+
cdp_x_extents = GetExtents<mdio::dtypes::float64_t>(cdp_x);
107+
cdp_y_extents = GetExtents<mdio::dtypes::float64_t>(cdp_y);
108+
109+
std::cout << "========Peak Amplitude=========" << std::endl;
110+
utm::print_corners(cdp_x_extents, cdp_y_extents);
111+
utm::web_display(cdp_x_extents, cdp_y_extents);
112+
std::cout << std::endl;
113+
std::cout << "=================================" << std::endl << std::endl;
114+
115+
auto troughSlicedDatasetRes = ds.sel(il_trough, xl_trough);
116+
if (!troughSlicedDatasetRes.status().ok()) {
117+
std::cerr << "Failed to slice dataset: " << troughSlicedDatasetRes.status().message() << std::endl;
118+
return 1;
119+
}
120+
121+
auto troughSlicedDataset = troughSlicedDatasetRes.value();
122+
// The trough sliced dataset should show a different inline/crossline index pair than the peak sliced dataset.
123+
// std::cout << "Trough sliced dataset: " << troughSlicedDataset << std::endl;
124+
125+
cdpsFut = GetUTMCoords(troughSlicedDataset);
126+
if (!cdpsFut.status().ok()) {
127+
std::cerr << "Failed to get UTM coordinates of trough amplitude: " << cdpsFut.status().message() << std::endl;
128+
return 1;
129+
}
130+
131+
cdps = cdpsFut.value();
132+
cdp_x = cdps.first.value();
133+
cdp_y = cdps.second.value();
134+
cdp_x_extents = GetExtents<mdio::dtypes::float64_t>(cdp_x);
135+
cdp_y_extents = GetExtents<mdio::dtypes::float64_t>(cdp_y);
136+
137+
std::cout << "========Trough Amplitude=========" << std::endl;
138+
utm::print_corners(cdp_x_extents, cdp_y_extents);
139+
std::cout << std::endl;
140+
utm::web_display(cdp_x_extents, cdp_y_extents);
141+
std::cout << std::endl;
142+
std::cout << "=================================" << std::endl << std::endl;
143+
return 0;
144+
}
145+
146+
147+
// Opens the dataset at `path` (with mdio::constants::kOpen) using a context
// whose cache pool "total_bytes_limit" is set to `cache_size_bytes`.
//
// Returns the future produced by mdio::Dataset::Open. If the context spec
// cannot be built from the cache JSON, the returned future carries that error
// status instead, so callers see it through their usual status() check.
mdio::Future<mdio::Dataset> OpenDataset(const std::string& path, uint64_t cache_size_bytes) {
  // Build the spec directly from the requested size; there is no separate
  // default to fall back to because the limit is always supplied.
  nlohmann::json cacheJson;
  cacheJson["cache_pool"]["total_bytes_limit"] = cache_size_bytes;

  auto spec = mdio::Context::Spec::FromJson(cacheJson);
  if (!spec.status().ok()) {
    // Do not call value() on a failed result; hand the error to the caller.
    return spec.status();
  }

  auto ctx = mdio::Context(spec.value());
  return mdio::Dataset::Open(path, mdio::constants::kOpen, ctx);
}
154+
155+
// Looks up the float64 "cdp-x" and "cdp-y" variables of `ds` and calls Read()
// on each, x first.
//
// Returns the two read futures as a (cdp-x, cdp-y) pair; this function does
// not inspect the futures itself, so the caller must check each one before
// using its value. If either variable lookup fails, that error is returned.
mdio::Result<std::pair<mdio::Future<mdio::VariableData<mdio::dtypes::float64_t>>, mdio::Future<mdio::VariableData<mdio::dtypes::float64_t>>>> GetUTMCoords(mdio::Dataset& ds) {
  using CoordType = mdio::dtypes::float64_t;

  MDIO_ASSIGN_OR_RETURN(auto xVariable, ds.variables.get<CoordType>("cdp-x"));
  MDIO_ASSIGN_OR_RETURN(auto yVariable, ds.variables.get<CoordType>("cdp-y"));

  auto xRead = xVariable.Read();
  auto yRead = yVariable.Read();
  return std::make_pair(xRead, yRead);
}
164+
165+
// Looks up the uint16 "inline" and "crossline" variables of `ds` and calls
// Read() on each, inline first.
//
// Returns the two read futures as an (inline, crossline) pair; this function
// does not inspect the futures itself, so the caller must check each one
// before using its value. If either variable lookup fails, that error is
// returned.
mdio::Result<std::pair<mdio::Future<mdio::VariableData<mdio::dtypes::uint16_t>>, mdio::Future<mdio::VariableData<mdio::dtypes::uint16_t>>>> GetInlineCrossline(mdio::Dataset& ds) {
  using LineType = mdio::dtypes::uint16_t;

  MDIO_ASSIGN_OR_RETURN(auto inlineVariable, ds.variables.get<LineType>("inline"));
  MDIO_ASSIGN_OR_RETURN(auto crosslineVariable, ds.variables.get<LineType>("crossline"));

  auto inlineRead = inlineVariable.Read();
  auto crosslineRead = crosslineVariable.Read();
  return std::make_pair(inlineRead, crosslineRead);
}

0 commit comments

Comments
 (0)