try to add ambient noise of mseed

AI4EPS · Jan 19, 2025 · fff8c91 · fff8c91
1 parent 12694c8
commit fff8c91
Show file tree

Hide file tree

Showing 7 changed files with 170 additions and 14 deletions.
diff --git a/cctorch/data.py b/cctorch/data.py
@@ -184,6 +184,7 @@ def __init__(
         if self.mode == "AN":
             self.data_list1 = pd.read_csv(data_list1)
             self.data_list2 = self.data_list1
+            self.data_format2 = self.data_format1
 
         block_num1 = int(np.ceil(len(unique_row) / block_size1))
         block_num2 = int(np.ceil(len(unique_col) / block_size2))
@@ -382,12 +383,14 @@ def sample(self, block_index):
                     else:
                         meta2 = local_dict[self.data_list2.loc[jj, "file_name"]]
 
-                if self.mode == "AN":
-                    data1.append(meta1["data"][:, :, self.data_list1.loc[ii, "channel_index"]])
-                    index1.append(self.data_list1.loc[ii, "channel_index"])
+                if (self.mode == "AN") and ("channel_index" in self.data_list1.columns):
+                    ch1 = self.data_list1.loc[ii, "channel_index"]
+                    ch2 = self.data_list2.loc[jj, "channel_index"]
+                    data1.append(meta1["data"][:, ch1 : ch1 + 1, :])  # (nc, nx, nt)
+                    index1.append(ch1)
                     info1.append({"file_name": self.data_list1.loc[ii, "file_name"]})
-                    data2.append(meta2["data"][:, :, self.data_list2.loc[jj, "channel_index"]])
-                    index2.append(self.data_list2.loc[jj, "channel_index"])
+                    data2.append(meta2["data"][:, ch2 : ch2 + 1, :])
+                    index2.append(ch2)
                     info2.append({"file_name": self.data_list2.loc[jj, "file_name"]})
                 else:
                     data1.append(meta1["data"])
@@ -485,6 +488,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}):
     elif mode == "AN":
         if format == "h5":
             data, info = read_das_continuous_data_h5(data_path / file_name, dataset_keys=[])
+        elif format == "mseed":
+            data, info = read_mseed(file_name, config=config)
 
     elif mode == "TM":
         if format == "mseed":
@@ -500,8 +505,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}):
 def read_mseed(fname, highpass_filter=False, sampling_rate=100, config=None):
     try:
         stream = obspy.Stream()
-        for tmp in fname.split("_"):
-            with fsspec.open(tmp, "rb") as fs:
+        for tmp in fname.split("|"):
+            with fsspec.open(tmp, "rb", anon=True) as fs:
                 if tmp.endswith(".sac"):
                     meta = obspy.read(fs, format="SAC")
                 else:

diff --git a/cctorch/transforms.py b/cctorch/transforms.py
@@ -142,6 +142,8 @@ def forward(self, data):
         moving_abs[moving_abs == 0.0] = 1.0
         data /= moving_abs[:, :, :nx, :nt]
 
+        data = data.squeeze(0)  # (nb, nc, nx, nt) -> (nc, nx, nt)
+
         return data
 
 

diff --git a/run.py b/run.py
@@ -11,13 +11,14 @@
 import torch
 import torch.distributed as dist
 import torchvision.transforms as T
+from sklearn.cluster import DBSCAN
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
 import utils
 from cctorch import CCDataset, CCIterableDataset, CCModel
 from cctorch.transforms import *
 from cctorch.utils import write_ambient_noise
-from sklearn.cluster import DBSCAN
-from torch.utils.data import DataLoader
-from tqdm import tqdm
 
 
 def get_args_parser(add_help=True):
@@ -35,8 +36,8 @@ def get_args_parser(add_help=True):
     parser.add_argument("--data_list2", default=None, type=str, help="data list 1")
     parser.add_argument("--data_path1", default="./", type=str, help="data path")
     parser.add_argument("--data_path2", default="./", type=str, help="data path")
-    parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap}")
-    parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap}")
+    parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap, mseed}")
+    parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap, mseed}")
     parser.add_argument("--config", default=None, type=str, help="config file")
     parser.add_argument("--result_path", default="./results", type=str, help="results path")
     parser.add_argument("--dataset_type", default="iterable", type=str, help="data loader type in {map, iterable}")

diff --git a/scripts/generate_list_an.py → scripts/generate_das_list.py b/scripts/generate_list_an.py → scripts/generate_das_list.py
diff --git a/scripts/generate_mseed_list.py b/scripts/generate_mseed_list.py
@@ -0,0 +1,95 @@
+# %%
+import fsspec
+import h5py
+import obspy
+import pandas as pd
+
+
+def map_cloud_path(root_path, provider, starttime, network, station, location, channels):
+    """Build one cloud object path per channel for a single station and day.
+
+    Args:
+        root_path: Prefix placed in front of the bucket name; the script below
+            passes "s3:/" so the result starts with "s3://<provider>-pds/...".
+        provider: Data center name, matched case-insensitively against
+            "scedc" and "ncedc".
+        starttime: Day of interest, either a string accepted by
+            ``pd.Timestamp`` or an object exposing ``strftime``.
+        network: Network code inserted into the path.
+        station: Station code inserted into the path.
+        location: Location code; may be an empty string.
+        channels: Comma-separated channel codes, e.g. "HHE,HHN,HHZ".
+            Whitespace around each code is ignored.
+
+    Returns:
+        A list of path strings, one per channel, in the order given.
+
+    Raises:
+        ValueError: If ``provider`` is neither "scedc" nor "ncedc".
+    """
+    provider_key = provider.lower()
+    if provider_key not in ("scedc", "ncedc"):
+        raise ValueError(f"Unknown provider: {provider}")
+
+    # Everything below is identical for all channels, so compute it once.
+    if isinstance(starttime, str):
+        starttime = pd.Timestamp(starttime)
+    year = starttime.strftime("%Y")
+    dayofyear = starttime.strftime("%j")
+    base = f"{root_path}/{provider_key}-pds/continuous_waveforms"
+
+    paths = []
+    for channel in channels.split(","):
+        channel = channel.strip()  # tolerate "HHE, HHN" style lists
+        if provider_key == "scedc":
+            # Station is right-padded with "_" to 5 characters and location to 2,
+            # so an empty location becomes "__".
+            path = f"{base}/{year}/{year}_{dayofyear}/{network}{station:_<5}{channel}{location:_<2}_{year}{dayofyear}.ms"
+        else:
+            path = f"{base}/{network}/{year}/{year}.{dayofyear}/{station}.{network}.{channel}.{location}.D.{year}.{dayofyear}"
+        paths.append(path)
+
+    return paths
+
+
+# %%
+if __name__ == "__main__":
+    # %%
+    # Stations to list; every entry shares provider, network, channels and date.
+    stations = ["KCT", "KRP", "KHMB"]
+    mseed_list = [
+        {
+            "provider": "ncedc",
+            "network": "NC",
+            "station": station,
+            "location": "",
+            "channels": "HHE,HHN,HHZ",
+            "year": "2012",
+            "month": "01",
+            "day": "01",
+        }
+        for station in stations
+    ]
+    # %%
+    root_path = "s3:/"
+    file_list = []
+    for entry in mseed_list:
+        date = "-".join([entry["year"], entry["month"], entry["day"]])
+        starttime = pd.Timestamp(f"{date}T00:00:00")
+        channel_paths = map_cloud_path(
+            root_path,
+            entry["provider"],
+            starttime,
+            entry["network"],
+            entry["station"],
+            entry["location"],
+            entry["channels"],
+        )
+        # One row per station: its channel paths are joined with "|".
+        file_list.append("|".join(channel_paths))
+
+    # Single-column table with a "file_name" header and one row per station.
+    with open("data_list.txt", "w") as f:
+        f.write("file_name\n" + "\n".join(file_list))
+
+    # Every unordered pair (i, j) with i < j of row indices in data_list.txt.
+    num_files = len(file_list)
+    pair_lines = [f"{i},{j}\n" for i in range(num_files) for j in range(i + 1, num_files)]
+    with open("pair_list.txt", "w") as f:
+        f.writelines(pair_lines)
+
+
+# %%
diff --git a/scripts/plot_ambient_noise.py → scripts/plot_ambient_noise_das.py b/scripts/plot_ambient_noise.py → scripts/plot_ambient_noise_das.py
@@ -55,9 +55,9 @@ def get_args_parser(add_help=True):
         index = index[sorted_idx]
         data = data[sorted_idx]
 
-        np.savez(figure_path / f"result_{ch1}.npz", data=data, index=index)
+        np.savez(figure_path / f"ambient_noise_das_{ch1}.npz", data=data, index=index)
         plt.figure()
         vmax = np.std(data)
         plt.imshow(data, vmin=-vmax, vmax=vmax, aspect="auto", cmap="RdBu")
         plt.colorbar()
-        plt.savefig(figure_path / f"result_{ch1}.png", dpi=300, bbox_inches="tight")
+        plt.savefig(figure_path / f"ambient_noise_das_{ch1}.png", dpi=300, bbox_inches="tight")
diff --git a/scripts/plot_ambient_noise_mseed.py b/scripts/plot_ambient_noise_mseed.py
@@ -0,0 +1,53 @@
+# %%
+from pathlib import Path
+
+import h5py
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from tqdm.auto import tqdm
+
+
+def get_args_parser(add_help=True):
+    """Create the command-line parser for this plotting script.
+
+    Args:
+        add_help: Forwarded to ``argparse.ArgumentParser``; controls whether
+            the ``-h/--help`` option is added.
+
+    Returns:
+        A parser exposing ``--result_path`` and ``--figure_path``.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Read CCTorch Results", add_help=add_help)
+    # (flag, default, help) for each string-valued option
+    options = (
+        ("--result_path", "results", "path to results"),
+        ("--figure_path", "figures", "path to figures"),
+    )
+    for flag, default, help_text in options:
+        parser.add_argument(flag, type=str, default=default, help=help_text)
+    return parser
+
+
+# %%
+if __name__ == "__main__":
+    # Plot the "xcorr" dataset of every <ch1>/<ch2> group found in the result
+    # HDF5 files and save one PNG per pair into the figure directory.
+
+    args = get_args_parser().parse_args()
+
+    result_path = Path(args.result_path)
+    figure_path = Path(args.figure_path)
+    figure_path.mkdir(parents=True, exist_ok=True)
+
+    h5_files = sorted(result_path.glob("*.h5"))
+    print(f"{len(h5_files)} hdf5 files found")
+
+    # NOTE(review): figure names depend only on ch1/ch2, so identical keys in
+    # two result files overwrite each other's figure -- confirm this is intended.
+    for h5_file in h5_files:
+        with h5py.File(h5_file, "r") as fp:
+            print(fp.keys())
+            for ch1 in fp.keys():
+                for ch2 in fp[ch1].keys():
+                    xcorr = fp[f"{ch1}/{ch2}"]["xcorr"]
+                    fig = plt.figure()
+                    # Shift each row by its index so the traces stack instead of
+                    # overlapping; with three rows this matches the previous
+                    # hard-coded offsets 0, 1 and 2.
+                    for row in range(xcorr.shape[0]):
+                        plt.plot(xcorr[row, :] + row)
+                    plt.savefig(figure_path / f"ambient_noise_{ch1}_{ch2}.png", dpi=300, bbox_inches="tight")
+                    # Release the figure; otherwise every pair keeps one open.
+                    plt.close(fig)