diff --git a/cctorch/data.py b/cctorch/data.py index 1a0763d..acd71ce 100644 --- a/cctorch/data.py +++ b/cctorch/data.py @@ -184,6 +184,7 @@ def __init__( if self.mode == "AN": self.data_list1 = pd.read_csv(data_list1) self.data_list2 = self.data_list1 + self.data_format2 = self.data_format1 block_num1 = int(np.ceil(len(unique_row) / block_size1)) block_num2 = int(np.ceil(len(unique_col) / block_size2)) @@ -382,12 +383,14 @@ def sample(self, block_index): else: meta2 = local_dict[self.data_list2.loc[jj, "file_name"]] - if self.mode == "AN": - data1.append(meta1["data"][:, :, self.data_list1.loc[ii, "channel_index"]]) - index1.append(self.data_list1.loc[ii, "channel_index"]) + if (self.mode == "AN") and ("channel_index" in self.data_list1.columns): + ch1 = self.data_list1.loc[ii, "channel_index"] + ch2 = self.data_list2.loc[jj, "channel_index"] + data1.append(meta1["data"][:, ch1 : ch1 + 1, :]) # (nc, nx, nt) + index1.append(ch1) info1.append({"file_name": self.data_list1.loc[ii, "file_name"]}) - data2.append(meta2["data"][:, :, self.data_list2.loc[jj, "channel_index"]]) - index2.append(self.data_list2.loc[jj, "channel_index"]) + data2.append(meta2["data"][:, ch2 : ch2 + 1, :]) + index2.append(ch2) info2.append({"file_name": self.data_list2.loc[jj, "file_name"]}) else: data1.append(meta1["data"]) @@ -485,6 +488,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}): elif mode == "AN": if format == "h5": data, info = read_das_continuous_data_h5(data_path / file_name, dataset_keys=[]) + elif format == "mseed": + data, info = read_mseed(file_name, config=config) elif mode == "TM": if format == "mseed": @@ -500,8 +505,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}): def read_mseed(fname, highpass_filter=False, sampling_rate=100, config=None): try: stream = obspy.Stream() - for tmp in fname.split("_"): - with fsspec.open(tmp, "rb") as fs: + for tmp in fname.split("|"): + with fsspec.open(tmp, "rb", anon=True) as fs: if tmp.endswith(".sac"): meta = obspy.read(fs, format="SAC") else: diff --git a/cctorch/transforms.py b/cctorch/transforms.py index af05465..b902ed2 100644 --- a/cctorch/transforms.py +++ b/cctorch/transforms.py @@ -142,6 +142,8 @@ def forward(self, data): moving_abs[moving_abs == 0.0] = 1.0 data /= moving_abs[:, :, :nx, :nt] + data = data.squeeze(0) # (nb, nc, nx, nt) -> (nc, nx, nt) + return data diff --git a/run.py b/run.py index a674093..2c0feb8 100644 --- a/run.py +++ b/run.py @@ -11,13 +11,14 @@ import torch import torch.distributed as dist import torchvision.transforms as T +from sklearn.cluster import DBSCAN +from torch.utils.data import DataLoader +from tqdm import tqdm + import utils from cctorch import CCDataset, CCIterableDataset, CCModel from cctorch.transforms import * from cctorch.utils import write_ambient_noise -from sklearn.cluster import DBSCAN -from torch.utils.data import DataLoader -from tqdm import tqdm def get_args_parser(add_help=True): @@ -35,8 +36,8 @@ def get_args_parser(add_help=True): parser.add_argument("--data_list2", default=None, type=str, help="data list 1") parser.add_argument("--data_path1", default="./", type=str, help="data path") parser.add_argument("--data_path2", default="./", type=str, help="data path") - parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap}") - parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap}") + parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap, mseed}") + parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap, mseed}") parser.add_argument("--config", default=None, type=str, help="config file") parser.add_argument("--result_path", default="./results", type=str, help="results path") parser.add_argument("--dataset_type", default="iterable", type=str, help="data loader type in {map, iterable}") diff --git a/scripts/generate_list_an.py b/scripts/generate_das_list.py similarity index 100% rename from scripts/generate_list_an.py rename to scripts/generate_das_list.py diff --git a/scripts/generate_mseed_list.py b/scripts/generate_mseed_list.py new file mode 100644 index 0000000..ef9a265 --- /dev/null +++ b/scripts/generate_mseed_list.py @@ -0,0 +1,95 @@ +# %% +import fsspec +import h5py +import obspy +import pandas as pd + + +def map_cloud_path(root_path, provider, starttime, network, station, location, channels): + paths = [] + for channel in channels.split(","): + if isinstance(starttime, str): + starttime = pd.Timestamp(starttime) + if provider.lower() == "scedc": + year = starttime.strftime("%Y") + dayofyear = starttime.strftime("%j") + if location == "": + location = "__" + path = f"{root_path}/{provider.lower()}-pds/continuous_waveforms/{year}/{year}_{dayofyear}/{network}{station:_<5}{channel}{location:_<2}_{year}{dayofyear}.ms" + elif provider.lower() == "ncedc": + year = starttime.strftime("%Y") + dayofyear = starttime.strftime("%j") + path = f"{root_path}/{provider.lower()}-pds/continuous_waveforms/{network}/{year}/{year}.{dayofyear}/{station}.{network}.{channel}.{location}.D.{year}.{dayofyear}" + else: + raise ValueError(f"Unknown provider: {provider}") + paths.append(path) + + return paths + + +# %% +if __name__ == "__main__": + # %% + mseed_list = [ + { + "provider": "ncedc", + "network": "NC", + "station": "KCT", + "location": "", + "channels": "HHE,HHN,HHZ", + "year": "2012", + "month": "01", + "day": "01", + }, + { + "provider": "ncedc", + "network": "NC", + "station": "KRP", + "location": "", + "channels": "HHE,HHN,HHZ", + "year": "2012", + "month": "01", + "day": "01", + }, + { + "provider": "ncedc", + "network": "NC", + "station": "KHMB", + "location": "", + "channels": "HHE,HHN,HHZ", + "year": "2012", + "month": "01", + "day": "01", + }, + ] + # %% + file_list = [] + root_path = "s3:/" + for mseed_info in mseed_list: + starttime = pd.Timestamp(f"{mseed_info['year']}-{mseed_info['month']}-{mseed_info['day']}T00:00:00") + file_path = map_cloud_path( + root_path, + mseed_info["provider"], + starttime, + mseed_info["network"], + mseed_info["station"], + mseed_info["location"], + mseed_info["channels"], + ) + file_list.append("|".join(file_path)) + # with fsspec.open(file_path, "rb", anon=True) as f: + # stream = obspy.read(f) + # stream.plot() # %% + + with open("data_list.txt", "w") as f: + f.write("file_name\n") + f.write("\n".join(file_list)) + + num_files = len(file_list) + with open("pair_list.txt", "w") as f: + for i in range(num_files): + for j in range(i + 1, num_files): + f.write(f"{i},{j}\n") + + +# %% diff --git a/scripts/plot_ambient_noise.py b/scripts/plot_ambient_noise_das.py similarity index 90% rename from scripts/plot_ambient_noise.py rename to scripts/plot_ambient_noise_das.py index 36a3f23..498e505 100644 --- a/scripts/plot_ambient_noise.py +++ b/scripts/plot_ambient_noise_das.py @@ -55,9 +55,9 @@ def get_args_parser(add_help=True): index = index[sorted_idx] data = data[sorted_idx] - np.savez(figure_path / f"result_{ch1}.npz", data=data, index=index) + np.savez(figure_path / f"ambient_noise_das_{ch1}.npz", data=data, index=index) plt.figure() vmax = np.std(data) plt.imshow(data, vmin=-vmax, vmax=vmax, aspect="auto", cmap="RdBu") plt.colorbar() - plt.savefig(figure_path / f"result_{ch1}.png", dpi=300, bbox_inches="tight") + plt.savefig(figure_path / f"ambient_noise_das_{ch1}.png", dpi=300, bbox_inches="tight") diff --git a/scripts/plot_ambient_noise_mseed.py b/scripts/plot_ambient_noise_mseed.py new file mode 100644 index 0000000..a5a6c49 --- /dev/null +++ b/scripts/plot_ambient_noise_mseed.py @@ -0,0 +1,53 @@ +# %% +from pathlib import Path + +import h5py +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from tqdm.auto import tqdm + + +def get_args_parser(add_help=True): + + import argparse + + parser = argparse.ArgumentParser(description="Read CCTorch Results", add_help=add_help) + parser.add_argument("--result_path", type=str, default="results", help="path to results") + parser.add_argument("--figure_path", type=str, default="figures", help="path to figures") + return parser + + +# %% +if __name__ == "__main__": + + args = get_args_parser().parse_args() + + result_path = Path(args.result_path) + figure_path = Path(args.figure_path) + if not figure_path.exists(): + figure_path.mkdir(parents=True) + + h5_files = sorted(result_path.glob("*.h5")) + print(f"{len(h5_files)} hdf5 files found") + + data = [] + index = [] + for h5_file in h5_files: + with h5py.File(h5_file, "r") as fp: + print(fp.keys()) + ch1_list = fp.keys() + for ch1 in ch1_list: + ch2_list = fp[ch1].keys() + for ch2 in ch2_list: + plt.figure() + plt.plot(fp[f"{ch1}/{ch2}"]["xcorr"][0, :]) + plt.plot(fp[f"{ch1}/{ch2}"]["xcorr"][1, :] + 1) + plt.plot(fp[f"{ch1}/{ch2}"]["xcorr"][2, :] + 2) + plt.savefig(figure_path / f"ambient_noise_{ch1}_{ch2}.png", dpi=300, bbox_inches="tight") + # raise + # for ch2 in ch2_list: + # data.append(fp[ch1][ch2]["xcorr"][:]) + # index.append(ch2) + + raise