Skip to content

Commit

Permalink
try to add ambient noise of mseed
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuwq0 committed Jan 19, 2025
1 parent 12694c8 commit fff8c91
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 14 deletions.
19 changes: 12 additions & 7 deletions cctorch/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def __init__(
if self.mode == "AN":
self.data_list1 = pd.read_csv(data_list1)
self.data_list2 = self.data_list1
self.data_format2 = self.data_format1

block_num1 = int(np.ceil(len(unique_row) / block_size1))
block_num2 = int(np.ceil(len(unique_col) / block_size2))
Expand Down Expand Up @@ -382,12 +383,14 @@ def sample(self, block_index):
else:
meta2 = local_dict[self.data_list2.loc[jj, "file_name"]]

if self.mode == "AN":
data1.append(meta1["data"][:, :, self.data_list1.loc[ii, "channel_index"]])
index1.append(self.data_list1.loc[ii, "channel_index"])
if (self.mode == "AN") and ("channel_index" in self.data_list1.columns):
ch1 = self.data_list1.loc[ii, "channel_index"]
ch2 = self.data_list2.loc[jj, "channel_index"]
data1.append(meta1["data"][:, ch1 : ch1 + 1, :]) # (nc, nx, nt)
index1.append(ch1)
info1.append({"file_name": self.data_list1.loc[ii, "file_name"]})
data2.append(meta2["data"][:, :, self.data_list2.loc[jj, "channel_index"]])
index2.append(self.data_list2.loc[jj, "channel_index"])
data2.append(meta2["data"][:, ch2 : ch2 + 1, :])
index2.append(ch2)
info2.append({"file_name": self.data_list2.loc[jj, "file_name"]})
else:
data1.append(meta1["data"])
Expand Down Expand Up @@ -485,6 +488,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}):
elif mode == "AN":
if format == "h5":
data, info = read_das_continuous_data_h5(data_path / file_name, dataset_keys=[])
elif format == "mseed":
data, info = read_mseed(file_name, config=config)

elif mode == "TM":
if format == "mseed":
Expand All @@ -500,8 +505,8 @@ def read_data(file_name, data_path, format="h5", mode="CC", config={}):
def read_mseed(fname, highpass_filter=False, sampling_rate=100, config=None):
try:
stream = obspy.Stream()
for tmp in fname.split("_"):
with fsspec.open(tmp, "rb") as fs:
for tmp in fname.split("|"):
with fsspec.open(tmp, "rb", anon=True) as fs:
if tmp.endswith(".sac"):
meta = obspy.read(fs, format="SAC")
else:
Expand Down
2 changes: 2 additions & 0 deletions cctorch/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def forward(self, data):
moving_abs[moving_abs == 0.0] = 1.0
data /= moving_abs[:, :, :nx, :nt]

data = data.squeeze(0) # (nb, nc, nx, nt) -> (nc, nx, nt)

return data


Expand Down
11 changes: 6 additions & 5 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import torch
import torch.distributed as dist
import torchvision.transforms as T
from sklearn.cluster import DBSCAN
from torch.utils.data import DataLoader
from tqdm import tqdm

import utils
from cctorch import CCDataset, CCIterableDataset, CCModel
from cctorch.transforms import *
from cctorch.utils import write_ambient_noise
from sklearn.cluster import DBSCAN
from torch.utils.data import DataLoader
from tqdm import tqdm


def get_args_parser(add_help=True):
Expand All @@ -35,8 +36,8 @@ def get_args_parser(add_help=True):
parser.add_argument("--data_list2", default=None, type=str, help="data list 1")
parser.add_argument("--data_path1", default="./", type=str, help="data path")
parser.add_argument("--data_path2", default="./", type=str, help="data path")
parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap}")
parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap}")
parser.add_argument("--data_format1", default="h5", type=str, help="data type in {h5, memmap, mseed}")
parser.add_argument("--data_format2", default="h5", type=str, help="data type in {h5, memmap, mseed}")
parser.add_argument("--config", default=None, type=str, help="config file")
parser.add_argument("--result_path", default="./results", type=str, help="results path")
parser.add_argument("--dataset_type", default="iterable", type=str, help="data loader type in {map, iterable}")
Expand Down
File renamed without changes.
95 changes: 95 additions & 0 deletions scripts/generate_mseed_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# %%
import fsspec
import h5py
import obspy
import pandas as pd


def map_cloud_path(root_path, provider, starttime, network, station, location, channels):
    """Build the cloud object path of the daily waveform file of each channel.

    Args:
        root_path: Prefix prepended to every path (e.g. ``"s3:/"``).
        provider: Data center name, ``"scedc"`` or ``"ncedc"`` (case-insensitive).
        starttime: Day of the recording. A string is parsed with
            ``pd.Timestamp``; any other value must provide ``strftime``.
        network: Network code.
        station: Station code.
        location: Location code; may be an empty string.
        channels: Comma-separated channel codes, e.g. ``"HHE,HHN,HHZ"``.
            Whitespace around each code is ignored.

    Returns:
        A list with one path per channel, in the order given in ``channels``.

    Raises:
        ValueError: If ``provider`` is neither ``"scedc"`` nor ``"ncedc"``.
    """
    provider_key = provider.lower()
    if provider_key not in ("scedc", "ncedc"):
        raise ValueError(f"Unknown provider: {provider}")

    # Everything below depends only on the day, so compute it once instead of per channel.
    if isinstance(starttime, str):
        starttime = pd.Timestamp(starttime)
    year = starttime.strftime("%Y")
    dayofyear = starttime.strftime("%j")

    paths = []
    for channel in channels.split(","):
        # Tolerate "HHE, HHN" style lists; a stray space would otherwise end up in the path.
        channel = channel.strip()
        if provider_key == "scedc":
            # Station is padded to 5 and location to 2 characters with "_";
            # an empty location therefore becomes "__" without special-casing.
            path = f"{root_path}/{provider_key}-pds/continuous_waveforms/{year}/{year}_{dayofyear}/{network}{station:_<5}{channel}{location:_<2}_{year}{dayofyear}.ms"
        else:
            path = f"{root_path}/{provider_key}-pds/continuous_waveforms/{network}/{year}/{year}.{dayofyear}/{station}.{network}.{channel}.{location}.D.{year}.{dayofyear}"
        paths.append(path)

    return paths


# %%
if __name__ == "__main__":
    # %%
    # One request per station; provider, network, channels and date are shared by all of them.
    mseed_list = [
        {
            "provider": "ncedc",
            "network": "NC",
            "station": station,
            "location": "",
            "channels": "HHE,HHN,HHZ",
            "year": "2012",
            "month": "01",
            "day": "01",
        }
        for station in ("KCT", "KRP", "KHMB")
    ]
    # %%
    root_path = "s3:/"
    file_list = []
    for entry in mseed_list:
        day_start = pd.Timestamp("{year}-{month}-{day}T00:00:00".format(**entry))
        channel_paths = map_cloud_path(
            root_path,
            entry["provider"],
            day_start,
            entry["network"],
            entry["station"],
            entry["location"],
            entry["channels"],
        )
        # All channel files of one station form a single "|"-separated entry.
        file_list.append("|".join(channel_paths))

    # Header line followed by one entry per station (no trailing newline).
    with open("data_list.txt", "w") as f:
        f.write("file_name\n" + "\n".join(file_list))

    # Every unordered pair (i < j) of row indices into data_list.txt.
    num_files = len(file_list)
    pair_lines = [f"{i},{j}\n" for i in range(num_files) for j in range(i + 1, num_files)]
    with open("pair_list.txt", "w") as f:
        f.writelines(pair_lines)


# %%
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ def get_args_parser(add_help=True):
index = index[sorted_idx]
data = data[sorted_idx]

np.savez(figure_path / f"result_{ch1}.npz", data=data, index=index)
np.savez(figure_path / f"ambient_noise_das_{ch1}.npz", data=data, index=index)
plt.figure()
vmax = np.std(data)
plt.imshow(data, vmin=-vmax, vmax=vmax, aspect="auto", cmap="RdBu")
plt.colorbar()
plt.savefig(figure_path / f"result_{ch1}.png", dpi=300, bbox_inches="tight")
plt.savefig(figure_path / f"ambient_noise_das_{ch1}.png", dpi=300, bbox_inches="tight")
53 changes: 53 additions & 0 deletions scripts/plot_ambient_noise_mseed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# %%
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm.auto import tqdm


def get_args_parser(add_help=True):
    """Create the command-line parser of the result-plotting script.

    Args:
        add_help: Forwarded to ``argparse.ArgumentParser``; controls whether
            the ``-h/--help`` option is added.

    Returns:
        A parser with the string options ``--result_path`` (default
        ``"results"``) and ``--figure_path`` (default ``"figures"``).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Read CCTorch Results", add_help=add_help)
    # (flag, default, help) for each path option.
    path_options = (
        ("--result_path", "results", "path to results"),
        ("--figure_path", "figures", "path to figures"),
    )
    for flag, default, help_text in path_options:
        parser.add_argument(flag, type=str, default=default, help=help_text)
    return parser


# %%
if __name__ == "__main__":
    # Plot the "xcorr" traces of every channel pair found in the result HDF5
    # files and save one PNG per pair into the figure directory.

    args = get_args_parser().parse_args()

    result_path = Path(args.result_path)
    figure_path = Path(args.figure_path)
    # exist_ok replaces the separate existence check and tolerates reruns.
    figure_path.mkdir(parents=True, exist_ok=True)

    h5_files = sorted(result_path.glob("*.h5"))
    print(f"{len(h5_files)} hdf5 files found")

    for h5_file in h5_files:
        with h5py.File(h5_file, "r") as fp:
            print(fp.keys())
            # Files are laid out as <ch1>/<ch2>/xcorr.
            for ch1 in fp.keys():
                for ch2 in fp[ch1].keys():
                    xcorr = fp[f"{ch1}/{ch2}"]["xcorr"]
                    fig = plt.figure()
                    # Rows 0-2 are drawn with a vertical offset equal to the row
                    # index so the traces do not overlap.
                    # NOTE(review): presumably one row per component — confirm.
                    for row in range(3):
                        plt.plot(xcorr[row, :] + row)
                    plt.savefig(figure_path / f"ambient_noise_{ch1}_{ch2}.png", dpi=300, bbox_inches="tight")
                    # Release the figure; otherwise one stays open per channel pair.
                    plt.close(fig)

0 comments on commit fff8c91

Please sign in to comment.