Showing 6 changed files with 278 additions and 11 deletions.
@@ -0,0 +1,121 @@
# %%
import os
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

# %%
root_path = "local"
region = "hinet"
data_path = f"{root_path}/{region}/cctorch"
result_path = f"{root_path}/{region}/qtm"
if not os.path.exists(result_path):
    os.makedirs(result_path)

# Load the datasets
events = pd.read_csv(f"{data_path}/cctorch_events.csv")
picks = pd.read_csv(f"{data_path}/cctorch_picks.csv")
pairs = pd.read_csv(f"{data_path}/ccpairs/CC_002.csv")
print(f"Events: {events.shape}, Picks: {picks.shape}, Pairs: {pairs.shape}")

# Basic filtering: keep well-constrained events and their picks and pairs
events = events[(events["num_picks"] > 12) & (events["adloc_score"] > 0.9)]
picks = picks[picks["idx_eve"].isin(events["idx_eve"])]
pairs = pairs[pairs["idx_eve1"].isin(events["idx_eve"]) & pairs["idx_eve2"].isin(events["idx_eve"])]
print(f"Events: {events.shape}, Picks: {picks.shape}, Pairs: {pairs.shape}")

# %%
# Step 1: Compute the median CC value per event pair, keep pairs with CC > 0.9,
# and convert correlation to distance (distance = 1 - CC)
median_cc = pairs.groupby(["idx_eve1", "idx_eve2"])["cc"].median().reset_index()
neigh_cc = median_cc[median_cc["cc"] > 0.9].copy()
neigh_cc["distance"] = 1 - neigh_cc["cc"]
# Ensure the distance matrix includes all events (even those without high CC values)
all_events = np.union1d(neigh_cc["idx_eve1"], neigh_cc["idx_eve2"])
distance_matrix = pd.DataFrame(np.ones((len(all_events), len(all_events))), index=all_events, columns=all_events)

# Populate the distance matrix with valid distances from neigh_cc
for _, row in neigh_cc.iterrows():
    distance_matrix.loc[row["idx_eve1"], row["idx_eve2"]] = row["distance"]

# Symmetrize the matrix
distance_matrix = np.minimum(distance_matrix, distance_matrix.T)

# Set the diagonal to 0 (distance of an event to itself)
np.fill_diagonal(distance_matrix.values, 0)
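# %%
# Vectorized check (sketch, not part of the original pipeline): rebuild the
# same distance matrix with a pivot instead of the row-by-row loop above,
# and confirm both constructions agree.
dm_vec = neigh_cc.pivot_table(index="idx_eve1", columns="idx_eve2", values="distance", aggfunc="min")
dm_vec = dm_vec.reindex(index=all_events, columns=all_events).fillna(1.0)
dm_vec = np.minimum(dm_vec, dm_vec.T)
np.fill_diagonal(dm_vec.values, 0)
print("Vectorized construction matches the loop:", np.allclose(dm_vec.values, distance_matrix.values))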
# %%
# Step 2: Apply DBSCAN to the precomputed distance matrix
dbscan = DBSCAN(metric="precomputed", eps=0.1, min_samples=2)
clusters = dbscan.fit_predict(distance_matrix)
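# %%
# Toy illustration (not part of the pipeline): with a precomputed metric,
# DBSCAN clusters events A and B (distance 0.05 < eps=0.1) together and
# labels the isolated event C as noise (-1).
toy = np.array(
    [
        [0.0, 0.05, 1.0],
        [0.05, 0.0, 1.0],
        [1.0, 1.0, 0.0],
    ]
)
print(DBSCAN(metric="precomputed", eps=0.1, min_samples=2).fit_predict(toy))  # -> [0, 0, -1]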
# %%
# Step 3: Map events to clusters and find neighbors
cluster_dict = dict(zip(distance_matrix.index, clusters))
neighbors = defaultdict(list)

for idx, cluster_id in cluster_dict.items():
    if cluster_id == -1:  # Ignore noise
        continue
    # Count the number of neighbors (events paired with CC > 0.9)
    subset = neigh_cc[(neigh_cc["idx_eve1"] == idx) | (neigh_cc["idx_eve2"] == idx)]
    num_neighbors = subset["cc"].count()
    neighbors[cluster_id].append((idx, num_neighbors))

# For each cluster, select the event with the largest number of neighbors
selected_events = {cluster: max(event_list, key=lambda x: x[1])[0] for cluster, event_list in neighbors.items()}
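# %%
# Quick summary (sketch): number of clusters, noise events, and key events.
n_clusters = len(set(clusters)) - (1 if -1 in clusters else 0)
n_noise = int(np.sum(clusters == -1))
print(f"Clusters: {n_clusters}, noise events: {n_noise}, key events: {len(selected_events)}")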
# %%
# Step 4: Map the filtered `events` and `picks` based on the `selected_events`
# First create a mapping from each clustered event to its cluster's key event
event_to_key_event = {}
for cluster, key_event in selected_events.items():
    for idx, _ in neighbors[cluster]:
        event_to_key_event[idx] = key_event

# %%
# Step 5: Filter events by `idx_eve`, keeping the one with the largest `num_picks`
# Map `idx_eve` to the key event; unclustered events fall back to themselves
events["mapped_idx_eve"] = events["idx_eve"].map(lambda x: event_to_key_event.get(x, x))

# %%
# Now filter events by the mapped `idx_eve` (key events), keeping the one with the largest `num_picks`
filtered_events = events.loc[events.groupby("mapped_idx_eve")["num_picks"].idxmax()]

# Step 6: Filter picks by `(idx_eve, idx_sta, phase_type)`, keeping the one with the largest `phase_score`
# Map `idx_eve` in picks to the key event; picks of unclustered events fall back to themselves
picks["mapped_idx_eve"] = picks["idx_eve"].map(lambda x: event_to_key_event.get(x, x))

# Now filter picks by (`mapped_idx_eve`, `idx_sta`, `phase_type`), keeping the one with the largest `phase_score`
filtered_picks = picks.loc[picks.groupby(["mapped_idx_eve", "idx_sta", "phase_type"])["phase_score"].idxmax()]
print(f"Filtered Events: {filtered_events.shape}, Filtered Picks: {filtered_picks.shape}")
# Save the results to files
filtered_events.to_csv(f"{result_path}/qtm_events.csv", index=False)
filtered_picks.to_csv(f"{result_path}/qtm_picks.csv", index=False)
# %%
plt.figure(figsize=(10, 10))
plt.scatter(events["longitude"], events["latitude"], s=1, c="blue", label="All Events")
plt.scatter(
    filtered_events["longitude"],
    filtered_events["latitude"],
    s=1,
    c="red",
    marker="x",
    label="Filtered Events",
)
plt.legend()
plt.savefig(f"{result_path}/filtered_events.png")

# %%
plt.figure(figsize=(10, 10))
plt.hist(events["adloc_score"], bins=100)

# %%
@@ -0,0 +1,133 @@
# %%
import json
import os
from datetime import datetime
from glob import glob

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

# %%
root_path = "local"
region = "hinet"

result_path = f"{region}/qtm"
if not os.path.exists(f"{root_path}/{result_path}"):
    os.makedirs(f"{root_path}/{result_path}")

# %%
with open(f"{root_path}/{region}/config.json", "r") as fp:
    config = json.load(fp)
# %% Get mseed list
# mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????/???/??/*.mseed"))
# mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/????-???/??/*.sac"), reverse=True)
mseed_list = sorted(glob(f"{root_path}/{region}/waveforms/2024-???/??/*.sac"), reverse=True)
subdir = 3  # year-jday/hour/station_id.mseed
mseeds = pd.DataFrame(mseed_list, columns=["fname"])
mseeds["mseed_id"] = mseeds["fname"].apply(lambda x: "/".join(x.replace(".sac", "").split("/")[-subdir:]))
mseeds["station_id"] = mseeds["fname"].apply(lambda x: x.replace(".sac", "").split("/")[-1])
# Remove the component suffix (.E/.N/.Z or .EB/.NB/.ZB)
mseeds["mseed_id"] = mseeds["mseed_id"].apply(lambda x: ".".join(x.split(".")[:-1]))
mseeds["station_id"] = mseeds["station_id"].apply(lambda x: "." + ".".join(x.split(".")[:-1]) + "..")
# Parse the begin time from the year-jday/hour directory structure
mseeds["begin_time"] = mseeds["fname"].apply(
    lambda x: datetime.strptime(
        # f"{x.split('/')[-subdir]}-{x.split('/')[-subdir+1]}T{x.split('/')[-subdir+2]}", "%Y-%jT%H"
        f"{x.split('/')[-subdir]}T{x.split('/')[-subdir+1]}",
        "%Y-%jT%H",
    ).strftime("%Y-%m-%dT%H:%M:%S.%f")
)
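# %%
# Illustration of the "%Y-%jT%H" parsing above: a (hypothetical) path like
# ".../2024-046/05/N.STA.E.sac" yields the string "2024-046T05", i.e. day 46
# of 2024 at hour 05.
print(datetime.strptime("2024-046T05", "%Y-%jT%H"))  # 2024-02-15 05:00:00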
# Group the per-component files of each station/hour into a single row
mseeds = (
    mseeds.groupby("mseed_id")
    .agg(
        {
            "station_id": lambda x: ",".join(x.unique()),
            "begin_time": lambda x: ",".join(x.unique()),
            "fname": lambda x: ",".join(sorted(x)),
        }
    )
    .reset_index()
)
mseeds["idx_mseed"] = np.arange(len(mseeds))
mseeds.to_csv(f"{root_path}/{region}/qtm/mseed_list.csv", index=False)
with open(f"{root_path}/{region}/qtm/mseed_list.txt", "w") as fp:
    fp.write("\n".join(mseeds["fname"]))

print(f"Number of mseed files: {len(mseeds)}")
# %%
# with open(f"{root_path}/{region}/qtm/event_phase_station_id.txt", "r") as fp:
#     event_phase_station_id = fp.read().splitlines()
# picks = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_picks.csv")
picks = pd.read_csv(f"{root_path}/{region}/qtm/qtm_picks.csv")
picks["phase_time"] = pd.to_datetime(picks["phase_time"], format="mixed")
picks["phase_time"] = picks["phase_time"].dt.tz_localize(None)
picks = picks[
    (picks["phase_time"] >= pd.to_datetime("2024-01-01T00:00:00"))
    # & (picks["phase_time"] < pd.to_datetime("2024-01-02T00:00:00"))
]
stations = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_stations.csv")
picks = picks.merge(stations[["idx_sta", "station_id"]], on="idx_sta")
print(picks.iloc[:10])
print(f"Number of picks: {len(picks)}")
# %%
# events = pd.read_csv(f"{root_path}/{region}/cctorch/cctorch_events.csv")
events = pd.read_csv(f"{root_path}/{region}/qtm/qtm_events.csv")
events["event_time"] = pd.to_datetime(events["event_time"], format="mixed")
events["event_time"] = events["event_time"].dt.tz_localize(None)
events = events[
    (events["event_time"] >= pd.to_datetime("2024-01-01T00:00:00"))
    # & (events["event_time"] < pd.to_datetime("2024-01-02T00:00:00"))
]
print(f"Number of events: {len(events)}")
# %% Generate event-mseed pairs
unique_station_ids = np.intersect1d(mseeds["station_id"].unique(), picks["station_id"].unique())
print(f"Number of unique station ids: {len(unique_station_ids)}")

# %%
# For each mseed file, pair it with every pick recorded at the same station
with open(f"{root_path}/{region}/qtm/pairs.txt", "w") as fp:
    mseeds = mseeds.set_index("idx_mseed")
    picks = picks.groupby("station_id")  # note: rebinds `picks` to a GroupBy object
    for idx_mseed, row in tqdm(mseeds.iterrows(), total=len(mseeds), desc="Writing pairs"):
        station_id = row["station_id"]
        if station_id not in unique_station_ids:
            continue
        for idx_pick in picks.get_group(station_id)["idx_pick"].values:
            fp.write(f"{idx_mseed},{idx_pick}\n")
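# %%
# pairs.txt is a plain two-column index file (values below are hypothetical):
#   0,1423
#   0,1424
#   1,87
# where the first column indexes rows of mseed_list.txt and the second is `idx_pick`.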
# %%
# Batch/block sizes chosen based on GPU memory
batch = 16
block_size1 = 1
block_size2 = 100_000  # ~7GB

# %%
base_cmd = (
    f"../../CCTorch/run.py --mode=TM --pair_list={root_path}/{region}/qtm/pairs.txt "
    f"--data_list1={root_path}/{region}/qtm/mseed_list.txt --data_format1=mseed "
    f"--data_list2={root_path}/{region}/cctorch/cctorch_picks.csv --data_path2={root_path}/{region}/cctorch/template.dat --data_format2=memmap "
    f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} --block_size1={block_size1} --block_size2={block_size2} --normalize --reduce_c --result_path={root_path}/{region}/qtm/ccpairs"
)
# %%
# Choose the launch command based on available hardware
num_gpu = torch.cuda.device_count()
if num_gpu == 0:
    if os.uname().sysname == "Darwin":
        cmd = f"python {base_cmd} --device=mps"
    else:
        cmd = f"python {base_cmd} --device=cpu"
elif num_gpu == 1:
    cmd = f"python {base_cmd}"
else:
    cmd = f"torchrun --standalone --nproc_per_node {num_gpu} {base_cmd}"
# %%
print(cmd)
os.system(cmd)