run_adloc_v2.py works
zhuwq0 committed Oct 24, 2024
1 parent 270f1fd commit 0208470
Showing 5 changed files with 261 additions and 130 deletions.
166 changes: 166 additions & 0 deletions scripts/merge_adloc_picks.py
@@ -0,0 +1,166 @@
# %%
import json
import multiprocessing as mp
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from datetime import datetime, timedelta, timezone
from threading import Lock, Thread

import fsspec
import numpy as np
import pandas as pd
import pyproj
from obspy import read_inventory
from obspy.clients.fdsn import Client
from sklearn.cluster import DBSCAN
from tqdm import tqdm
from args import parse_args
from glob import glob
import matplotlib.pyplot as plt
from utils.plotting import plotting_ransac

# %%
if __name__ == "__main__":

args = parse_args()
root_path = args.root_path
region = args.region
iter = args.iter

data_path = f"{region}/adloc"
result_path = f"{region}/adloc"
figure_path = f"{region}/adloc/figures"
if not os.path.exists(figure_path):
os.makedirs(figure_path)

# %%
# protocol = "gs"
# token_json = f"{os.environ['HOME']}/.config/gcloud/application_default_credentials.json"
# with open(token_json, "r") as fp:
# token = json.load(fp)
# fs = fsspec.filesystem(protocol, token=token)

# %%
event_csvs = sorted(glob(f"{root_path}/{data_path}/????/????.???.events_sst_{iter}.csv"))

# %%
events = []
picks = []
stations = []
for event_csv in tqdm(event_csvs, desc="Load event csvs"):
pick_csv = event_csv.replace(f"events_sst_{iter}.csv", f"picks_sst_{iter}.csv")
station_csv = event_csv.replace(f"events_sst_{iter}.csv", f"stations_sst_{iter}.csv")

year, jday = event_csv.split("/")[-1].split(".")[:2]
events_ = pd.read_csv(event_csv, dtype=str)
picks_ = pd.read_csv(pick_csv, dtype=str)
stations_ = pd.read_csv(station_csv)
events_["year"] = year
events_["jday"] = jday
picks_["year"] = year
picks_["jday"] = jday
stations_["year"] = year
stations_["jday"] = jday
events.append(events_)
picks.append(picks_)
stations.append(stations_)

events = pd.concat(events, ignore_index=True)
picks = pd.concat(picks, ignore_index=True)
stations = pd.concat(stations, ignore_index=True)

station_terms = (
stations.groupby(["station_id"])
.apply(
lambda x: pd.Series(
{
"station_term_time_p": (
(x.station_term_time_p * x.num_pick_p).sum() / x.num_pick_p.sum()
if x.num_pick_p.sum() > 0
else 0
),
"station_term_time_s": (
(x.station_term_time_s * x.num_pick_s).sum() / x.num_pick_s.sum()
if x.num_pick_s.sum() > 0
else 0
),
"station_term_amplitude": (
(x.station_term_amplitude * x.num_pick).sum() / x.num_pick.sum() if x.num_pick.sum() > 0 else 0
),
}
)
)
.reset_index()
)
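# The per-day station terms above are combined into a single term per station
# using a pick-count-weighted mean, e.g. sum(term_p * num_pick_p) / sum(num_pick_p),
# so days with more picks contribute proportionally more.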
if iter > 0:
stations_prev = pd.read_csv(f"{root_path}/{result_path}/adloc_stations_sst_{iter-1}.csv")
stations_prev.set_index("station_id", inplace=True)

station_terms["station_term_time_p"] += (
station_terms["station_id"].map(stations_prev["station_term_time_p"]).fillna(0)
)
station_terms["station_term_time_s"] += (
station_terms["station_id"].map(stations_prev["station_term_time_s"]).fillna(0)
)
station_terms["station_term_amplitude"] += (
station_terms["station_id"].map(stations_prev["station_term_amplitude"]).fillna(0)
)
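# Station terms are cumulative across SST iterations: the terms estimated at this
# iteration are added to those saved at iteration `iter - 1`, so the stations CSV
# written below always holds the running total.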

stations = stations.groupby(["station_id"]).first().reset_index()
stations.drop(["station_term_time_p", "station_term_time_s", "station_term_amplitude"], axis=1, inplace=True)
stations = stations.merge(station_terms, on="station_id")

events["dummy_id"] = events["year"] + "." + events["jday"] + "." + events["event_index"]
picks["dummy_id"] = picks["year"] + "." + picks["jday"] + "." + picks["event_index"]

events["event_index"] = np.arange(len(events))
picks = picks.drop("event_index", axis=1)
picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")
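# `dummy_id` (year.jday.event_index) disambiguates the per-day event indices before
# the global renumbering above; picks inherit the new global `event_index` through
# the merge. Note the default inner join drops picks whose event did not survive
# relocation.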

events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
stations.drop(["year", "jday"], axis=1, inplace=True)

events.to_csv(f"{root_path}/{result_path}/adloc_events_sst_{iter}.csv", index=False)
picks.to_csv(f"{root_path}/{result_path}/adloc_picks_sst_{iter}.csv", index=False)
stations.to_csv(f"{root_path}/{result_path}/adloc_stations_sst_{iter}.csv", index=False)

# %%

events = pd.read_csv(f"{root_path}/{result_path}/adloc_events_sst_{iter}.csv")
picks = pd.read_csv(f"{root_path}/{result_path}/adloc_picks_sst_{iter}.csv")
stations = pd.read_csv(f"{root_path}/{result_path}/adloc_stations_sst_{iter}.csv")

fig, ax = plt.subplots(3, 3, figsize=(12, 10))
ax[0, 0].scatter(events["longitude"], events["latitude"], c=events["depth_km"], s=1, cmap="viridis_r")
ax[0, 0].set_title(f"Events {len(events)}")
ax[0, 1].scatter(events["longitude"], events["depth_km"], c=events["depth_km"], s=1, cmap="viridis_r")
ax[0, 1].invert_yaxis()
ax[0, 1].set_title(f"Events depth")
ax[0, 2].scatter(events["latitude"], events["depth_km"], c=events["depth_km"], s=1, cmap="viridis_r")
ax[0, 2].invert_yaxis()
ax[0, 2].set_title(f"Events latitude")
ax[1, 0].scatter(
stations["longitude"], stations["latitude"], c=stations["station_term_time_p"], marker="^", cmap="viridis_r"
)
ax[1, 0].set_title(f"Station term time P {stations['station_term_time_p'].mean():.2f} s")
ax[1, 1].scatter(
stations["longitude"], stations["latitude"], c=stations["station_term_time_s"], marker="^", cmap="viridis_r"
)
ax[1, 1].set_title(f"Station term time S {stations['station_term_time_s'].mean():.2f} s")
ax[1, 2].scatter(
stations["longitude"], stations["latitude"], c=stations["station_term_amplitude"], marker="^", cmap="viridis_r"
)
ax[1, 2].set_title(f"Station term amplitude {stations['station_term_amplitude'].mean():.2f} m")
ax[2, 0].hist(events["adloc_residual_time"], bins=30, edgecolor="white")
ax[2, 0].set_title(f"Event residual time")
ax[2, 1].hist(events["adloc_residual_amplitude"], bins=30, edgecolor="white")
ax[2, 1].set_title(f"Event residual amplitude")
idx = picks["adloc_mask"] == 1
ax[2, 2].hist(picks.loc[idx, "adloc_residual_time"], bins=30, edgecolor="white")
ax[2, 2].set_title(f"Pick residual time")
# ax[2, 2].hist(picks["adloc_residual_amplitude"], bins=30, edgecolor="white")
# ax[2, 2].set_title(f"Pick residual amplitude")
plt.tight_layout()
plt.savefig(f"{root_path}/{figure_path}/adloc_summary_{iter}.png")
plt.close()
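For reference, the core of the merge above is the pick-count-weighted station term. A minimal self-contained sketch of the same aggregation in vectorized form (column names are taken from the script; the station id and numbers are made up):

import pandas as pd

# Two days of results for one station (toy numbers).
stations = pd.DataFrame(
    {
        "station_id": ["NC.ABC..HH", "NC.ABC..HH"],
        "station_term_time_p": [0.10, 0.30],  # daily P station terms (s)
        "num_pick_p": [10, 30],  # daily P pick counts
    }
)

# Equivalent to the groupby/apply in merge_adloc_picks.py:
# (0.10 * 10 + 0.30 * 30) / (10 + 30) = 0.25 s
weighted = (stations["station_term_time_p"] * stations["num_pick_p"]).groupby(stations["station_id"]).sum()
term_p = weighted / stations.groupby("station_id")["num_pick_p"].sum()
print(term_p)  # NC.ABC..HH    0.25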
24 changes: 1 addition & 23 deletions scripts/merge_gamma_picks.py
@@ -18,28 +18,6 @@
from glob import glob


-def load_data(year, jday, data_path, root_path, bucket, protocol, token):
-
-fs = fsspec.filesystem(protocol, token=token)
-adloc_events_csv = f"{data_path}/{year:04d}/adloc_events_{jday:03d}.csv"
-adloc_picks_csv = f"{data_path}/{year:04d}/adloc_picks_{jday:03d}.csv"
-if protocol == "file":
-events = pd.read_csv(f"{root_path}/{adloc_events_csv}", parse_dates=["time"])
-picks = pd.read_csv(f"{root_path}/{adloc_picks_csv}", parse_dates=["phase_time"])
-else:
-with fs.open(f"{bucket}/{adloc_events_csv}", "r") as fp:
-events = pd.read_csv(fp, parse_dates=["time"])
-with fs.open(f"{bucket}/{adloc_picks_csv}", "r") as fp:
-picks = pd.read_csv(fp, parse_dates=["phase_time"])
-
-events["year"] = year
-events["jday"] = jday
-picks["year"] = year
-picks["jday"] = jday
-
-return events, picks
-
-
# %%
if __name__ == "__main__":

@@ -83,7 +61,7 @@ def load_data(year, jday, data_path, root_path, bucket, protocol, token):

events["event_index"] = np.arange(len(events))
picks = picks.drop("event_index", axis=1)
-picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id")
+picks = picks.merge(events[["dummy_id", "event_index"]], on="dummy_id", how="left")

events.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
picks.drop(["year", "jday", "dummy_id"], axis=1, inplace=True)
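The how="left" added above keeps picks whose dummy_id no longer matches any event (their event_index becomes NaN) instead of silently dropping them, which is what the default inner join does. A toy illustration with made-up ids:

import pandas as pd

picks = pd.DataFrame({"dummy_id": ["a", "b"], "phase_type": ["P", "S"]})
events = pd.DataFrame({"dummy_id": ["a"], "event_index": [0]})

inner = picks.merge(events, on="dummy_id")  # 1 row: pick "b" is dropped
left = picks.merge(events, on="dummy_id", how="left")  # 2 rows: pick "b" keeps a NaN event_index
print(len(inner), len(left))  # 1 2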
44 changes: 16 additions & 28 deletions scripts/run_adloc.py
@@ -26,7 +26,6 @@ def run_adloc(
config: Dict,
node_rank: int = 0,
num_nodes: int = 1,
-picks_csv: str = None,
protocol: str = "file",
bucket: str = "",
token: Dict = None,
@@ -69,13 +68,6 @@
config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 60.0
config["use_amplitude"] = True

-# ## Eikonal for 1D velocity model
-zz = [0.0, 5.5, 16.0, 32.0]
-vp = [5.5, 5.5, 6.7, 7.8]
-vp_vs_ratio = 1.73
-vs = [v / vp_vs_ratio for v in vp]
-h = 0.3
-
# %%
## Automatic region; you can also specify a region
# lon0 = stations["longitude"].median()
@@ -119,6 +111,17 @@
# %%
config["eikonal"] = None

+# ## Eikonal for 1D velocity model
+zz = [0.0, 5.5, 16.0, 32.0]
+vp = [5.5, 5.5, 6.7, 7.8]
+vp_vs_ratio = 1.73
+vs = [v / vp_vs_ratio for v in vp]
+# Northern California (Gil7)
+# zz = [0.0, 1.0, 3.0, 4.0, 5.0, 17.0, 25.0, 62.0]
+# vp = [3.2, 3.2, 4.5, 4.8, 5.51, 6.21, 6.89, 7.83]
+# vs = [1.5, 1.5, 2.4, 2.78, 3.18, 3.40, 3.98, 4.52]
+h = 0.3
+
if os.path.exists(f"{root_path}/{region}/obspy/velocity.csv"):
velocity = pd.read_csv(f"{root_path}/{region}/obspy/velocity.csv")
zz = velocity["z_km"].values
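A note on the relocated block above: zz, vp, and vs describe a layered 1-D velocity model (layer-top depths in km, velocities in km/s), the commented-out Gil7 values are a Northern California alternative, and h appears to be the grid spacing in km for the eikonal traveltime solver. A minimal sketch of sampling such a layered model onto a regular depth grid (plain numpy; this mirrors how piecewise-constant layers are typically rasterized, not ADLoc's internal code):

import numpy as np

zz = [0.0, 5.5, 16.0, 32.0]  # layer-top depths (km)
vp = [5.5, 5.5, 6.7, 7.8]  # P velocities (km/s)
h = 0.3  # grid spacing (km)

z_grid = np.arange(0.0, 32.0 + h, h)
layer = np.searchsorted(zz, z_grid, side="right") - 1  # deepest layer top at or above each depth
vp_grid = np.asarray(vp)[layer]
print(vp_grid[0], vp_grid[-1])  # 5.5 7.8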
@@ -153,17 +156,6 @@
(None, None), # t
)

-# %%
-plt.figure()
-plt.scatter(stations["x_km"], stations["y_km"], c=stations["depth_km"], cmap="viridis_r", s=100, marker="^")
-plt.colorbar(label="Depth (km)")
-plt.xlabel("X (km)")
-plt.ylabel("Y (km)")
-plt.xlim(config["xlim_km"])
-plt.ylim(config["ylim_km"])
-plt.title("Stations")
-plt.savefig(os.path.join(figure_path, "stations.png"), bbox_inches="tight", dpi=300)
-
# %%
mapping_phase_type_int = {"P": 0, "S": 1}
config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()}
@@ -207,9 +199,8 @@
station_term_amp = (
picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
)
stations["station_term_amplitude"] += (
stations["idx_sta"].map(station_term_amp.set_index("idx_sta")["residual_amplitude"]).fillna(0)
)
station_term_amp.set_index("idx_sta", inplace=True)
stations["station_term_amplitude"] += stations["idx_sta"].map(station_term_amp["residual_amplitude"]).fillna(0)

## Same P and S station term
# station_term_time = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
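For the amplitude hunk just above: the station amplitude term is the median amplitude residual over picks that survived filtering (mask == 1.0), accumulated onto the existing term; the refactor only hoists set_index("idx_sta") out of the map call. A sketch with toy numbers (column names from the script):

import pandas as pd

picks = pd.DataFrame({"idx_sta": [0, 0, 1], "residual_amplitude": [0.1, 0.3, -0.2], "mask": [1.0, 1.0, 0.0]})
stations = pd.DataFrame({"idx_sta": [0, 1], "station_term_amplitude": [0.0, 0.0]})

station_term_amp = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_amplitude": "median"}).reset_index()
station_term_amp.set_index("idx_sta", inplace=True)
stations["station_term_amplitude"] += stations["idx_sta"].map(station_term_amp["residual_amplitude"]).fillna(0)
print(stations)  # station 0: median(0.1, 0.3) = 0.2; station 1: all picks masked out -> 0.0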
@@ -224,15 +215,12 @@
station_term_time = (
picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_time": "mean"}).reset_index()
)
+station_term_time.set_index("idx_sta", inplace=True)
stations["station_term_time_p"] += (
-stations["idx_sta"]
-.map(station_term_time[station_term_time["phase_type"] == 0].set_index("idx_sta")["residual_time"])
-.fillna(0)
+stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 0]["residual_time"]).fillna(0)
)
stations["station_term_time_s"] += (
-stations["idx_sta"]
-.map(station_term_time[station_term_time["phase_type"] == 1].set_index("idx_sta")["residual_time"])
-.fillna(0)
+stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 1]["residual_time"]).fillna(0)
)

plotting_ransac(stations, figure_path, config, picks, events_init, events, suffix=f"_ransac_sst_{iter}")
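The time-term hunk above is the same pattern per phase: mean time residual of kept picks, grouped by station and phase_type (mapped to integers earlier, P=0 and S=1), with set_index("idx_sta") likewise hoisted out of the two map calls. A toy sketch of the P-term update (made-up numbers):

import pandas as pd

picks = pd.DataFrame(
    {"idx_sta": [0, 0, 1], "phase_type": [0, 0, 1], "residual_time": [0.2, 0.4, -0.1], "mask": [1.0, 1.0, 1.0]}
)
stations = pd.DataFrame({"idx_sta": [0, 1], "station_term_time_p": [0.0, 0.0]})

station_term_time = (
    picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_time": "mean"}).reset_index()
)
station_term_time.set_index("idx_sta", inplace=True)
stations["station_term_time_p"] += (
    stations["idx_sta"].map(station_term_time[station_term_time["phase_type"] == 0]["residual_time"]).fillna(0)
)
print(stations)  # station 0 gets (0.2 + 0.4) / 2 = 0.3; station 1 has no P picks -> 0.0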
