
Commit

update adloc_dtcc
zhuwq0 committed Sep 12, 2024
1 parent 4e3d280 commit a31c424
Showing 16 changed files with 1,176 additions and 54 deletions.
22 changes: 13 additions & 9 deletions examples/california/run_adloc.py
@@ -257,13 +257,15 @@ def run_adloc(
config["min_score"] = 0.5
config["min_p_picks"] = 0 # for filtering
config["min_s_picks"] = 0 # for filtering
stations["station_term_time"] = 0.0 # no station term
stations["station_term_time"] = 0.0 # no station term
# picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter)
if iter == 0:
picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)
else:
# picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter)
picks, events = invert_location(picks[picks['mask']==1], stations, config, estimator, events_init=events_init, iter=iter)
picks, events = invert_location(
picks[picks["mask"] == 1], stations, config, estimator, events_init=events_init, iter=iter
)
# station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
station_term_time = (
picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
@@ -279,16 +281,16 @@ def run_adloc(
)
## Separate P and S station term
# station_term = (
# picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_s": "mean"}).reset_index()
# picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual": "mean"}).reset_index()
# )
# stations["station_term_p"] = (
# stations["idx_sta"]
# .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual_s"])
# .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual"])
# .fillna(0)
# )
# stations["station_term_s"] = (
# stations["idx_sta"]
# .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual_s"])
# .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual"])
# .fillna(0)
# )

@@ -326,17 +328,19 @@ def run_adloc(
events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
events.sort_values(["time"], inplace=True)

picks.rename({"mask": "adloc_mask", "residual_s": "adloc_residual_s"}, axis=1, inplace=True)
picks.rename({"mask": "adloc_mask", "residual_time": "adloc_residual_time"}, axis=1, inplace=True)
if "residual_amplitude" in picks.columns:
picks.rename({"residual_amplitude": "adloc_residual_amplitude"}, axis=1, inplace=True)
picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"})
picks.drop(["idx_eve", "idx_sta"], axis=1, inplace=True, errors="ignore")
picks.sort_values(["phase_time"], inplace=True)

# stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore")
# stations.rename({"station_term": "adloc_station_term_s"}, axis=1, inplace=True)

# picks.to_csv(os.path.join(result_path, "ransac_picks_sst.csv"), index=False)
# events.to_csv(os.path.join(result_path, "ransac_events_sst.csv"), index=False)
# stations.to_csv(os.path.join(result_path, "ransac_stations_sst.csv"), index=False)
# picks.to_csv(os.path.join(result_path, "ransac_picks.csv"), index=False)
# events.to_csv(os.path.join(result_path, "ransac_events.csv"), index=False)
# stations.to_csv(os.path.join(result_path, "ransac_stations.csv"), index=False)
picks.to_csv(f"{root_path}/{adloc_picks_csv}", index=False)
events.to_csv(f"{root_path}/{adloc_events_csv}", index=False)
# stations.to_json(f"{root_path}/{result_path}/adloc_stations_{jday:03d}.json", orient="index")
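The station-term update in this file's diff follows a common pandas pattern: average the travel-time residuals of the picks kept by RANSAC (mask == 1) per station, then map that per-station mean back onto the station table. A minimal, self-contained sketch of that pattern on toy data (column names follow the diff; the toy frames and values are purely illustrative):

import pandas as pd

# toy inputs with only the columns the pattern uses
picks = pd.DataFrame(
    {"idx_sta": [0, 0, 1], "mask": [1.0, 1.0, 1.0], "residual_time": [0.10, 0.30, -0.05]}
)
stations = pd.DataFrame({"idx_sta": [0, 1, 2]})

# mean travel-time residual per station, using only picks kept by RANSAC
station_term_time = (
    picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
)

# map the term back onto the station table; stations with no picks default to 0
stations["station_term_time"] = (
    stations["idx_sta"].map(station_term_time.set_index("idx_sta")["residual_time"]).fillna(0)
)
print(stations)  # station terms: 0.2, -0.05, 0.0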
31 changes: 31 additions & 0 deletions scripts/config.json
@@ -56,6 +56,22 @@
"SCEDC"
]
},
"Lamont2024": {
"longitude0": -119.097,
"latitude0": 35.109,
"maxradius_degree": 1.0,
"minlatitude": 34.809,
"maxlatitude": 35.409,
"minlongitude": -119.397,
"maxlongitude": -118.897,
"mindepth": 0,
"maxdepth": 30,
"starttime": "2024-08-07T00:00:00",
"endtime": "2024-08-30T00:00:00",
"provider": [
"SCEDC"
]
},
"Hawaii": {
"minlatitude": 18.39,
"maxlatitude": 20.39,
@@ -164,6 +180,21 @@
"provider": [
"NCEDC"
]
},
"Forge": {
"starttime": "2024-04-01T00:00:00",
"endtime": "2024-05-01T00:00:00",
"minlatitude": 38.28,
"maxlatitude": 38.68,
"minlongitude": -113.06,
"maxlongitude": -112.66,
"mindepth": 0,
"maxdepth": 10,
"network": "*",
"channel": "HH*,BH*,EH*,HN*,DP*",
"provider": [
"IRIS"
]
}
}
}
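For reference, a minimal sketch (not part of this commit) of reading one of the new region entries from scripts/config.json. The keys wrapping the region blocks are not visible in this diff, so the "regions" lookup below is a hypothetical placeholder; adjust it to the file's actual top-level layout:

import json

with open("scripts/config.json") as f:
    cfg = json.load(f)

# "regions" is a hypothetical key standing in for whatever level holds the entries above
forge = cfg["regions"]["Forge"]
print(forge["starttime"], forge["endtime"], forge["provider"])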
6 changes: 3 additions & 3 deletions scripts/cut_templates_cc.py
@@ -340,13 +340,13 @@ def cut_templates(root_path, region, config)
)

# %%
stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv")
stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations.csv")
stations.sort_values(by=["latitude", "longitude"], inplace=True)
print(f"{len(stations) = }")
print(stations.iloc[:5])

# %%
events = pd.read_csv(f"{root_path}/{data_path}/ransac_events_sst.csv", parse_dates=["time"])
events = pd.read_csv(f"{root_path}/{data_path}/ransac_events.csv", parse_dates=["time"])
events.rename(columns={"time": "event_time"}, inplace=True)
events["event_time"] = pd.to_datetime(events["event_time"], utc=True)
reference_t0 = events["event_time"].min()
@@ -396,7 +396,7 @@ def cut_templates(root_path, region, config)
eikonal = init_eikonal2d(eikonal)

# %%
picks = pd.read_csv(f"{root_path}/{region}/adloc/ransac_picks_sst.csv")
picks = pd.read_csv(f"{root_path}/{region}/adloc/ransac_picks.csv")
picks = picks[picks["adloc_mask"] == 1]
picks["phase_time"] = pd.to_datetime(picks["phase_time"], utc=True)
min_phase_score = picks["phase_score"].min()
224 changes: 224 additions & 0 deletions scripts/generate_pairs.py
@@ -0,0 +1,224 @@
# %%
import argparse
import json
import multiprocessing as mp
import os
import pickle
from contextlib import nullcontext

import numpy as np
import pandas as pd
from args import parse_args
from pyproj import Proj
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm


# %%
def pairing_picks(event_pairs, picks, config):
picks = picks[["idx_eve", "idx_sta", "phase_type", "phase_score", "phase_time"]].copy()
merged = pd.merge(
event_pairs,
picks,
left_on="idx_eve1",
right_on="idx_eve",
)
merged = pd.merge(
merged,
picks,
left_on=["idx_eve2", "idx_sta", "phase_type"],
right_on=["idx_eve", "idx_sta", "phase_type"],
suffixes=("_1", "_2"),
)
merged = merged.rename(columns={"phase_time_1": "phase_time1", "phase_time_2": "phase_time2"})
merged["phase_score"] = (merged["phase_score_1"] + merged["phase_score_2"]) / 2.0

merged["travel_time1"] = (merged["phase_time1"] - merged["event_time1"]).dt.total_seconds()
merged["travel_time2"] = (merged["phase_time2"] - merged["event_time2"]).dt.total_seconds()
merged["phase_dtime"] = merged["travel_time1"] - merged["travel_time2"]

# filtering
# merged = merged.sort_values("phase_score", ascending=False)
merged = (
merged.groupby(["idx_eve1", "idx_eve2"], group_keys=False)
.apply(lambda x: (x.nlargest(config["MAX_OBS"], "phase_score") if len(x) > config["MIN_OBS"] else None))
.reset_index(drop=True)
)

return merged[["idx_eve1", "idx_eve2", "idx_sta", "phase_type", "phase_score", "phase_dtime"]]
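
# Note (illustrative commentary, not part of the committed file): pairing_picks expects
# event_pairs with columns [idx_eve1, idx_eve2, event_time1, event_time2] and picks with
# [idx_eve, idx_sta, phase_type, phase_score, phase_time]. For every station/phase observed
# by both events of a pair it returns the differential travel time
# phase_dtime = travel_time1 - travel_time2, keeping at most MAX_OBS of the highest-scoring
# observations per pair and dropping pairs with MIN_OBS observations or fewer.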


# %%
if __name__ == "__main__":

args = parse_args()
root_path = args.root_path
region = args.region

data_path = f"{root_path}/{region}/adloc"
result_path = f"{root_path}/{region}/adloc_dd"
if not os.path.exists(result_path):
os.makedirs(result_path)

# %%
pick_file = os.path.join(data_path, "ransac_picks.csv")
event_file = os.path.join(data_path, "ransac_events.csv")
station_file = os.path.join(data_path, "ransac_stations.csv")

# %%
MAX_PAIR_DIST = 10 # km
MAX_NEIGHBORS = 50
MIN_NEIGHBORS = 8
MIN_OBS = 8
MAX_OBS = 100
config = {}
config["MAX_PAIR_DIST"] = MAX_PAIR_DIST
config["MAX_NEIGHBORS"] = MAX_NEIGHBORS
config["MIN_NEIGHBORS"] = MIN_NEIGHBORS
config["MIN_OBS"] = MIN_OBS
config["MAX_OBS"] = MAX_OBS
mapping_phase_type_int = {"P": 0, "S": 1}

# %%
stations = pd.read_csv(station_file)
picks = pd.read_csv(pick_file, parse_dates=["phase_time"])
events = pd.read_csv(event_file, parse_dates=["time"])

picks = picks[picks["event_index"] != -1]
# check phase_type is P/S or 0/1
if set(picks["phase_type"].unique()).issubset(set(mapping_phase_type_int.keys())): # P/S
picks["phase_type"] = picks["phase_type"].map(mapping_phase_type_int)

# %%
if "idx_eve" in events.columns:
events = events.drop("idx_eve", axis=1)
if "idx_sta" in stations.columns:
stations = stations.drop("idx_sta", axis=1)
if "idx_eve" in picks.columns:
picks = picks.drop("idx_eve", axis=1)
if "idx_sta" in picks.columns:
picks = picks.drop("idx_sta", axis=1)

# %%
# reindex in case the index does not start from 0 or is not continuous
stations = stations[stations["station_id"].isin(picks["station_id"].unique())]
events = events[events["event_index"].isin(picks["event_index"].unique())]
stations["idx_sta"] = np.arange(len(stations))
events["idx_eve"] = np.arange(len(events))

picks = picks.merge(events[["event_index", "idx_eve"]], on="event_index")
picks = picks.merge(stations[["station_id", "idx_sta"]], on="station_id")

# %%
lon0 = stations["longitude"].median()
lat0 = stations["latitude"].median()
proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km")

stations[["x_km", "y_km"]] = stations.apply(
lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
)
stations["depth_km"] = -stations["elevation_m"] / 1000
stations["z_km"] = stations["depth_km"]

events[["x_km", "y_km"]] = events.apply(
lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1
)
events["z_km"] = events["depth_km"]

picks = picks.merge(events[["idx_eve", "time"]], on="idx_eve")
picks["travel_time"] = (picks["phase_time"] - picks["time"]).dt.total_seconds()
picks.drop("time", axis=1, inplace=True)

# %%
picks_by_event = picks.groupby("idx_eve")

# Option 1:
neigh = NearestNeighbors(radius=MAX_PAIR_DIST, n_jobs=-1)
neigh.fit(events[["x_km", "y_km", "z_km"]].values)
pairs = set()
neigh_ind = neigh.radius_neighbors(sort_results=True)[1]
for i, neighs in enumerate(tqdm(neigh_ind, desc="Generating pairs")):
if len(neighs) < MIN_NEIGHBORS:
continue
for j in neighs[:MAX_NEIGHBORS]:
if i < j:
pairs.add((i, j))
pairs = list(pairs)
event_pairs = pd.DataFrame(list(pairs), columns=["idx_eve1", "idx_eve2"])
print(f"Number of events: {len(events)}")
print(f"Number of event pairs: {len(event_pairs)}")
event_pairs["event_time1"] = events["time"].iloc[event_pairs["idx_eve1"]].values
event_pairs["event_time2"] = events["time"].iloc[event_pairs["idx_eve2"]].values

# Option 2:
# neigh = NearestNeighbors(radius=MAX_PAIR_DIST, n_jobs=-1)
# neigh.fit(events[["x_km", "y_km", "z_km"]].values)
# pairs = set()
# neigh_ind = neigh.radius_neighbors()[1]
# for i, neighs in enumerate(tqdm(neigh_ind, desc="Generating pairs")):
# if len(neighs) < MIN_NEIGHBORS:
# continue
# neighs = neighs[np.argsort(events.loc[neighs, "num_picks"])] ## TODO: check if useful
# for j in neighs[:MAX_NEIGHBORS]:
# if i > j:
# pairs.add((j, i))
# else:
# pairs.add((i, j))
# pairs = list(pairs)

# %%
chunk_size = 10_000
num_chunk = max(1, len(event_pairs) // chunk_size)  # guard against fewer pairs than one chunk
pbar = tqdm(total=num_chunk, desc="Pairing picks")
results = []
jobs = []
ctx = mp.get_context("spawn")
ncpu = min(num_chunk, min(32, mp.cpu_count()))
picks = picks.set_index("idx_eve")
with ctx.Pool(processes=ncpu) as pool:
for i in np.array_split(np.arange(len(event_pairs)), num_chunk):
event_pairs_ = event_pairs.iloc[i]
idx = np.unique(event_pairs_[["idx_eve1", "idx_eve2"]].values.flatten())
picks_ = picks.loc[idx].reset_index()
job = pool.apply_async(pairing_picks, args=(event_pairs_, picks_, config), callback=lambda x: pbar.update())
jobs.append(job)
pool.close()
pool.join()
for job in jobs:
results.append(job.get())

event_pairs = pd.concat(results, ignore_index=True)
event_pairs = event_pairs.drop_duplicates()

print(f"Number of pick pairs: {len(event_pairs)}")

dtypes = np.dtype(
[
("event_index1", np.int32),
("event_index2", np.int32),
("station_index", np.int32),
("phase_type", np.int32),
("phase_score", np.float32),
("phase_dtime", np.float32),
]
)
pairs_array = np.memmap(
os.path.join(result_path, "pair_dt.dat"),
mode="w+",
shape=(len(event_pairs),),
dtype=dtypes,
)
pairs_array["event_index1"] = event_pairs["idx_eve1"].values
pairs_array["event_index2"] = event_pairs["idx_eve2"].values
pairs_array["station_index"] = event_pairs["idx_sta"].values
pairs_array["phase_type"] = event_pairs["phase_type"].values
pairs_array["phase_score"] = event_pairs["phase_score"].values
pairs_array["phase_dtime"] = event_pairs["phase_dtime"].values
with open(os.path.join(result_path, "pair_dtypes.pkl"), "wb") as f:
pickle.dump(dtypes, f)

events.to_csv(os.path.join(result_path, "pair_events.csv"), index=False)
stations.to_csv(os.path.join(result_path, "pair_stations.csv"), index=False)
picks.to_csv(os.path.join(result_path, "pair_picks.csv"), index=False)

# %%
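A minimal read-back sketch (not part of this commit) for the pair_dt.dat memmap written above, assuming the same output layout; the result_path below is a hypothetical example. With mode="r" and no explicit shape, numpy maps the whole file as a 1-D structured array using the pickled dtype:

import os
import pickle

import numpy as np

result_path = "local/demo/adloc_dd"  # hypothetical; use the same result_path as above

with open(os.path.join(result_path, "pair_dtypes.pkl"), "rb") as f:
    dtypes = pickle.load(f)

pairs = np.memmap(os.path.join(result_path, "pair_dt.dat"), mode="r", dtype=dtypes)
print(len(pairs), pairs["phase_dtime"][:5])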
(Diffs for the remaining 12 changed files are not shown.)

