diff --git a/examples/california/run_adloc.py b/examples/california/run_adloc.py index 2081e37..ba07b2c 100644 --- a/examples/california/run_adloc.py +++ b/examples/california/run_adloc.py @@ -257,13 +257,15 @@ def run_adloc( config["min_score"] = 0.5 config["min_p_picks"] = 0 # for filtering config["min_s_picks"] = 0 # for filtering - stations["station_term_time"] = 0.0 # no station term + stations["station_term_time"] = 0.0 # no station term # picks, events = invert_location_iter(picks, stations, config, estimator, events_init=events_init, iter=iter) if iter == 0: picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter) else: # picks, events = invert_location(picks, stations, config, estimator, events_init=events_init, iter=iter) - picks, events = invert_location(picks[picks['mask']==1], stations, config, estimator, events_init=events_init, iter=iter) + picks, events = invert_location( + picks[picks["mask"] == 1], stations, config, estimator, events_init=events_init, iter=iter + ) # station_term = picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index() station_term_time = ( picks[picks["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index() @@ -279,16 +281,16 @@ def run_adloc( ) ## Separate P and S station term # station_term = ( - # picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_s": "mean"}).reset_index() + # picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual": "mean"}).reset_index() # ) # stations["station_term_p"] = ( # stations["idx_sta"] - # .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual_s"]) + # .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual"]) # .fillna(0) # ) # stations["station_term_s"] = ( # stations["idx_sta"] - # .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual_s"]) + # .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual"]) # .fillna(0) # ) @@ -326,7 +328,9 @@ def run_adloc( events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") events.sort_values(["time"], inplace=True) - picks.rename({"mask": "adloc_mask", "residual_s": "adloc_residual_s"}, axis=1, inplace=True) + picks.rename({"mask": "adloc_mask", "residual_time": "adloc_residual_time"}, axis=1, inplace=True) + if "residual_amplitude" in picks.columns: + picks.rename({"residual_amplitude": "adloc_residual_amplitude"}, axis=1, inplace=True) picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"}) picks.drop(["idx_eve", "idx_sta"], axis=1, inplace=True, errors="ignore") picks.sort_values(["phase_time"], inplace=True) @@ -334,9 +338,9 @@ def run_adloc( # stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") # stations.rename({"station_term": "adloc_station_term_s"}, axis=1, inplace=True) - # picks.to_csv(os.path.join(result_path, "ransac_picks_sst.csv"), index=False) - # events.to_csv(os.path.join(result_path, "ransac_events_sst.csv"), index=False) - # stations.to_csv(os.path.join(result_path, "ransac_stations_sst.csv"), index=False) + # picks.to_csv(os.path.join(result_path, "ransac_picks.csv"), index=False) + # events.to_csv(os.path.join(result_path, "ransac_events.csv"), index=False) + # stations.to_csv(os.path.join(result_path, "ransac_stations.csv"), index=False) picks.to_csv(f"{root_path}/{adloc_picks_csv}", index=False) 
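# --- Illustrative sketch (standalone, not part of this patch) ----------------
# The station term ("sst") refined in the loop above is just the mean time
# residual of the inlier picks (mask == 1) at each station, mapped back onto
# the station table; stations with no inlier picks keep a zero term. Toy
# DataFrames below; column names follow the diff.
import pandas as pd

picks_demo = pd.DataFrame(
    {
        "idx_sta": [0, 0, 1, 1, 2],
        "residual_time": [0.10, 0.30, -0.20, -0.40, 0.05],
        "mask": [1.0, 1.0, 1.0, 0.0, 1.0],
    }
)
stations_demo = pd.DataFrame({"idx_sta": [0, 1, 2, 3]})

station_term_time = (
    picks_demo[picks_demo["mask"] == 1.0].groupby("idx_sta").agg({"residual_time": "mean"}).reset_index()
)
stations_demo["station_term_time"] = (
    stations_demo["idx_sta"].map(station_term_time.set_index("idx_sta")["residual_time"]).fillna(0)
)
print(stations_demo)  # station 3 has no inlier picks, so its term stays 0
# ------------------------------------------------------------------------------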
events.to_csv(f"{root_path}/{adloc_events_csv}", index=False) # stations.to_json(f"{root_path}/{result_path}/adloc_stations_{jday:03d}.json", orient="index") diff --git a/scripts/config.json b/scripts/config.json index ad0ede6..7b04ca2 100644 --- a/scripts/config.json +++ b/scripts/config.json @@ -56,6 +56,22 @@ "SCEDC" ] }, + "Lamont2024": { + "longitude0": -119.097, + "latitude0": 35.109, + "maxradius_degree": 1.0, + "minlatitude": 34.809, + "maxlatitude": 35.409, + "minlongitude": -119.397, + "maxlongitude": -118.897, + "mindepth": 0, + "maxdepth": 30, + "starttime": "2024-08-07T00:00:00", + "endtime": "2024-08-30T00:00:00", + "provider": [ + "SCEDC" + ] + }, "Hawaii": { "minlatitude": 18.39, "maxlatitude": 20.39, @@ -164,6 +180,21 @@ "provider": [ "NCEDC" ] + }, + "Forge": { + "starttime": "2024-04-01T00:00:00", + "endtime": "2024-05-01T00:00:00", + "minlatitude": 38.28, + "maxlatitude": 38.68, + "minlongitude": -113.06, + "maxlongitude": -112.66, + "mindepth": 0, + "maxdepth": 10, + "network": "*", + "channel": "HH*,BH*,EH*,HN*,DP*", + "provider": [ + "IRIS" + ] } } } \ No newline at end of file diff --git a/scripts/cut_templates_cc.py b/scripts/cut_templates_cc.py index eaa9749..08654ac 100644 --- a/scripts/cut_templates_cc.py +++ b/scripts/cut_templates_cc.py @@ -340,13 +340,13 @@ def cut_templates(root_path, region, config): ) # %% - stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations_sst.csv") + stations = pd.read_csv(f"{root_path}/{data_path}/ransac_stations.csv") stations.sort_values(by=["latitude", "longitude"], inplace=True) print(f"{len(stations) = }") print(stations.iloc[:5]) # %% - events = pd.read_csv(f"{root_path}/{data_path}/ransac_events_sst.csv", parse_dates=["time"]) + events = pd.read_csv(f"{root_path}/{data_path}/ransac_events.csv", parse_dates=["time"]) events.rename(columns={"time": "event_time"}, inplace=True) events["event_time"] = pd.to_datetime(events["event_time"], utc=True) reference_t0 = events["event_time"].min() @@ -396,7 +396,7 @@ def cut_templates(root_path, region, config): eikonal = init_eikonal2d(eikonal) # %% - picks = pd.read_csv(f"{root_path}/{region}/adloc/ransac_picks_sst.csv") + picks = pd.read_csv(f"{root_path}/{region}/adloc/ransac_picks.csv") picks = picks[picks["adloc_mask"] == 1] picks["phase_time"] = pd.to_datetime(picks["phase_time"], utc=True) min_phase_score = picks["phase_score"].min() diff --git a/scripts/generate_pairs.py b/scripts/generate_pairs.py new file mode 100644 index 0000000..b5546de --- /dev/null +++ b/scripts/generate_pairs.py @@ -0,0 +1,224 @@ +# %% +import argparse +import json +import multiprocessing as mp +import os +import pickle +from contextlib import nullcontext + +import numpy as np +import pandas as pd +from args import parse_args +from pyproj import Proj +from sklearn.neighbors import NearestNeighbors +from tqdm import tqdm + + +# %% +def pairing_picks(event_pairs, picks, config): + picks = picks[["idx_eve", "idx_sta", "phase_type", "phase_score", "phase_time"]].copy() + merged = pd.merge( + event_pairs, + picks, + left_on="idx_eve1", + right_on="idx_eve", + ) + merged = pd.merge( + merged, + picks, + left_on=["idx_eve2", "idx_sta", "phase_type"], + right_on=["idx_eve", "idx_sta", "phase_type"], + suffixes=("_1", "_2"), + ) + merged = merged.rename(columns={"phase_time_1": "phase_time1", "phase_time_2": "phase_time2"}) + merged["phase_score"] = (merged["phase_score_1"] + merged["phase_score_2"]) / 2.0 + + merged["travel_time1"] = (merged["phase_time1"] - merged["event_time1"]).dt.total_seconds() + 
merged["travel_time2"] = (merged["phase_time2"] - merged["event_time2"]).dt.total_seconds() + merged["phase_dtime"] = merged["travel_time1"] - merged["travel_time2"] + + # filtering + # merged = merged.sort_values("phase_score", ascending=False) + merged = ( + merged.groupby(["idx_eve1", "idx_eve2"], group_keys=False) + .apply(lambda x: (x.nlargest(config["MAX_OBS"], "phase_score") if len(x) > config["MIN_OBS"] else None)) + .reset_index(drop=True) + ) + + return merged[["idx_eve1", "idx_eve2", "idx_sta", "phase_type", "phase_score", "phase_dtime"]] + + +# %% +if __name__ == "__main__": + + args = parse_args() + root_path = args.root_path + region = args.region + + data_path = f"{root_path}/{region}/adloc" + result_path = f"{root_path}/{region}/adloc_dd" + if not os.path.exists(result_path): + os.makedirs(result_path) + + # %% + pick_file = os.path.join(data_path, "ransac_picks.csv") + event_file = os.path.join(data_path, "ransac_events.csv") + station_file = os.path.join(data_path, "ransac_stations.csv") + + # %% + MAX_PAIR_DIST = 10 # km + MAX_NEIGHBORS = 50 + MIN_NEIGHBORS = 8 + MIN_OBS = 8 + MAX_OBS = 100 + config = {} + config["MAX_PAIR_DIST"] = MAX_PAIR_DIST + config["MAX_NEIGHBORS"] = MAX_NEIGHBORS + config["MIN_NEIGHBORS"] = MIN_NEIGHBORS + config["MIN_OBS"] = MIN_OBS + config["MAX_OBS"] = MAX_OBS + mapping_phase_type_int = {"P": 0, "S": 1} + + # %% + stations = pd.read_csv(station_file) + picks = pd.read_csv(pick_file, parse_dates=["phase_time"]) + events = pd.read_csv(event_file, parse_dates=["time"]) + + picks = picks[picks["event_index"] != -1] + # check phase_type is P/S or 0/1 + if set(picks["phase_type"].unique()).issubset(set(mapping_phase_type_int.keys())): # P/S + picks["phase_type"] = picks["phase_type"].map(mapping_phase_type_int) + + # %% + if "idx_eve" in events.columns: + events = events.drop("idx_eve", axis=1) + if "idx_sta" in stations.columns: + stations = stations.drop("idx_sta", axis=1) + if "idx_eve" in picks.columns: + picks = picks.drop("idx_eve", axis=1) + if "idx_sta" in picks.columns: + picks = picks.drop("idx_sta", axis=1) + + # %% + # reindex in case the index does not start from 0 or is not continuous + stations = stations[stations["station_id"].isin(picks["station_id"].unique())] + events = events[events["event_index"].isin(picks["event_index"].unique())] + stations["idx_sta"] = np.arange(len(stations)) + events["idx_eve"] = np.arange(len(events)) + + picks = picks.merge(events[["event_index", "idx_eve"]], on="event_index") + picks = picks.merge(stations[["station_id", "idx_sta"]], on="station_id") + + # %% + lon0 = stations["longitude"].median() + lat0 = stations["latitude"].median() + proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km") + + stations[["x_km", "y_km"]] = stations.apply( + lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1 + ) + stations["depth_km"] = -stations["elevation_m"] / 1000 + stations["z_km"] = stations["depth_km"] + + events[["x_km", "y_km"]] = events.apply( + lambda x: pd.Series(proj(longitude=x.longitude, latitude=x.latitude)), axis=1 + ) + events["z_km"] = events["depth_km"] + + picks = picks.merge(events[["idx_eve", "time"]], on="idx_eve") + picks["travel_time"] = (picks["phase_time"] - picks["time"]).dt.total_seconds() + picks.drop("time", axis=1, inplace=True) + + # %% + picks_by_event = picks.groupby("idx_eve") + + # Option 1: + neigh = NearestNeighbors(radius=MAX_PAIR_DIST, n_jobs=-1) + neigh.fit(events[["x_km", "y_km", "z_km"]].values) + pairs = set() + neigh_ind = 
neigh.radius_neighbors(sort_results=True)[1] + for i, neighs in enumerate(tqdm(neigh_ind, desc="Generating pairs")): + if len(neighs) < MIN_NEIGHBORS: + continue + for j in neighs[:MAX_NEIGHBORS]: + if i < j: + pairs.add((i, j)) + pairs = list(pairs) + event_pairs = pd.DataFrame(list(pairs), columns=["idx_eve1", "idx_eve2"]) + print(f"Number of events: {len(events)}") + print(f"Number of event pairs: {len(event_pairs)}") + event_pairs["event_time1"] = events["time"].iloc[event_pairs["idx_eve1"]].values + event_pairs["event_time2"] = events["time"].iloc[event_pairs["idx_eve2"]].values + + # Option 2: + # neigh = NearestNeighbors(radius=MAX_PAIR_DIST, n_jobs=-1) + # neigh.fit(events[["x_km", "y_km", "z_km"]].values) + # pairs = set() + # neigh_ind = neigh.radius_neighbors()[1] + # for i, neighs in enumerate(tqdm(neigh_ind, desc="Generating pairs")): + # if len(neighs) < MIN_NEIGHBORS: + # continue + # neighs = neighs[np.argsort(events.loc[neighs, "num_picks"])] ## TODO: check if useful + # for j in neighs[:MAX_NEIGHBORS]: + # if i > j: + # pairs.add((j, i)) + # else: + # pairs.add((i, j)) + # pairs = list(pairs) + + # %% + chunk_size = 10_000 + num_chunk = len(event_pairs) // chunk_size + pbar = tqdm(total=num_chunk, desc="Pairing picks") + results = [] + jobs = [] + ctx = mp.get_context("spawn") + ncpu = min(num_chunk, min(32, mp.cpu_count())) + picks = picks.set_index("idx_eve") + with ctx.Pool(processes=ncpu) as pool: + for i in np.array_split(np.arange(len(event_pairs)), num_chunk): + event_pairs_ = event_pairs.iloc[i] + idx = np.unique(event_pairs_[["idx_eve1", "idx_eve2"]].values.flatten()) + picks_ = picks.loc[idx].reset_index() + job = pool.apply_async(pairing_picks, args=(event_pairs_, picks_, config), callback=lambda x: pbar.update()) + jobs.append(job) + pool.close() + pool.join() + for job in jobs: + results.append(job.get()) + + event_pairs = pd.concat(results, ignore_index=True) + event_pairs = event_pairs.drop_duplicates() + + print(f"Number of pick pairs: {len(event_pairs)}") + + dtypes = np.dtype( + [ + ("event_index1", np.int32), + ("event_index2", np.int32), + ("station_index", np.int32), + ("phase_type", np.int32), + ("phase_score", np.float32), + ("phase_dtime", np.float32), + ] + ) + pairs_array = np.memmap( + os.path.join(result_path, "pair_dt.dat"), + mode="w+", + shape=(len(event_pairs),), + dtype=dtypes, + ) + pairs_array["event_index1"] = event_pairs["idx_eve1"].values + pairs_array["event_index2"] = event_pairs["idx_eve2"].values + pairs_array["station_index"] = event_pairs["idx_sta"].values + pairs_array["phase_type"] = event_pairs["phase_type"].values + pairs_array["phase_score"] = event_pairs["phase_score"].values + pairs_array["phase_dtime"] = event_pairs["phase_dtime"].values + with open(os.path.join(result_path, "pair_dtypes.pkl"), "wb") as f: + pickle.dump(dtypes, f) + + events.to_csv(os.path.join(result_path, "pair_events.csv"), index=False) + stations.to_csv(os.path.join(result_path, "pair_stations.csv"), index=False) + picks.to_csv(os.path.join(result_path, "pair_picks.csv"), index=False) + +# %% diff --git a/scripts/plot_catalog.py b/scripts/plot_catalog.py index 01d1b0c..20b40d7 100644 --- a/scripts/plot_catalog.py +++ b/scripts/plot_catalog.py @@ -24,6 +24,8 @@ def parse_args(): # %% root_path = args.root_path region = args.region +# root_path = "local" +# region = "demo" result_path = f"{region}/results" figure_path = f"{region}/figures" @@ -52,7 +54,7 @@ def parse_args(): # %% -gamma_file = 
f"{root_path}/{region}/results/phase_association/events.csv" +gamma_file = f"{root_path}/{region}/gamma/gamma_events_000_001.csv" gamma_exist = False if os.path.exists(gamma_file): gamma_exist = True @@ -61,13 +63,28 @@ def parse_args(): # %% -adloc_file = f"{root_path}/{region}/adloc/adloc_events.csv" +adloc_file = f"{root_path}/{region}/adloc/ransac_events.csv" adloc_exist = False if os.path.exists(adloc_file): adloc_exist = True adloc_catalog = pd.read_csv(adloc_file, parse_dates=["time"]) + # adloc_catalog["magnitude"] = 0.0 # gamma_catalog["depth_km"] = gamma_catalog["depth(m)"] / 1e3 +# %% +adloc_dt_file = f"{root_path}/{region}/adloc_dd/adloc_dt_events.csv" +adloc_dt_exist = False +if os.path.exists(adloc_dt_file): + adloc_dt_exist = True + adloc_dt_catalog = pd.read_csv(adloc_dt_file, parse_dates=["time"]) + +# %% +adloc_dtcc_file = f"{root_path}/{region}/adloc_dd/adloc_dtcc_events.csv" +adloc_dtcc_exist = False +if os.path.exists(adloc_dtcc_file): + adloc_dtcc_exist = True + adloc_dtcc_catalog = pd.read_csv(adloc_dtcc_file, parse_dates=["event_time"]) + # %% hypodd_file = f"{root_path}/{region}/hypodd/hypodd_ct.reloc" hypodd_ct_exist = False @@ -113,6 +130,10 @@ def parse_args(): catalog_ct_hypodd = catalog_ct_hypodd[catalog_ct_hypodd["DEPTH"] != "*********"] catalog_ct_hypodd["DEPTH"] = catalog_ct_hypodd["DEPTH"].astype(float) + plt.figure() + plt.scatter(catalog_ct_hypodd["LON"], catalog_ct_hypodd["LAT"], s=2) + plt.show() + # %% hypodd_file = f"{root_path}/{region}/hypodd/hypodd_cc.reloc" hypodd_cc_exist = False @@ -158,6 +179,10 @@ def parse_args(): catalog_cc_hypodd = catalog_cc_hypodd[catalog_cc_hypodd["DEPTH"] != "*********"] catalog_cc_hypodd["DEPTH"] = catalog_cc_hypodd["DEPTH"].astype(float) + plt.figure() + plt.scatter(catalog_cc_hypodd["LON"], catalog_cc_hypodd["LAT"], s=2) + plt.show() + # %% growclust_file = f"{root_path}/{region}/growclust/growclust_ct_catalog.txt" growclust_ct_exist = False @@ -300,8 +325,8 @@ def parse_args(): # %% size_factor = 2600 -fig, ax = plt.subplots(3, 2, squeeze=False, figsize=(10, 15), sharex=True, sharey=True) -for i in range(3): +fig, ax = plt.subplots(4, 2, squeeze=False, figsize=(10, 15), sharex=True, sharey=True) +for i in range(4): for j in range(2): ax[i, j].set_xlim(xlim) ax[i, j].set_ylim(ylim) @@ -350,48 +375,73 @@ def parse_args(): ax[0, 1].legend() # ax[0, 1].set_title(f"AdLoc: {len(adloc_catalog)}") -if hypodd_ct_exist and (len(catalog_ct_hypodd) > 0): +if adloc_dt_exist and (len(adloc_dt_catalog) > 0): ax[1, 0].scatter( + adloc_dt_catalog["longitude"], + adloc_dt_catalog["latitude"], + s=min(2, size_factor / len(adloc_dt_catalog)), + alpha=1.0, + linewidth=0, + label=f"AdLoc_DT: {len(adloc_dt_catalog)}", + ) + ax[1, 0].legend() + # ax[1, 0].set_title(f"AdLoc_DT: {len(adloc_dt_catalog)}") + +if adloc_dtcc_exist and (len(adloc_dtcc_catalog) > 0): + ax[1, 1].scatter( + adloc_dtcc_catalog["longitude"], + adloc_dtcc_catalog["latitude"], + s=min(2, size_factor / len(adloc_dtcc_catalog)), + alpha=1.0, + linewidth=0, + label=f"AdLoc_DTCC: {len(adloc_dtcc_catalog)}", + ) + ax[1, 1].legend() + # ax[1, 1].set_title(f"AdLoc_DTCC: {len(adloc_dtcc_catalog)}") + + +if hypodd_ct_exist and (len(catalog_ct_hypodd) > 0): + ax[2, 0].scatter( catalog_ct_hypodd["LON"], catalog_ct_hypodd["LAT"], s=min(2, size_factor / len(catalog_ct_hypodd)), alpha=1.0, linewidth=0, ) - ax[1, 0].set_title(f"HypoDD (CT): {len(catalog_ct_hypodd)}") + ax[2, 0].set_title(f"HypoDD (CT): {len(catalog_ct_hypodd)}") # ax[1, 0].set_xlim(xlim) # ax[1, 0].set_ylim(ylim) # 
ax[1, 0].set_aspect((ylim[1] - ylim[0]) / ((xlim[1] - xlim[0]) * np.cos(np.mean(ylim) * np.pi / 180))) if hypodd_cc_exist and (len(catalog_cc_hypodd) > 0): - ax[1, 1].scatter( + ax[2, 1].scatter( catalog_cc_hypodd["LON"], catalog_cc_hypodd["LAT"], s=min(2, size_factor / len(catalog_cc_hypodd)), alpha=1.0, linewidth=0, ) - ax[1, 1].set_title(f"HypoDD (CC): {len(catalog_cc_hypodd)}") + ax[2, 1].set_title(f"HypoDD (CC): {len(catalog_cc_hypodd)}") if growclust_ct_exist and (len(growclust_ct_catalog) > 0): - ax[2, 0].scatter( + ax[3, 0].scatter( growclust_ct_catalog["lonR"], growclust_ct_catalog["latR"], s=min(2, size_factor / len(growclust_ct_catalog)), alpha=1.0, linewidth=0, ) - ax[2, 0].set_title(f"GrowClust: {len(growclust_ct_catalog)}") + ax[3, 0].set_title(f"GrowClust: {len(growclust_ct_catalog)}") if growclust_cc_exist and (len(growclust_cc_catalog) > 0): - ax[2, 1].scatter( + ax[3, 1].scatter( growclust_cc_catalog["lonR"], growclust_cc_catalog["latR"], s=min(2, size_factor / len(growclust_cc_catalog)), alpha=1.0, linewidth=0, ) - ax[2, 1].set_title(f"GrowClust: {len(growclust_cc_catalog)}") + ax[3, 1].set_title(f"GrowClust: {len(growclust_cc_catalog)}") fig.tight_layout() plt.savefig(f"{root_path}/{figure_path}/catalogs_location.png", dpi=300) @@ -704,8 +754,7 @@ def plot3d(x, y, z, config, fig_name): config_plot3d = { "xrange": [config["minlongitude"], config["maxlongitude"]], "yrange": [config["minlatitude"], config["maxlatitude"]], - "zrange": [config["gamma"]["zmin_km"], config["gamma"]["zmax_km"]], - # "zrange": [0, 6], + "zrange": [config["mindepth"], config["maxdepth"]], } if gamma_exist and len(gamma_catalog) > 0: diff --git a/scripts/run_adloc.py b/scripts/run_adloc.py index f9a580a..217b2e5 100644 --- a/scripts/run_adloc.py +++ b/scripts/run_adloc.py @@ -65,8 +65,8 @@ def run_adloc( stations["station_id"] = stations.index stations.reset_index(drop=True, inplace=True) - config["mindepth"] = 0.0 - config["maxdepth"] = 30.0 + config["mindepth"] = config["mindepth"] if "mindepth" in config else 0.0 + config["maxdepth"] = config["maxdepth"] if "maxdepth" in config else 30.0 config["use_amplitude"] = True # ## Eikonal for 1D velocity model @@ -207,21 +207,35 @@ def run_adloc( ) ## Separate P and S station term # station_term = ( - # picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual_s": "mean"}).reset_index() + # picks[picks["mask"] == 1.0].groupby(["idx_sta", "phase_type"]).agg({"residual": "mean"}).reset_index() # ) # stations["station_term_p"] = ( # stations["idx_sta"] - # .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual_s"]) + # .map(station_term[station_term["phase_type"] == 0].set_index("idx_sta")["residual"]) # .fillna(0) # ) # stations["station_term_s"] = ( # stations["idx_sta"] - # .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual_s"]) + # .map(station_term[station_term["phase_type"] == 1].set_index("idx_sta")["residual"]) # .fillna(0) # ) plotting_ransac(stations, figure_path, config, picks, events_init, events, suffix=f"_ransac_sst_{iter}") + if "event_index" not in events.columns: + events["event_index"] = events.merge(picks[["idx_eve", "event_index"]], on="idx_eve")["event_index"] + events[["longitude", "latitude"]] = events.apply( + lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 + ) + events["depth_km"] = events["z_km"] + + picks["adloc_mask"] = picks["mask"] + picks["adloc_residual_time"] = picks["residual_time"] + picks["adloc_residual_amplitude"] = 
picks["residual_amplitude"] + picks.to_csv(os.path.join(result_path, f"ransac_picks_sst_{iter}.csv"), index=False) + events.to_csv(os.path.join(result_path, f"ransac_events_sst_{iter}.csv"), index=False) + stations.to_csv(os.path.join(result_path, f"ransac_stations_sst_{iter}.csv"), index=False) + if iter == 0: MIN_SST_S = ( np.mean(np.abs(station_term_time["residual_time"])) / 10.0 @@ -232,9 +246,9 @@ def run_adloc( # break iter += 1 - plotting_ransac(stations, figure_path, config, picks, events_init, events, suffix=f"_ransac_sst") - # %% + plotting_ransac(stations, figure_path, config, picks, events_init, events, suffix=f"_ransac") + if "event_index" not in events.columns: events["event_index"] = events.merge(picks[["idx_eve", "event_index"]], on="idx_eve")["event_index"] events[["longitude", "latitude"]] = events.apply( @@ -244,17 +258,19 @@ def run_adloc( events.drop(["idx_eve", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") events.sort_values(["time"], inplace=True) - picks.rename({"mask": "adloc_mask", "residual_s": "adloc_residual_s"}, axis=1, inplace=True) + # picks.rename({"mask": "adloc_mask", "residual": "adloc_residual"}, axis=1, inplace=True) picks["phase_type"] = picks["phase_type"].map({0: "P", 1: "S"}) - picks.drop(["idx_eve", "idx_sta"], axis=1, inplace=True, errors="ignore") + picks.drop( + ["idx_eve", "idx_sta", "mask", "residual_time", "residual_amplitude"], axis=1, inplace=True, errors="ignore" + ) picks.sort_values(["phase_time"], inplace=True) stations.drop(["idx_sta", "x_km", "y_km", "z_km"], axis=1, inplace=True, errors="ignore") # stations.rename({"station_term": "adloc_station_term_s"}, axis=1, inplace=True) - picks.to_csv(os.path.join(result_path, "ransac_picks_sst.csv"), index=False) - events.to_csv(os.path.join(result_path, "ransac_events_sst.csv"), index=False) - stations.to_csv(os.path.join(result_path, "ransac_stations_sst.csv"), index=False) + picks.to_csv(os.path.join(result_path, "ransac_picks.csv"), index=False) + events.to_csv(os.path.join(result_path, "ransac_events.csv"), index=False) + stations.to_csv(os.path.join(result_path, "ransac_stations.csv"), index=False) # %% diff --git a/scripts/run_adloc_cc.py b/scripts/run_adloc_cc.py new file mode 100644 index 0000000..23df506 --- /dev/null +++ b/scripts/run_adloc_cc.py @@ -0,0 +1,324 @@ +# %% +import json +import os +import pickle +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch +import torch.distributed as dist +import torch.optim as optim +from adloc.adloc import TravelTimeDD +from adloc.data import PhaseDatasetDTCC +from adloc.eikonal2d import init_eikonal2d +from adloc.inversion import optimize_dd +from args import parse_args +from matplotlib import pyplot as plt +from pyproj import Proj +from torch.distributed import init_process_group +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from tqdm.auto import tqdm +from utils.plotting import plotting_dd + +torch.manual_seed(0) +np.random.seed(0) + + +# %% +if __name__ == "__main__": + + # %% + args = parse_args() + root_path = args.root_path + region = args.region + data_path = f"{root_path}/{region}/cctorch" + result_path = f"{root_path}/{region}/adloc_dd" + figure_path = f"{root_path}/{region}/adloc_dd/figures" + if not os.path.exists(data_path): + os.makedirs(data_path) + if not os.path.exists(result_path): + os.makedirs(result_path) + + # %% + ddp = int(os.environ.get("RANK", -1)) != -1 # is this a ddp run? 
+ if ddp: + init_process_group(backend="gloo") + ddp_rank = int(os.environ["RANK"]) + ddp_local_rank = int(os.environ["LOCAL_RANK"]) + ddp_world_size = int(os.environ["WORLD_SIZE"]) + master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. + print(f"DDP rank {ddp_rank}, local rank {ddp_local_rank}, world size {ddp_world_size}") + else: + # vanilla, non-DDP run + ddp_rank = 0 + ddp_local_rank = 0 + ddp_world_size = 1 + master_process = True + print("Non-DDP run") + + # %% reading from the generated files + events = pd.read_csv(os.path.join(data_path, "cctorch_events.csv")) + events["time"] = pd.to_datetime(events["event_time"]) + stations = pd.read_csv(os.path.join(data_path, "cctorch_stations.csv")) + picks = pd.read_csv(os.path.join(data_path, "cctorch_picks.csv"), parse_dates=["phase_time"]) + pairs = pd.read_csv(os.path.join(data_path, "ccpairs", "CC_000_002.csv")) + + with open(f"{root_path}/{region}/config.json", "r") as fp: + config = json.load(fp) + print(json.dumps(config, indent=4)) + config["use_amplitude"] = True + + # ## Eikonal for 1D velocity model + zz = [0.0, 5.5, 16.0, 32.0] + vp = [5.5, 5.5, 6.7, 7.8] + vp_vs_ratio = 1.73 + vs = [v / vp_vs_ratio for v in vp] + h = 0.3 + + # %% + if not os.path.exists(result_path): + os.makedirs(result_path) + if not os.path.exists(figure_path): + os.makedirs(figure_path) + + # %% + ## Automatic region; you can also specify a region + # lon0 = stations["longitude"].median() + # lat0 = stations["latitude"].median() + lat0 = (config["minlatitude"] + config["maxlatitude"]) / 2 + lon0 = (config["minlongitude"] + config["maxlongitude"]) / 2 + proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km") + + stations["x_km"], stations["y_km"] = proj(stations["longitude"], stations["latitude"]) + stations["z_km"] = stations["depth_km"] + events["time"] = pd.to_datetime(events["time"]) + events["x_km"], events["y_km"] = proj(events["longitude"], events["latitude"]) + events["z_km"] = events["depth_km"] + + events_init = events.copy() + + ## set up the config; you can also specify the region manually + if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config): + xmin, ymin = proj(config["minlongitude"], config["minlatitude"]) + xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"]) + zmin, zmax = config["mindepth"], config["maxdepth"] + config["xlim_km"] = (xmin, xmax) + config["ylim_km"] = (ymin, ymax) + config["zlim_km"] = (zmin, zmax) + + mapping_phase_type_int = {"P": 0, "S": 1} + config["vel"] = {"P": 6.0, "S": 6.0 / 1.73} + config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()} + + # %% + config["eikonal"] = None + ## Eikonal for 1D velocity model + # zz = [0.0, 5.5, 16.0, 32.0] + # vp = [5.5, 5.5, 6.7, 7.8] + # vp_vs_ratio = 1.73 + # vs = [v / vp_vs_ratio for v in vp] + # zz = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 30.0] + # vp = [4.746, 4.793, 4.799, 5.045, 5.721, 5.879, 6.504, 6.708, 6.725, 7.800] + # vs = [2.469, 2.470, 2.929, 2.930, 3.402, 3.403, 3.848, 3.907, 3.963, 4.500] + # h = 0.3 + vel = {"Z": zz, "P": vp, "S": vs} + config["eikonal"] = { + "vel": vel, + "h": h, + "xlim_km": config["xlim_km"], + "ylim_km": config["ylim_km"], + "zlim_km": config["zlim_km"], + } + config["eikonal"] = init_eikonal2d(config["eikonal"]) + + # %% + config["bfgs_bounds"] = ( + (config["xlim_km"][0] - 1, config["xlim_km"][1] + 1), # x + (config["ylim_km"][0] - 1, config["ylim_km"][1] + 1), # y + (0, config["zlim_km"][1] + 1), + (None, None), # t + ) + + # 
%% + phase_dataset = PhaseDatasetDTCC(pairs, picks, events, stations, rank=ddp_local_rank, world_size=ddp_world_size) + data_loader = DataLoader(phase_dataset, batch_size=None, shuffle=False, num_workers=0, drop_last=False) + + # %% + num_event = len(events) + num_station = len(stations) + station_loc = stations[["x_km", "y_km", "z_km"]].values + event_loc = events[["x_km", "y_km", "z_km"]].values # + np.random.randn(num_event, 3) * 2.0 + travel_time = TravelTimeDD( + num_event, + num_station, + station_loc=station_loc, + event_loc=event_loc, + # event_time=event_time, + eikonal=config["eikonal"], + ) + if ddp: + travel_time = DDP(travel_time) + raw_travel_time = travel_time.module if ddp else travel_time + + if ddp_local_rank == 0: + print(f"Dataset: {len(events)} events, {len(stations)} stations, {len(data_loader)} batches") + + ## invert loss + ###################################################################################################### + optimizer = optim.Adam(params=travel_time.parameters(), lr=0.1) + valid_index = np.ones(len(pairs), dtype=bool) + EPOCHS = 100 + for epoch in range(EPOCHS): + loss = 0 + optimizer.zero_grad() + # for meta in tqdm(phase_dataset, desc=f"Epoch {i}"): + for meta in data_loader: + out = travel_time( + meta["idx_sta"], + meta["idx_eve"], + meta["phase_type"], + meta["phase_time"], + meta["phase_weight"], + ) + pred_, loss_ = out["phase_time"], out["loss"] + + loss_.backward() + + if ddp: + dist.all_reduce(loss_, op=dist.ReduceOp.SUM) + # loss_ /= ddp_world_size + + loss += loss_ + + # torch.nn.utils.clip_grad_norm_(travel_time.parameters(), 1.0) + optimizer.step() + with torch.no_grad(): + raw_travel_time.event_loc.weight.data[:, 2].clamp_( + min=config["zlim_km"][0] + 0.1, max=config["zlim_km"][1] - 0.1 + ) + raw_travel_time.event_loc.weight.data[torch.isnan(raw_travel_time.event_loc.weight)] = 0.0 + if ddp_local_rank == 0: + print(f"Epoch {epoch}: loss {loss:.6e} of {np.sum(valid_index)} picks, {loss / np.sum(valid_index):.6e}") + + ### filtering + pred_time = [] + phase_dataset.valid_index = np.ones(len(pairs), dtype=bool) + for meta in phase_dataset: + meta = travel_time( + meta["idx_sta"], + meta["idx_eve"], + meta["phase_type"], + meta["phase_time"], + meta["phase_weight"], + ) + pred_time.append(meta["phase_time"].detach().numpy()) + + pred_time = np.concatenate(pred_time) + valid_index = ( + np.abs(pred_time - pairs["dt"]) < np.std((pred_time - pairs["dt"])[valid_index]) * 3.0 + ) # * (np.cos(epoch * np.pi / EPOCHS) + 2.0) # 3std -> 1std + + pairs_df = pd.DataFrame( + { + "event_index1": pairs["idx_eve1"], + "event_index2": pairs["idx_eve2"], + "station_index": pairs["idx_sta"], + } + ) + pairs_df = pairs_df[valid_index] + config["MIN_OBS"] = 8 + pairs_df = pairs_df.groupby(["event_index1", "event_index2"], as_index=False, group_keys=False).filter( + lambda x: len(x) >= config["MIN_OBS"] + ) + valid_index = np.zeros(len(pairs), dtype=bool) + valid_index[pairs_df.index] = True + + phase_dataset.valid_index = valid_index + + invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy() + invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy() + valid_event_index = np.unique(pairs["idx_eve1"][valid_index]) + valid_event_index = np.concatenate( + [np.unique(pairs["idx_eve1"][valid_index]), np.unique(pairs["idx_eve2"][valid_index])] + ) + valid_event_index = np.sort(np.unique(valid_event_index)) + + if ddp_local_rank == 0 and (epoch % 10 == 0): + events = events_init.copy() + events["time"] = events["time"] + 
pd.to_timedelta(np.squeeze(invert_event_time), unit="s") + events["x_km"] = invert_event_loc[:, 0] + events["y_km"] = invert_event_loc[:, 1] + events["z_km"] = invert_event_loc[:, 2] + events[["longitude", "latitude"]] = events.apply( + lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 + ) + events["depth_km"] = events["z_km"] + events = events.iloc[valid_event_index] + events.to_csv( + f"{result_path}/adloc_dtcc_events_{epoch//10}.csv", + index=False, + float_format="%.5f", + date_format="%Y-%m-%dT%H:%M:%S.%f", + ) + plotting_dd(events, stations, config, figure_path, events_init, suffix=f"_ddcc_{epoch//10}") + + # ###################################################################################################### + # optimizer = optim.LBFGS(params=raw_travel_time.parameters(), max_iter=10, line_search_fn="strong_wolfe") + + # def closure(): + # optimizer.zero_grad() + # loss = 0 + # # for meta in tqdm(phase_dataset, desc=f"BFGS"): + # if ddp_local_rank == 0: + # print(f"BFGS: ", end="") + # for meta in phase_dataset: + # if ddp_local_rank == 0: + # print(".", end="") + + # loss_ = travel_time( + # meta["idx_sta"], + # meta["idx_eve"], + # meta["phase_type"], + # meta["phase_time"], + # meta["phase_weight"], + # )["loss"] + # loss_.backward() + + # if ddp: + # dist.all_reduce(loss_, op=dist.ReduceOp.SUM) + # # loss_ /= ddp_world_size + + # loss += loss_ + + # if ddp_local_rank == 0: + # print(f"Loss: {loss}") + # raw_travel_time.event_loc.weight.data[:, 2].clamp_(min=config["zlim_km"][0], max=config["zlim_km"][1]) + # return loss + + # optimizer.step(closure) + # ###################################################################################################### + + # %% + if ddp_local_rank == 0: + + plotting_dd(events, stations, config, figure_path, events_init, suffix="_dtcc") + + invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy() + invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy() + + events = events_init.copy() + events["time"] = events["time"] + pd.to_timedelta(np.squeeze(invert_event_time), unit="s") + events["x_km"] = invert_event_loc[:, 0] + events["y_km"] = invert_event_loc[:, 1] + events["z_km"] = invert_event_loc[:, 2] + events[["longitude", "latitude"]] = events.apply( + lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 + ) + events["depth_km"] = events["z_km"] + events = events.iloc[valid_event_index] + events.to_csv( + f"{result_path}/adloc_dtcc_events.csv", index=False, float_format="%.5f", date_format="%Y-%m-%dT%H:%M:%S.%f" + ) diff --git a/scripts/run_adloc_ct.py b/scripts/run_adloc_ct.py new file mode 100644 index 0000000..16aef4c --- /dev/null +++ b/scripts/run_adloc_ct.py @@ -0,0 +1,324 @@ +# %% +import json +import os +import pickle +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import torch +import torch.distributed as dist +import torch.optim as optim +from adloc.adloc import TravelTimeDD +from adloc.data import PhaseDatasetDT +from adloc.eikonal2d import init_eikonal2d +from adloc.inversion import optimize_dd +from args import parse_args +from matplotlib import pyplot as plt +from pyproj import Proj +from torch.distributed import init_process_group +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.data import DataLoader +from tqdm.auto import tqdm +from utils.plotting import plotting_dd + +torch.manual_seed(0) +np.random.seed(0) + + +# %% +if __name__ == "__main__": + + # %% + args = parse_args() 
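# --- Illustrative sketch (standalone, not part of run_adloc_ct.py) -----------
# This script consumes the differential-time pairs written by generate_pairs.py:
# a structured-array memmap ("pair_dt.dat") whose dtype is pickled alongside it
# ("pair_dtypes.pkl"), so the pairs can be re-opened lazily without loading
# everything into memory. A minimal write/read roundtrip with demo file names:
import pickle

import numpy as np

dtypes = np.dtype(
    [
        ("event_index1", np.int32),
        ("event_index2", np.int32),
        ("station_index", np.int32),
        ("phase_type", np.int32),
        ("phase_score", np.float32),
        ("phase_dtime", np.float32),
    ]
)
pairs_demo = np.memmap("demo_pair_dt.dat", mode="w+", shape=(2,), dtype=dtypes)
pairs_demo["event_index1"] = [0, 0]
pairs_demo["event_index2"] = [1, 2]
pairs_demo["phase_dtime"] = [0.12, -0.05]
pairs_demo.flush()
with open("demo_pair_dtypes.pkl", "wb") as f:
    pickle.dump(dtypes, f)

with open("demo_pair_dtypes.pkl", "rb") as f:
    loaded_dtypes = pickle.load(f)
loaded = np.memmap("demo_pair_dt.dat", mode="r", dtype=loaded_dtypes)
print(loaded["event_index2"], loaded["phase_dtime"])
# ------------------------------------------------------------------------------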
+ root_path = args.root_path + region = args.region + data_path = f"{root_path}/{region}/adloc_dd" + result_path = f"{root_path}/{region}/adloc_dd" + figure_path = f"{root_path}/{region}/adloc_dd/figures" + if not os.path.exists(data_path): + os.makedirs(data_path) + if not os.path.exists(result_path): + os.makedirs(result_path) + + # %% + ddp = int(os.environ.get("RANK", -1)) != -1 # is this a ddp run? + if ddp: + init_process_group(backend="gloo") + ddp_rank = int(os.environ["RANK"]) + ddp_local_rank = int(os.environ["LOCAL_RANK"]) + ddp_world_size = int(os.environ["WORLD_SIZE"]) + master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. + print(f"DDP rank {ddp_rank}, local rank {ddp_local_rank}, world size {ddp_world_size}") + else: + # vanilla, non-DDP run + ddp_rank = 0 + ddp_local_rank = 0 + ddp_world_size = 1 + master_process = True + print("Non-DDP run") + + # %% reading from the generated files + events = pd.read_csv(os.path.join(result_path, "pair_events.csv"), parse_dates=["time"]) + stations = pd.read_csv(os.path.join(result_path, "pair_stations.csv")) + picks = pd.read_csv(os.path.join(result_path, "pair_picks.csv"), parse_dates=["phase_time"]) + dtypes = pickle.load(open(os.path.join(result_path, "pair_dtypes.pkl"), "rb")) + pairs = np.memmap(os.path.join(result_path, "pair_dt.dat"), mode="r", dtype=dtypes) + + with open(f"{root_path}/{region}/config.json", "r") as fp: + config = json.load(fp) + print(json.dumps(config, indent=4)) + config["use_amplitude"] = True + + # ## Eikonal for 1D velocity model + zz = [0.0, 5.5, 16.0, 32.0] + vp = [5.5, 5.5, 6.7, 7.8] + vp_vs_ratio = 1.73 + vs = [v / vp_vs_ratio for v in vp] + h = 0.3 + + # %% + if not os.path.exists(result_path): + os.makedirs(result_path) + if not os.path.exists(figure_path): + os.makedirs(figure_path) + + # %% + ## Automatic region; you can also specify a region + # lon0 = stations["longitude"].median() + # lat0 = stations["latitude"].median() + lat0 = (config["minlatitude"] + config["maxlatitude"]) / 2 + lon0 = (config["minlongitude"] + config["maxlongitude"]) / 2 + proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km") + + stations["x_km"], stations["y_km"] = proj(stations["longitude"], stations["latitude"]) + stations["z_km"] = stations["depth_km"] + events["time"] = pd.to_datetime(events["time"]) + events["x_km"], events["y_km"] = proj(events["longitude"], events["latitude"]) + events["z_km"] = events["depth_km"] + + events_init = events.copy() + + ## set up the config; you can also specify the region manually + if ("xlim_km" not in config) or ("ylim_km" not in config) or ("zlim_km" not in config): + xmin, ymin = proj(config["minlongitude"], config["minlatitude"]) + xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"]) + zmin, zmax = config["mindepth"], config["maxdepth"] + config["xlim_km"] = (xmin, xmax) + config["ylim_km"] = (ymin, ymax) + config["zlim_km"] = (zmin, zmax) + + mapping_phase_type_int = {"P": 0, "S": 1} + config["vel"] = {"P": 6.0, "S": 6.0 / 1.73} + config["vel"] = {mapping_phase_type_int[k]: v for k, v in config["vel"].items()} + + # %% + config["eikonal"] = None + ## Eikonal for 1D velocity model + # zz = [0.0, 5.5, 16.0, 32.0] + # vp = [5.5, 5.5, 6.7, 7.8] + # vp_vs_ratio = 1.73 + # vs = [v / vp_vs_ratio for v in vp] + # zz = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 30.0] + # vp = [4.746, 4.793, 4.799, 5.045, 5.721, 5.879, 6.504, 6.708, 6.725, 7.800] + # vs = [2.469, 2.470, 2.929, 2.930, 3.402, 3.403, 3.848, 3.907, 3.963, 4.500] + # h = 
0.3 + vel = {"Z": zz, "P": vp, "S": vs} + config["eikonal"] = { + "vel": vel, + "h": h, + "xlim_km": config["xlim_km"], + "ylim_km": config["ylim_km"], + "zlim_km": config["zlim_km"], + } + config["eikonal"] = init_eikonal2d(config["eikonal"]) + + # %% + config["bfgs_bounds"] = ( + (config["xlim_km"][0] - 1, config["xlim_km"][1] + 1), # x + (config["ylim_km"][0] - 1, config["ylim_km"][1] + 1), # y + (0, config["zlim_km"][1] + 1), + (None, None), # t + ) + + # %% + phase_dataset = PhaseDatasetDT(pairs, picks, events, stations, rank=ddp_local_rank, world_size=ddp_world_size) + data_loader = DataLoader(phase_dataset, batch_size=None, shuffle=False, num_workers=0, drop_last=False) + + # %% + num_event = len(events) + num_station = len(stations) + station_loc = stations[["x_km", "y_km", "z_km"]].values + event_loc = events[["x_km", "y_km", "z_km"]].values # + np.random.randn(num_event, 3) * 2.0 + travel_time = TravelTimeDD( + num_event, + num_station, + station_loc=station_loc, + event_loc=event_loc, + # event_time=event_time, + eikonal=config["eikonal"], + ) + if ddp: + travel_time = DDP(travel_time) + raw_travel_time = travel_time.module if ddp else travel_time + + if ddp_local_rank == 0: + print(f"Dataset: {len(events)} events, {len(stations)} stations, {len(data_loader)} batches") + + ## invert loss + ###################################################################################################### + optimizer = optim.Adam(params=travel_time.parameters(), lr=0.1) + valid_index = np.ones(len(pairs), dtype=bool) + EPOCHS = 100 + for epoch in range(EPOCHS): + loss = 0 + optimizer.zero_grad() + # for meta in tqdm(phase_dataset, desc=f"Epoch {i}"): + for meta in data_loader: + out = travel_time( + meta["idx_sta"], + meta["idx_eve"], + meta["phase_type"], + meta["phase_time"], + meta["phase_weight"], + ) + pred_, loss_ = out["phase_time"], out["loss"] + + loss_.backward() + + if ddp: + dist.all_reduce(loss_, op=dist.ReduceOp.SUM) + # loss_ /= ddp_world_size + + loss += loss_ + + # torch.nn.utils.clip_grad_norm_(travel_time.parameters(), 1.0) + optimizer.step() + with torch.no_grad(): + raw_travel_time.event_loc.weight.data[:, 2].clamp_( + min=config["zlim_km"][0] + 0.1, max=config["zlim_km"][1] - 0.1 + ) + raw_travel_time.event_loc.weight.data[torch.isnan(raw_travel_time.event_loc.weight)] = 0.0 + if ddp_local_rank == 0: + print(f"Epoch {epoch}: loss {loss:.6e} of {np.sum(valid_index)} picks, {loss / np.sum(valid_index):.6e}") + + ### filtering + pred_time = [] + phase_dataset.valid_index = np.ones(len(pairs), dtype=bool) + for meta in phase_dataset: + meta = travel_time( + meta["idx_sta"], + meta["idx_eve"], + meta["phase_type"], + meta["phase_time"], + meta["phase_weight"], + ) + pred_time.append(meta["phase_time"].detach().numpy()) + + pred_time = np.concatenate(pred_time) + valid_index = ( + np.abs(pred_time - pairs["phase_dtime"]) < np.std((pred_time - pairs["phase_dtime"])[valid_index]) * 3.0 + ) # * (np.cos(epoch * np.pi / EPOCHS) + 2.0) # 3std -> 1std + + pairs_df = pd.DataFrame( + { + "event_index1": pairs["event_index1"], + "event_index2": pairs["event_index2"], + "station_index": pairs["station_index"], + } + ) + pairs_df = pairs_df[valid_index] + config["MIN_OBS"] = 8 + pairs_df = pairs_df.groupby(["event_index1", "event_index2"], as_index=False, group_keys=False).filter( + lambda x: len(x) >= config["MIN_OBS"] + ) + valid_index = np.zeros(len(pairs), dtype=bool) + valid_index[pairs_df.index] = True + + phase_dataset.valid_index = valid_index + + invert_event_loc = 
raw_travel_time.event_loc.weight.clone().detach().numpy() + invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy() + valid_event_index = np.unique(pairs["event_index1"][valid_index]) + valid_event_index = np.concatenate( + [np.unique(pairs["event_index1"][valid_index]), np.unique(pairs["event_index2"][valid_index])] + ) + valid_event_index = np.sort(np.unique(valid_event_index)) + + if ddp_local_rank == 0 and (epoch % 10 == 0): + events = events_init.copy() + events["time"] = events["time"] + pd.to_timedelta(np.squeeze(invert_event_time), unit="s") + events["x_km"] = invert_event_loc[:, 0] + events["y_km"] = invert_event_loc[:, 1] + events["z_km"] = invert_event_loc[:, 2] + events[["longitude", "latitude"]] = events.apply( + lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 + ) + events["depth_km"] = events["z_km"] + events = events.iloc[valid_event_index] + events.to_csv( + f"{result_path}/adloc_dt_events_{epoch//10}.csv", + index=False, + float_format="%.5f", + date_format="%Y-%m-%dT%H:%M:%S.%f", + ) + plotting_dd(events, stations, config, figure_path, events_init, suffix=f"_dt_{epoch//10}") + + # ###################################################################################################### + # optimizer = optim.LBFGS(params=raw_travel_time.parameters(), max_iter=10, line_search_fn="strong_wolfe") + + # def closure(): + # optimizer.zero_grad() + # loss = 0 + # # for meta in tqdm(phase_dataset, desc=f"BFGS"): + # if ddp_local_rank == 0: + # print(f"BFGS: ", end="") + # for meta in phase_dataset: + # if ddp_local_rank == 0: + # print(".", end="") + + # loss_ = travel_time( + # meta["idx_sta"], + # meta["idx_eve"], + # meta["phase_type"], + # meta["phase_time"], + # meta["phase_weight"], + # )["loss"] + # loss_.backward() + + # if ddp: + # dist.all_reduce(loss_, op=dist.ReduceOp.SUM) + # # loss_ /= ddp_world_size + + # loss += loss_ + + # if ddp_local_rank == 0: + # print(f"Loss: {loss}") + # raw_travel_time.event_loc.weight.data[:, 2].clamp_(min=config["zlim_km"][0], max=config["zlim_km"][1]) + # return loss + + # optimizer.step(closure) + # ###################################################################################################### + + # %% + if ddp_local_rank == 0: + + plotting_dd(events, stations, config, figure_path, events_init, suffix="_dt") + + invert_event_loc = raw_travel_time.event_loc.weight.clone().detach().numpy() + invert_event_time = raw_travel_time.event_time.weight.clone().detach().numpy() + + events = events_init.copy() + events["time"] = events["time"] + pd.to_timedelta(np.squeeze(invert_event_time), unit="s") + events["x_km"] = invert_event_loc[:, 0] + events["y_km"] = invert_event_loc[:, 1] + events["z_km"] = invert_event_loc[:, 2] + events[["longitude", "latitude"]] = events.apply( + lambda x: pd.Series(proj(x["x_km"], x["y_km"], inverse=True)), axis=1 + ) + events["depth_km"] = events["z_km"] + events = events.iloc[valid_event_index] + events.to_csv( + f"{result_path}/adloc_dt_events.csv", index=False, float_format="%.5f", date_format="%Y-%m-%dT%H:%M:%S.%f" + ) diff --git a/scripts/run_cctorch.py b/scripts/run_cctorch.py index ec37c30..9359822 100644 --- a/scripts/run_cctorch.py +++ b/scripts/run_cctorch.py @@ -45,6 +45,7 @@ f"--events_csv={root_path}/{region}/cctorch/cctorch_events.csv --picks_csv={root_path}/{region}/cctorch/cctorch_picks.csv --stations_csv={root_path}/{region}/cctorch/cctorch_stations.csv " f"--config={root_path}/{region}/cctorch/config.json --batch_size={batch} 
--block_size1={block_size1} --block_size2={block_size2} --result_path={root_path}/{result_path}" ) + num_gpu = torch.cuda.device_count() if num_gpu == 0: if os.uname().sysname == "Darwin": diff --git a/scripts/run_gamma.py b/scripts/run_gamma.py index a4ba0b8..d5156f4 100644 --- a/scripts/run_gamma.py +++ b/scripts/run_gamma.py @@ -84,7 +84,9 @@ def run_gamma( config["dims"] = ["x(km)", "y(km)", "z(km)"] xmin, ymin = proj(config["minlongitude"], config["minlatitude"]) xmax, ymax = proj(config["maxlongitude"], config["maxlatitude"]) - zmin, zmax = config["mindepth"], config["maxdepth"] + # zmin, zmax = config["mindepth"], config["maxdepth"] + zmin = config["mindepth"] if "mindepth" in config else 0 + zmax = config["maxdepth"] if "maxdepth" in config else 30 config["x(km)"] = (xmin, xmax) config["y(km)"] = (ymin, ymax) config["z(km)"] = (zmin, zmax) diff --git a/scripts/run_growclust_cc.py b/scripts/run_growclust_cc.py index 48473ef..93ab2ed 100644 --- a/scripts/run_growclust_cc.py +++ b/scripts/run_growclust_cc.py @@ -18,7 +18,7 @@ # %% # stations_json = f"{region}/results/data/stations.json" # stations = pd.read_json(f"{root_path}/{stations_json}", orient="index") -station_csv = f"{region}/adloc/ransac_stations_sst.csv" +station_csv = f"{region}/adloc/ransac_stations.csv" stations = pd.read_csv(f"{root_path}/{station_csv}") stations.set_index("station_id", inplace=True) @@ -35,7 +35,7 @@ # %% # events_csv = f"{region}/results/phase_association/events.csv" -events_csv = f"{region}/adloc/ransac_events_sst.csv" +events_csv = f"{region}/adloc/ransac_events.csv" # event_file = f"{region}/cctorch/events.csv" events = pd.read_csv(f"{root_path}/{events_csv}") # event_df = event_df[event_df["gamma_score"] > 10] diff --git a/scripts/run_hypodd_cc.py b/scripts/run_hypodd_cc.py index e18cdd3..6809cf4 100644 --- a/scripts/run_hypodd_cc.py +++ b/scripts/run_hypodd_cc.py @@ -47,8 +47,7 @@ for i, row in events.iterrows(): event_index = row["event_index"] origin = row["time"] - # magnitude = row["magnitude"] - magnitude = 1.0 + magnitude = row["magnitude"] x_err = 0.0 z_err = 0.0 time_err = 0.0 diff --git a/scripts/run_hypodd_ct.py b/scripts/run_hypodd_ct.py index fe12947..895e1b2 100644 --- a/scripts/run_hypodd_ct.py +++ b/scripts/run_hypodd_ct.py @@ -20,7 +20,7 @@ # station_json = f"{region}/results/data/stations.json" # stations = pd.read_json(f"{root_path}/{station_json}", orient="index") # station_csv = f"{region}/cctorch/cctorch_stations.csv" -station_csv = f"{region}/adloc/ransac_stations_sst.csv" +station_csv = f"{region}/adloc/ransac_stations.csv" stations = pd.read_csv(f"{root_path}/{station_csv}") stations.set_index("station_id", inplace=True) @@ -53,8 +53,8 @@ # %% ############################################# Picks Format ###################################################### -picks_csv = f"{region}/adloc/ransac_picks_sst.csv" -events_csv = f"{region}/adloc/ransac_events_sst.csv" +picks_csv = f"{region}/adloc/ransac_picks.csv" +events_csv = f"{region}/adloc/ransac_events.csv" picks = pd.read_csv(f"{root_path}/{picks_csv}") events = pd.read_csv(f"{root_path}/{events_csv}") diff --git a/scripts/run_phasenet.py b/scripts/run_phasenet.py index fa09e19..f52161f 100644 --- a/scripts/run_phasenet.py +++ b/scripts/run_phasenet.py @@ -63,9 +63,9 @@ def run_phasenet( fp.write("\n".join(mseed_list)) # %% - os.system( - f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv 
--response_xml={root_path}/{region}/results/network/inventory.xml --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d}_{num_nodes:03d} --batch_size=1" - ) + cmd = f"python {model_path}/phasenet/predict.py --model={model_path}/model/190703-214543 --data_dir=./ --data_list={root_path}/{result_path}/mseed_list_{node_rank:03d}_{num_nodes:03d}.csv --response_xml={root_path}/{region}/results/network/inventory.xml --format=mseed --amplitude --highpass_filter=1.0 --result_dir={root_path}/{result_path} --result_fname=phasenet_picks_{node_rank:03d}_{num_nodes:03d} --batch_size=1" + # cmd += " --sampling_rate 100" + os.system(cmd) if protocol != "file": fs.put( diff --git a/scripts/run_qtm.py b/scripts/run_qtm.py index 447f29d..cef6853 100644 --- a/scripts/run_qtm.py +++ b/scripts/run_qtm.py @@ -81,7 +81,7 @@ def parse_args(): fp.write("\n".join([f"{x[0]},{x[1]}" for x in pairs])) ## based on GPU memory -batch = 256 +batch = 16 block_size1 = 1 block_size2 = 100_000 # ~7GB diff --git a/scripts/utils/plotting.py b/scripts/utils/plotting.py index b440bdd..eea87fe 100644 --- a/scripts/utils/plotting.py +++ b/scripts/utils/plotting.py @@ -265,10 +265,17 @@ def plotting_ransac(stations, figure_path, config, picks, events_init, events, s plt.savefig(os.path.join(figure_path, f"error{suffix}.png"), bbox_inches="tight", dpi=300) plt.close(fig) - xmin, xmax = config["xlim_km"] - ymin, ymax = config["ylim_km"] - zmin, zmax = config["zlim_km"] - vmin, vmax = config["zlim_km"] + # xmin, xmax = config["xlim_km"] + # ymin, ymax = config["ylim_km"] + # zmin, zmax = config["zlim_km"] + # vmin, vmax = config["zlim_km"] + xmin = events["x_km"].min() + xmax = events["x_km"].max() + ymin = events["y_km"].min() + ymax = events["y_km"].max() + zmin = events["z_km"].min() + zmax = events["z_km"].max() + vmin, vmax = zmin, zmax events = events.sort_values("time", ascending=True) s = max(0.1, min(10, 5000 / len(events))) alpha = 0.8 @@ -402,5 +409,146 @@ def plotting_ransac(stations, figure_path, config, picks, events_init, events, s # ax[1, 1].set_ylabel("Depth (km)") cbar = fig.colorbar(im, ax=ax[1, 1]) cbar.set_label("Depth (km)") + + if "magnitude" in events.columns: + im = ax[1, 2].hist(events["magnitude"], bins=30, edgecolor="white") + ax[1, 2].set_yscale("log") + ax[1, 2].set_xlabel("Magnitude") + ax[1, 2].set_ylabel("Count") + plt.savefig(os.path.join(figure_path, f"location{suffix}.png"), bbox_inches="tight", dpi=300) plt.close(fig) + + +# %% +def plotting_dd(events, stations, config, figure_path, events_old, suffix=""): + + xmin, xmax = config["xlim_km"] + ymin, ymax = config["ylim_km"] + zmin, zmax = config["zlim_km"] + vmin, vmax = zmin, zmax + + s = max(0.1, min(10, 5000 / len(events))) + alpha = 0.8 + + fig, ax = plt.subplots(3, 2, figsize=(10, 10), gridspec_kw={"height_ratios": [2, 1, 1]}) + im = ax[0, 0].scatter( + events_old["x_km"], + events_old["y_km"], + c=events_old["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[0, 0].set_xlim([xmin, xmax]) + ax[0, 0].set_ylim([ymin, ymax]) + cbar = fig.colorbar(im, ax=ax[0, 0]) + cbar.set_label("Depth (km)") + ax[0, 0].set_title(f"ADLoc: {len(events_old)} events") + + im = ax[0, 1].scatter( + events["x_km"], + events["y_km"], + c=events["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[0, 1].set_xlim([xmin, xmax]) + ax[0, 1].set_ylim([ymin, ymax]) + cbar = 
fig.colorbar(im, ax=ax[0, 1]) + cbar.set_label("Depth (km)") + ax[0, 1].set_title(f"ADLoc DD: {len(events)} events") + + # im = ax[1, 0].scatter( + # events_new["x_km"], + # events_new["z_km"], + # c=events_new["z_km"], + # cmap="viridis_r", + # s=1, + # marker="o", + # vmin=vmin, + # vmax=vmax, + # ) + # ax[1, 0].set_xlim([xmin, xmax]) + # ax[1, 0].set_ylim([zmax, zmin]) + # cbar = fig.colorbar(im, ax=ax[1, 0]) + # cbar.set_label("Depth (km)") + + im = ax[1, 0].scatter( + events_old["x_km"], + events_old["z_km"], + c=events_old["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[1, 0].set_xlim([xmin, xmax]) + ax[1, 0].set_ylim([zmax, zmin]) + cbar = fig.colorbar(im, ax=ax[1, 0]) + cbar.set_label("Depth (km)") + + im = ax[1, 1].scatter( + events["x_km"], + events["z_km"], + c=events["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[1, 1].set_xlim([xmin, xmax]) + ax[1, 1].set_ylim([zmax, zmin]) + cbar = fig.colorbar(im, ax=ax[1, 1]) + cbar.set_label("Depth (km)") + + im = ax[2, 0].scatter( + events_old["y_km"], + events_old["z_km"], + c=events_old["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[2, 0].set_xlim([ymin, ymax]) + ax[2, 0].set_ylim([zmax, zmin]) + cbar = fig.colorbar(im, ax=ax[2, 0]) + cbar.set_label("Depth (km)") + + im = ax[2, 1].scatter( + events["y_km"], + events["z_km"], + c=events["z_km"], + cmap="viridis_r", + s=s, + marker="o", + vmin=vmin, + vmax=vmax, + alpha=alpha, + linewidth=0.0, + ) + ax[2, 1].set_xlim([ymin, ymax]) + ax[2, 1].set_ylim([zmax, zmin]) + cbar = fig.colorbar(im, ax=ax[2, 1]) + cbar.set_label("Depth (km)") + + plt.savefig(os.path.join(figure_path, f"location{suffix}.png"), bbox_inches="tight", dpi=300)
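
Note on scripts/generate_pairs.py (illustrative, toy data; column names follow the script): pairing_picks merges the pick table onto each event pair twice, so that for every station and phase observed by both events the differential time is phase_dtime = (phase_time1 - event_time1) - (phase_time2 - event_time2), i.e. the difference of the two travel times. A minimal pandas sketch of that double merge:

import pandas as pd

picks = pd.DataFrame(
    {
        "idx_eve": [0, 0, 1, 1],
        "idx_sta": [7, 8, 7, 8],
        "phase_type": [0, 0, 0, 0],
        "phase_score": [0.9, 0.8, 0.7, 0.6],
        "phase_time": pd.to_datetime(
            ["2024-01-01 00:00:03", "2024-01-01 00:00:05", "2024-01-01 00:01:04", "2024-01-01 00:01:06"]
        ),
    }
)
event_pairs = pd.DataFrame(
    {
        "idx_eve1": [0],
        "idx_eve2": [1],
        "event_time1": pd.to_datetime(["2024-01-01 00:00:00"]),
        "event_time2": pd.to_datetime(["2024-01-01 00:01:00"]),
    }
)
merged = event_pairs.merge(picks, left_on="idx_eve1", right_on="idx_eve")
merged = merged.merge(
    picks,
    left_on=["idx_eve2", "idx_sta", "phase_type"],
    right_on=["idx_eve", "idx_sta", "phase_type"],
    suffixes=("_1", "_2"),
)
travel_time1 = (merged["phase_time_1"] - merged["event_time1"]).dt.total_seconds()
travel_time2 = (merged["phase_time_2"] - merged["event_time2"]).dt.total_seconds()
merged["phase_dtime"] = travel_time1 - travel_time2  # -1 s at both shared stations
print(merged[["idx_sta", "phase_dtime"]])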
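
The event pairs themselves come from a radius search in the projected (x_km, y_km, z_km) coordinates: each event is paired with up to MAX_NEIGHBORS events within MAX_PAIR_DIST km, keeping every unordered pair once (i < j). A minimal sketch with made-up coordinates (MIN_NEIGHBORS relaxed to 1 so the toy example keeps a pair):

import numpy as np
from sklearn.neighbors import NearestNeighbors

MAX_PAIR_DIST, MAX_NEIGHBORS, MIN_NEIGHBORS = 10.0, 50, 1  # km, counts
xyz = np.array([[0.0, 0.0, 5.0], [3.0, 4.0, 5.0], [100.0, 0.0, 5.0]])  # event 2 is isolated

neigh = NearestNeighbors(radius=MAX_PAIR_DIST, n_jobs=-1).fit(xyz)
neigh_ind = neigh.radius_neighbors(sort_results=True)[1]  # neighbors of each fitted point, excluding itself

pairs = set()
for i, neighs in enumerate(neigh_ind):
    if len(neighs) < MIN_NEIGHBORS:
        continue
    for j in neighs[:MAX_NEIGHBORS]:
        if i < j:
            pairs.add((i, j))
print(sorted(pairs))  # [(0, 1)]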
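
In run_adloc_ct.py and run_adloc_cc.py the pair set is pruned between epochs: predicted differential times are compared with the observed ones, pairs beyond roughly three standard deviations of the residual are masked out, and event pairs left with fewer than MIN_OBS observations are dropped entirely. A sketch of that two-stage filter on synthetic residuals:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 1000
pred_time = rng.normal(0.0, 0.05, n)
obs_dtime = pred_time + rng.normal(0.0, 0.05, n)
obs_dtime[:20] += 2.0  # a few gross outliers

valid_index = np.ones(n, dtype=bool)
residual = pred_time - obs_dtime
valid_index = np.abs(residual) < np.std(residual[valid_index]) * 3.0  # 3-sigma cut

pairs_df = pd.DataFrame(
    {"event_index1": rng.integers(0, 5, n), "event_index2": rng.integers(5, 10, n)}
)
MIN_OBS = 8
pairs_df = pairs_df[valid_index]
pairs_df = pairs_df.groupby(["event_index1", "event_index2"], group_keys=False).filter(lambda x: len(x) >= MIN_OBS)
valid_index = np.zeros(n, dtype=bool)
valid_index[pairs_df.index] = True
print(f"{valid_index.sum()} of {n} pairs kept")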
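
These scripts also share one local coordinate convention: a stereographic projection ("+proj=sterea") centered on the region or the station median, with km units, used forward to get x_km/y_km and inverse to map relocated coordinates back to longitude/latitude. A minimal sketch (the center here reuses the Lamont2024 origin added to scripts/config.json):

from pyproj import Proj

lon0, lat0 = -119.097, 35.109
proj = Proj(f"+proj=sterea +lon_0={lon0} +lat_0={lat0} +units=km")

x_km, y_km = proj(longitude=-119.0, latitude=35.2)  # forward: degrees -> km
lon, lat = proj(x_km, y_km, inverse=True)           # inverse: km -> degrees
print(f"{x_km:.2f} {y_km:.2f} -> {lon:.3f} {lat:.3f}")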