diff --git a/install.sh b/install.sh
index d7a7524..6f7d310 100755
--- a/install.sh
+++ b/install.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
+cwd=$(pwd)
 
 unamestr=`uname`
 env_file=environment.yml
 if [ "$unamestr" == 'Linux' ]; then
@@ -71,6 +72,9 @@ if [ $? -ne 0 ]; then
     echo ". $_CONDA_ROOT/etc/profile.d/conda.sh" >> $prof
 fi
 
+# make sure we're in the project directory still
+cd $cwd
+
 # Start in conda base environment
 echo "Activate base virtual environment"
 conda activate base
@@ -80,12 +84,12 @@ conda remove -y -n $VENV --all
 
 # Package list:
 package_list=(
-      "python>=3.6"
+      "python>=3.7"
       "impactutils"
       "fiona>=1.8.20"
       "ipython"
       "jupyter"
-      "numpy"
+      "numpy=1.21"
       "obspy"
       "pandas"
       "pip"
diff --git a/libcomcat/classes.py b/libcomcat/classes.py
index b977a67..a270d39 100644
--- a/libcomcat/classes.py
+++ b/libcomcat/classes.py
@@ -767,7 +767,7 @@ def getProducts(self, product_name, source="preferred", version="preferred"):
             dft = df[df["source"] == psource]
             dft = dft.sort_values("time")
             dft["version"] = np.arange(1, len(dft) + 1)
-            newframe = newframe.append(dft)
+            newframe = pd.concat([newframe, dft])
         df = newframe
 
     if source == "preferred":
diff --git a/libcomcat/dataframes.py b/libcomcat/dataframes.py
index a008f08..f7ec7fb 100644
--- a/libcomcat/dataframes.py
+++ b/libcomcat/dataframes.py
@@ -119,19 +119,6 @@ def get_phase_dataframe(detail, catalog="preferred"):
     """
     if catalog is None:
         catalog = "preferred"
-    df = pd.DataFrame(
-        columns=[
-            "Channel",
-            "Distance",
-            "Azimuth",
-            "Phase",
-            "Arrival Time",
-            "Status",
-            "Residual",
-            "Weight",
-            "Agency",
-        ]
-    )
     phasedata = detail.getProducts("phase-data", source=catalog)[0]
     quakeurl = phasedata.getContentURL("quakeml.xml")
 
@@ -150,6 +137,7 @@
         msg = fmt % (quakeurl, str(e))
         raise ParsingError(msg)
     catevent = catalog.events[0]
+    phaserows = []
     for pick in catevent.picks:
         station = pick.waveform_id.station_code
         fmt = "Getting pick %s for station%s..."
@@ -157,7 +145,20 @@
         phaserow = _get_phaserow(pick, catevent)
         if phaserow is None:
             continue
-        df = df.append(phaserow, ignore_index=True)
+        phaserows.append(phaserow)
+    df = pd.DataFrame(phaserows)
+    columns = [
+        "Channel",
+        "Distance",
+        "Azimuth",
+        "Phase",
+        "Arrival Time",
+        "Status",
+        "Residual",
+        "Weight",
+        "Agency",
+    ]
+    df = df[columns]
 
     return df
 
@@ -319,7 +320,7 @@ def get_magnitude_data_frame(detail, catalog, magtype):
         AttributeError if input DetailEvent does not have a phase-data
             product for the input catalog.
""" - columns = columns = [ + columns = [ "Channel", "Type", "Amplitude", @@ -331,7 +332,6 @@ def get_magnitude_data_frame(detail, catalog, magtype): "Azimuth", "MeasurementTime", ] - df = pd.DataFrame(columns=columns) phasedata = detail.getProducts("phase-data", source=catalog)[0] quakeurl = phasedata.getContentURL("quakeml.xml") try: @@ -350,6 +350,7 @@ def get_magnitude_data_frame(detail, catalog, magtype): msg = fmt % (quakeurl, str(e)) raise ParsingError(msg) catevent = catalog.events[0] # match this to input catalog + rows = [] for magnitude in catevent.magnitudes: if magnitude.magnitude_type.lower() != magtype.lower(): continue @@ -403,8 +404,9 @@ def get_magnitude_data_frame(detail, catalog, magtype): row["Weight"] = contribution.weight row["Distance"] = distance row["Azimuth"] = azimuth + rows.append(row) - df = df.append(row, ignore_index=True) + df = pd.DataFrame(rows) df = df[columns] return df @@ -548,7 +550,7 @@ def get_pager_data_frame( if not detail.hasProduct("losspager"): return None - df = None + total_rows = [] for pager in detail.getProducts("losspager", version="all"): total_row = {} default = {} @@ -645,12 +647,11 @@ def get_pager_data_frame( "predicted_dollars", "dollars_sigma", ] - if df is None: - df = pd.DataFrame(columns=columns) - df = df.append(total_row, ignore_index=True) + total_rows.append(total_row) for ccode, country_row in country_rows.items(): - df = df.append(country_row, ignore_index=True) + total_rows.append(country_row) + df = pd.DataFrame(total_rows) df = df[columns] # countries with zero fatalities don't report, so fill in with zeros if get_losses: @@ -1002,24 +1003,25 @@ def get_history_data_frame(detail, products=None): else: products = PRODUCTS - dataframe = pd.DataFrame(columns=PRODUCT_COLUMNS) + allrows = [] for product in products: logging.debug("Searching for %s products..." 
         if not event.hasProduct(product):
             continue
         prows = _get_product_rows(event, product)
-        dataframe = dataframe.append(prows, ignore_index=True)
+        allrows += prows
 
-    dataframe = dataframe.sort_values("Update Time")
-    dataframe["Elapsed (min)"] = np.round(dataframe["Elapsed (min)"], 1)
+    dataframe = pd.DataFrame(allrows)
     dataframe["Comment"] = ""
     dataframe = dataframe[PRODUCT_COLUMNS]
+    dataframe = dataframe.sort_values("Update Time")
+    dataframe["Elapsed (min)"] = np.round(dataframe["Elapsed (min)"], 1)
     return (dataframe, event)
 
 
 def _get_product_rows(event, product_name):
     products = event.getProducts(product_name, source="all", version="all")
-    prows = pd.DataFrame(columns=PRODUCT_COLUMNS)
+    prows = []
     for product in products:
         # if product.contents == ['']:
         #     continue
@@ -1047,7 +1049,7 @@ def _get_product_rows(event, product_name):
             continue
         if prow is None:
             continue
-        prows = prows.append(prow, ignore_index=True)
+        prows.append(prow)
 
     return prows
 
@@ -1774,6 +1776,7 @@ def split_history_frame(dataframe, product=None):
     parts = dataframe.iloc[0]["Description"].split("|")
     columns = [p.split("#")[0] for p in parts]
     df2 = pd.DataFrame(columns=columns)
+    hrows = []
     for idx, row in dataframe.iterrows():
         parts = row["Description"].split("|")
         columns = [p.split("#")[0].strip() for p in parts]
@@ -1790,8 +1793,9 @@ def split_history_frame(dataframe, product=None):
         newvalues.append(newval)
         ddict = dict(zip(columns, newvalues))
         row = pd.Series(ddict)
-        df2 = df2.append(row, ignore_index=True)
+        hrows.append(row)
 
+    df2 = pd.DataFrame(hrows)
     dataframe = dataframe.reset_index(drop=True)
     df2 = df2.reset_index(drop=True)
     dataframe = pd.concat([dataframe, df2], axis=1)
@@ -2114,7 +2118,7 @@ def associate(
             dlabels = ["dtime", "ddist", "dmag", "asq", "bsq", "csq", "psum"]
             talternates.drop(labels=dlabels, axis="columns", inplace=True)
             talternates["chosen_id"] = ef_row["id"]
-            alternates = alternates.append(talternates)
+            alternates = pd.concat([alternates, talternates])
         found_events.append(row)
 
     associated = pd.DataFrame(found_events)
diff --git a/tests/libcomcat/dataframes_test.py b/tests/libcomcat/dataframes_test.py
index d36b0fd..4e6c6f7 100755
--- a/tests/libcomcat/dataframes_test.py
+++ b/tests/libcomcat/dataframes_test.py
@@ -152,23 +152,22 @@ def test_history_data_frame():
     # SMOKE TEST
     cassettes, datadir = get_datadir()
     tape_file = os.path.join(cassettes, "dataframes_history.yaml")
+    products = [
+        "shakemap",
+        "dyfi",
+        "losspager",
+        "oaf",
+        "finite-fault",
+        "focal-mechanism",
+        "ground-failure",
+        "moment-tensor",
+        "phase-data",
+        "origin",
+    ]
+
     with vcr.use_cassette(tape_file, record_mode="new_episodes"):
         nc72852151 = get_event_by_id("nc72852151", includesuperseded=True)
-        (history, event) = get_history_data_frame(
-            nc72852151,
-            [
-                "shakemap",
-                "dyfi",
-                "losspager",
-                "oaf",
-                "finite-fault",
-                "focal-mechanism",
-                "ground-failure",
-                "moment-tensor",
-                "phase-data",
-                "origin",
-            ],
-        )
+        (history, event) = get_history_data_frame(nc72852151, products)
         us10008e3k = get_event_by_id("us10008e3k", includesuperseded=True)
         (history, event) = get_history_data_frame(
             us10008e3k,
@@ -415,6 +414,8 @@ def test_associate():
 
 
 if __name__ == "__main__":
+    print("Testing history frame...")
+    test_history_data_frame()
     print("Testing catalog association...")
     test_associate()
     print("Testing nan mags extraction...")
@@ -431,5 +432,3 @@ def test_associate():
     test_get_detail_data_frame()
     print("Testing magnitude frame...")
     test_magnitude_dataframe()
-    print("Testing history frame...")
-    test_history_data_frame()
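
Note on the pattern applied throughout this patch: pandas deprecated
DataFrame.append in 1.4 and removed it in 2.0, and calling it inside a loop
copies the entire frame on every iteration. The replacement used above is to
accumulate plain dicts (or Series) in a Python list and build the DataFrame
once at the end, falling back to pd.concat when whole frames must be combined.
A minimal standalone sketch of both forms (the column names and values here
are made up for illustration):

    import pandas as pd

    # Old, removed in pandas 2.0 (and quadratic, since each call copies df):
    #     df = pd.DataFrame(columns=["a", "b"])
    #     for i in range(3):
    #         df = df.append({"a": i, "b": i * 2}, ignore_index=True)

    # New: collect rows in a list, then construct the frame once.
    rows = []
    for i in range(3):
        rows.append({"a": i, "b": i * 2})
    df = pd.DataFrame(rows)
    df = df[["a", "b"]]  # enforce column order, as the patch does

    # When combining existing DataFrames, use pd.concat instead of append.
    extra = pd.DataFrame([{"a": 10, "b": 20}])
    df = pd.concat([df, extra], ignore_index=True)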