From 419318089b0b313009f97813e08cc0495cb7f3e2 Mon Sep 17 00:00:00 2001 From: thodson Date: Tue, 17 Sep 2024 17:00:49 -0500 Subject: [PATCH] Lint and format --- dataretrieval/__init__.py | 4 +- dataretrieval/codes/states.py | 201 ++++++------ dataretrieval/codes/timezones.py | 6 +- dataretrieval/nadp.py | 54 ++-- dataretrieval/nldi.py | 126 ++++---- dataretrieval/nwis.py | 537 ++++++++++++++++--------------- dataretrieval/streamstats.py | 42 +-- dataretrieval/utils.py | 49 +-- dataretrieval/waterwatch.py | 16 +- dataretrieval/wqp.py | 206 ++++++------ pyproject.toml | 15 + tests/iii.py | 201 ++++++++++++ tests/nadp_test.py | 16 +- tests/nldi_test.py | 190 +++++------ tests/nwis_test.py | 177 +++++----- tests/utils_test.py | 31 +- tests/waterservices_test.py | 328 ++++++++++++------- tests/wqp_test.py | 122 ++++--- 18 files changed, 1348 insertions(+), 973 deletions(-) create mode 100755 tests/iii.py diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py index b4d31cf..a52c04c 100644 --- a/dataretrieval/__init__.py +++ b/dataretrieval/__init__.py @@ -8,6 +8,6 @@ from dataretrieval.wqp import * try: - __version__ = version('dataretrieval') + __version__ = version("dataretrieval") except PackageNotFoundError: - __version__ = 'version-unknown' + __version__ = "version-unknown" diff --git a/dataretrieval/codes/states.py b/dataretrieval/codes/states.py index 9ff3d47..e3d83f0 100644 --- a/dataretrieval/codes/states.py +++ b/dataretrieval/codes/states.py @@ -1,108 +1,109 @@ """List of 2-digit state codes with commented full names.""" + state_codes = { - "Alabama": "al", - "Alaska": "ak", - "Arizona": "az", - "Arkansas": "ar", - "California": "ca", - "Colorado": "co", - "Connecticut": "ct", - "Delaware": "de", + "Alabama": "al", + "Alaska": "ak", + "Arizona": "az", + "Arkansas": "ar", + "California": "ca", + "Colorado": "co", + "Connecticut": "ct", + "Delaware": "de", "District of Columbia": "dc", - "Florida": "fl", - "Georgia": "ga", - "Hawaii": "hi", - "Idaho": "id", - "Illinois": "il", - "Indiana": "in", - "Iowa": "ia", - "Kansas": "ks", - "Kentucky": "ky", - "Louisiana": "la", - "Maine": "me", - "Maryland": "md", - "Massachusetts": "ma", - "Michigan": "mi", - "Minnesota": "mn", - "Mississippi": "ms", - "Missouri": "mo", - "Montana": "mt", - "Nebraska": "ne", - "Nevada": "nv", - "New Hampshire": "nh", - "New Jersey": "nj", - "New Mexico": "nm", - "New York": "ny", - "North Carolina": "nc", - "North Dakota": "nd", - "Ohio": "oh", - "Oklahoma": "ok", - "Oregon": "or", - "Pennsylvania": "pa", - "Rhode Island": "ri", - "South Carolina": "sc", - "South Dakota": "sd", - "Tennessee": "tn", - "Texas": "tx", - "Utah": "ut", - "Vermont": "vt", - "Virginia": "va", - "Washington": "wa", - "West Virginia": "wv", - "Wisconsin": "wi", - "Wyoming": "wy", + "Florida": "fl", + "Georgia": "ga", + "Hawaii": "hi", + "Idaho": "id", + "Illinois": "il", + "Indiana": "in", + "Iowa": "ia", + "Kansas": "ks", + "Kentucky": "ky", + "Louisiana": "la", + "Maine": "me", + "Maryland": "md", + "Massachusetts": "ma", + "Michigan": "mi", + "Minnesota": "mn", + "Mississippi": "ms", + "Missouri": "mo", + "Montana": "mt", + "Nebraska": "ne", + "Nevada": "nv", + "New Hampshire": "nh", + "New Jersey": "nj", + "New Mexico": "nm", + "New York": "ny", + "North Carolina": "nc", + "North Dakota": "nd", + "Ohio": "oh", + "Oklahoma": "ok", + "Oregon": "or", + "Pennsylvania": "pa", + "Rhode Island": "ri", + "South Carolina": "sc", + "South Dakota": "sd", + "Tennessee": "tn", + "Texas": "tx", + "Utah": "ut", + "Vermont": "vt", + 
"Virginia": "va", + "Washington": "wa", + "West Virginia": "wv", + "Wisconsin": "wi", + "Wyoming": "wy", } fips_codes = { - "Alabama": "01", - "Alaska": "02", - "Arizona": "04", - "Arkansas": "05", - "California": "06", - "Colorado": "08", - "Connecticut": "09", - "Delaware": "10", + "Alabama": "01", + "Alaska": "02", + "Arizona": "04", + "Arkansas": "05", + "California": "06", + "Colorado": "08", + "Connecticut": "09", + "Delaware": "10", "District of Columbia": "11", - "Florida": "12", - "Georgia": "13", - "Hawaii": "15", - "Idaho": "16", - "Illinois": "17", - "Indiana": "18", - "Iowa": "19", - "Kansas": "20", - "Kentucky": "21", - "Louisiana": "22", - "Maine": "23", - "Maryland": "24", - "Massachusetts": "25", - "Michigan": "26", - "Minnesota": "27", - "Mississippi": "28", - "Missouri": "29", - "Montana": "30", - "Nebraska": "31", - "Nevada": "32", - "New Hampshire": "33", - "New Jersey": "34", - "New Mexico": "35", - "New York": "36", - "North Carolina": "37", - "North Dakota": "38", - "Ohio": "39", - "Oklahoma": "40", - "Oregon": "41", - "Pennsylvania": "42", - "Rhode Island": "44", - "South Carolina": "45", - "South Dakota": "46", - "Tennessee": "47", - "Texas": "48", - "Utah": "49", - "Vermont": "50", - "Virginia": "51", - "Washington": "53", - "West Virginia": "54", - "Wisconsin": "55", - "Wyoming": "56", + "Florida": "12", + "Georgia": "13", + "Hawaii": "15", + "Idaho": "16", + "Illinois": "17", + "Indiana": "18", + "Iowa": "19", + "Kansas": "20", + "Kentucky": "21", + "Louisiana": "22", + "Maine": "23", + "Maryland": "24", + "Massachusetts": "25", + "Michigan": "26", + "Minnesota": "27", + "Mississippi": "28", + "Missouri": "29", + "Montana": "30", + "Nebraska": "31", + "Nevada": "32", + "New Hampshire": "33", + "New Jersey": "34", + "New Mexico": "35", + "New York": "36", + "North Carolina": "37", + "North Dakota": "38", + "Ohio": "39", + "Oklahoma": "40", + "Oregon": "41", + "Pennsylvania": "42", + "Rhode Island": "44", + "South Carolina": "45", + "South Dakota": "46", + "Tennessee": "47", + "Texas": "48", + "Utah": "49", + "Vermont": "50", + "Virginia": "51", + "Washington": "53", + "West Virginia": "54", + "Wisconsin": "55", + "Wyoming": "56", } diff --git a/dataretrieval/codes/timezones.py b/dataretrieval/codes/timezones.py index b801f85..3f91421 100644 --- a/dataretrieval/codes/timezones.py +++ b/dataretrieval/codes/timezones.py @@ -2,7 +2,7 @@ Time zone information """ -tz_str = '''-1200 Y +tz_str = """-1200 Y -1100 X NUT SST -1000 W CKT HAST HST TAHT TKT -0900 V AKST GAMT GIT HADT HNY @@ -39,10 +39,10 @@ -0230 HAT NDT -0330 HNT NST NT -0430 HLV VET --0930 MART MIT''' +-0930 MART MIT""" tz = {} -for tz_descr in map(str.split, tz_str.split('\n')): +for tz_descr in map(str.split, tz_str.split("\n")): tz_offset = tz_descr[0] for tz_code in tz_descr[1:]: tz[tz_code] = tz_offset diff --git a/dataretrieval/nadp.py b/dataretrieval/nadp.py index e578bb9..74037f4 100644 --- a/dataretrieval/nadp.py +++ b/dataretrieval/nadp.py @@ -37,26 +37,26 @@ import requests -NADP_URL = 'https://nadp.slh.wisc.edu' -NADP_MAP_EXT = 'filelib/maps' +NADP_URL = "https://nadp.slh.wisc.edu" +NADP_MAP_EXT = "filelib/maps" -NTN_CONC_PARAMS = ['pH', 'So4', 'NO3', 'NH4', 'Ca', 'Mg', 'K', 'Na', 'Cl', 'Br'] +NTN_CONC_PARAMS = ["pH", "So4", "NO3", "NH4", "Ca", "Mg", "K", "Na", "Cl", "Br"] NTN_DEP_PARAMS = [ - 'H', - 'So4', - 'NO3', - 'NH4', - 'Ca', - 'Mg', - 'K', - 'Na', - 'Cl', - 'Br', - 'N', - 'SPlusN', + "H", + "So4", + "NO3", + "NH4", + "Ca", + "Mg", + "K", + "Na", + "Cl", + "Br", + "N", + "SPlusN", ] 
-NTN_MEAS_TYPE = ['conc', 'dep', 'precip'] # concentration or deposition +NTN_MEAS_TYPE = ["conc", "dep", "precip"] # concentration or deposition class NADP_ZipFile(zipfile.ZipFile): @@ -65,7 +65,7 @@ class NADP_ZipFile(zipfile.ZipFile): def tif_name(self): """Get the name of the tif file in the zip file.""" filenames = self.namelist() - r = re.compile('.*tif$') + r = re.compile(".*tif$") tif_list = list(filter(r.match, filenames)) return tif_list[0] @@ -105,23 +105,23 @@ def get_annual_MDN_map(measurement_type, year, path): >>> # get map of mercury concentration in 2010 and extract it to a path >>> data_path = dataretrieval.nadp.get_annual_MDN_map( - ... measurement_type='conc', year='2010', path='somepath' + ... measurement_type="conc", year="2010", path="somepath" ... ) """ - url = f'{NADP_URL}/{NADP_MAP_EXT}/MDN/grids/' + url = f"{NADP_URL}/{NADP_MAP_EXT}/MDN/grids/" - filename = f'Hg_{measurement_type}_{year}.zip' + filename = f"Hg_{measurement_type}_{year}.zip" z = get_zip(url, filename) if path: z.extractall(path) - return f'{path}{os.sep}{basename(filename)}' + return f"{path}{os.sep}{basename(filename)}" -def get_annual_NTN_map(measurement_type, measurement=None, year=None, path='.'): +def get_annual_NTN_map(measurement_type, measurement=None, year=None, path="."): """Download a NTN map from NDAP. This function looks for a zip file containing gridded information at: @@ -158,23 +158,23 @@ def get_annual_NTN_map(measurement_type, measurement=None, year=None, path='.'): >>> # get a map of precipitation in 2015 and extract it to a path >>> data_path = dataretrieval.nadp.get_annual_NTN_map( - ... measurement_type='Precip', year='2015', path='somepath' + ... measurement_type="Precip", year="2015", path="somepath" ... ) """ - url = f'{NADP_URL}/{NADP_MAP_EXT}/NTN/grids/{year}/' + url = f"{NADP_URL}/{NADP_MAP_EXT}/NTN/grids/{year}/" - filename = f'{measurement_type}_{year}.zip' + filename = f"{measurement_type}_{year}.zip" if measurement: - filename = f'{measurement}_{filename}' + filename = f"{measurement}_{filename}" z = get_zip(url, filename) if path: z.extractall(path) - return f'{path}{os.sep}{basename(filename)}' + return f"{path}{os.sep}{basename(filename)}" def get_zip(url, filename): diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index 30a49c8..157f047 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -6,9 +6,9 @@ try: import geopandas as gpd except ImportError: - raise ImportError('Install geopandas to use the NLDI module.') + raise ImportError("Install geopandas to use the NLDI module.") -NLDI_API_BASE_URL = 'https://labs.waterdata.usgs.gov/api/nldi/linked-data' +NLDI_API_BASE_URL = "https://labs.waterdata.usgs.gov/api/nldi/linked-data" _AVAILABLE_DATA_SOURCES = None _CRS = "EPSG:4326" @@ -17,7 +17,7 @@ def _query_nldi(url, query_params, error_message): # A helper function to query the NLDI API response = query(url, payload=query_params) if response.status_code != 200: - raise ValueError(f'{error_message}. Error reason: {response.reason}') + raise ValueError(f"{error_message}. Error reason: {response.reason}") response_data = {} try: @@ -68,10 +68,14 @@ def get_flowlines( >>> # Get flowlines for a feature source: WQP and >>> # feature id: USGS-01031500 in the upstream main >>> gdf = dataretrieval.nldi.get_flowlines( - ... feature_source="WQP", feature_id="USGS-01031500", navigation_mode="UM" + ... feature_source="WQP", + ... feature_id="USGS-01031500", + ... navigation_mode="UM", ... 
) >>> # Get flowlines for comid: 13294314 in the upstream main - >>> gdf = dataretrieval.nldi.get_flowlines(comid=13294314, navigation_mode="UM") + >>> gdf = dataretrieval.nldi.get_flowlines( + ... comid=13294314, navigation_mode="UM" + ... ) """ # validate the navigation mode _validate_navigation_mode(navigation_mode) @@ -81,15 +85,15 @@ def get_flowlines( # validate the feature source _validate_data_source(feature_source) - url = f'{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/navigation' - query_params = {'distance': str(distance), 'trimStart': str(trim_start).lower()} + url = f"{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/navigation" + query_params = {"distance": str(distance), "trimStart": str(trim_start).lower()} else: - url = f'{NLDI_API_BASE_URL}/comid/{comid}/navigation' - query_params = {'distance': str(distance)} + url = f"{NLDI_API_BASE_URL}/comid/{comid}/navigation" + query_params = {"distance": str(distance)} - url += f'/{navigation_mode}/flowlines' + url += f"/{navigation_mode}/flowlines" if stop_comid is not None: - query_params['stopComid'] = str(stop_comid) + query_params["stopComid"] = str(stop_comid) if feature_source: err_msg = ( @@ -142,12 +146,12 @@ def get_basin( # validate the feature source _validate_data_source(feature_source) if not feature_id: - raise ValueError('feature_id is required') + raise ValueError("feature_id is required") - url = f'{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/basin' + url = f"{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/basin" simplified = str(simplified).lower() split_catchment = str(split_catchment).lower() - query_params = {'simplified': simplified, 'splitCatchment': split_catchment} + query_params = {"simplified": simplified, "splitCatchment": split_catchment} err_msg = ( f"Error getting basin for feature source '{feature_source}' and " f"feature_id '{feature_id}'" @@ -228,25 +232,25 @@ def get_features( # check only one origin is provided if (lat and long is None) or (long and lat is None): - raise ValueError('Both lat and long are required') + raise ValueError("Both lat and long are required") if lat: if comid: raise ValueError( - 'Provide only one origin type - comid cannot be provided' - ' with lat or long' + "Provide only one origin type - comid cannot be provided" + " with lat or long" ) if feature_source or feature_id: raise ValueError( - 'Provide only one origin type - feature_source and feature_id cannot' - ' be provided with lat or long' + "Provide only one origin type - feature_source and feature_id cannot" + " be provided with lat or long" ) if not lat: if comid or data_source: if navigation_mode is None: raise ValueError( - 'navigation_mode is required if comid or data_source is provided' + "navigation_mode is required if comid or data_source is provided" ) # validate the feature source and comid _validate_feature_source_comid(feature_source, feature_id, comid) @@ -260,20 +264,20 @@ def get_features( _validate_navigation_mode(navigation_mode) if lat: - url = f'{NLDI_API_BASE_URL}/comid/position' - query_params = {'coords': f'POINT({long} {lat})'} + url = f"{NLDI_API_BASE_URL}/comid/position" + query_params = {"coords": f"POINT({long} {lat})"} else: if navigation_mode: if feature_source: - url = f'{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/navigation' + url = f"{NLDI_API_BASE_URL}/{feature_source}/{feature_id}/navigation" else: - url = f'{NLDI_API_BASE_URL}/comid/{comid}/navigation' - url += f'/{navigation_mode}/{data_source}' - query_params = {'distance': str(distance)} + url = 
f"{NLDI_API_BASE_URL}/comid/{comid}/navigation" + url += f"/{navigation_mode}/{data_source}" + query_params = {"distance": str(distance)} if stop_comid is not None: - query_params['stopComid'] = str(stop_comid) + query_params["stopComid"] = str(stop_comid) else: - url = f'{NLDI_API_BASE_URL}/{feature_source}/{feature_id}' + url = f"{NLDI_API_BASE_URL}/{feature_source}/{feature_id}" query_params = {} if lat: @@ -316,11 +320,13 @@ def get_features_by_data_source(data_source: str) -> gpd.GeoDataFrame: .. doctest:: >>> # Get features for a feature wqp and feature_id USGS-01031500 - >>> gdf = dataretrieval.nldi.get_features_by_data_source(data_source="nwissite") + >>> gdf = dataretrieval.nldi.get_features_by_data_source( + ... data_source="nwissite" + ... ) """ # validate the data source _validate_data_source(data_source) - url = f'{NLDI_API_BASE_URL}/{data_source}' + url = f"{NLDI_API_BASE_URL}/{data_source}" err_msg = f"Error getting features for data source '{data_source}'" feature_collection = _query_nldi(url, {}, err_msg) gdf = gpd.GeoDataFrame.from_features(feature_collection, crs=_CRS) @@ -332,7 +338,7 @@ def search( feature_id: Optional[str] = None, navigation_mode: Optional[str] = None, data_source: Optional[str] = None, - find: Literal['basin', 'flowlines', 'features'] = 'features', + find: Literal["basin", "flowlines", "features"] = "features", comid: Optional[int] = None, lat: Optional[float] = None, long: Optional[float] = None, @@ -365,7 +371,7 @@ def search( >>> # Search for aggregated basin for feature source: WQP >>> # and feature id: USGS-01031500 >>> search_results = dataretrieval.nldi.search( - ... feature_source="WQP", feature_id="USGS-01031500", find='basin' + ... feature_source="WQP", feature_id="USGS-01031500", find="basin" ... ) >>> # Search for flowlines for feature source: WQP and >>> # feature id: USGS-01031500 in the upstream main @@ -373,7 +379,7 @@ def search( ... feature_source="WQP", ... feature_id="USGS-01031500", ... navigation_mode="UM", - ... find='flowlines', + ... find="flowlines", ... ) >>> # Get registered features for a feature source: WQP, >>> # feature id: USGS-01031500 @@ -385,53 +391,57 @@ def search( >>> search_results = dataretrieval.nldi.search( ... feature_source="WQP", ... feature_id="USGS-01031500", - ... data_source='census2020-nhdpv2', - ... navigation_mode='UM', - ... find='features', + ... data_source="census2020-nhdpv2", + ... navigation_mode="UM", + ... find="features", ... ) >>> # Search for features for comid: 13294314, >>> # and data source: census2020-nhdpv2 in the upstream main >>> search_results = dataretrieval.nldi.search( - ... comid=13294314, data_source="census2020-nhdpv2", navigation_mode="UM" + ... comid=13294314, + ... data_source="census2020-nhdpv2", + ... navigation_mode="UM", ... ) >>> # Search for flowlines for comid: 13294314 in the upstream main >>> search_results = dataretrieval.nldi.search( ... comid=13294314, navigation_mode="UM", find="flowlines" ... ) >>> # Search for features for latitude: 43.073051 and longitude: -89.401230 - >>> search_results = dataretrieval.nldi.search(lat=43.073051, long=-89.401230) + >>> search_results = dataretrieval.nldi.search( + ... lat=43.073051, long=-89.401230 + ... 
) """ if (lat and long is None) or (long and lat is None): - raise ValueError('Both lat and long are required') + raise ValueError("Both lat and long are required") # validate find find = find.lower() - if find not in ('basin', 'flowlines', 'features'): + if find not in ("basin", "flowlines", "features"): raise ValueError( - f'Invalid value for find: {find} - allowed values are:' + f"Invalid value for find: {find} - allowed values are:" f" 'basin', 'flowlines', or 'features'" ) - if lat and find != 'features': + if lat and find != "features": raise ValueError( - f'Invalid value for find: {find} - lat/long is to get features not {find}' + f"Invalid value for find: {find} - lat/long is to get features not {find}" ) - if comid and find == 'basin': + if comid and find == "basin": raise ValueError( - 'Invalid value for find: basin - comid is to get features' - ' or flowlines not basin' + "Invalid value for find: basin - comid is to get features" + " or flowlines not basin" ) if lat: # get features by hydrologic location return get_features(lat=lat, long=long, as_json=True) - if find == 'basin': + if find == "basin": return get_basin( feature_source=feature_source, feature_id=feature_id, as_json=True ) - if find == 'flowlines': + if find == "flowlines": return get_flowlines( navigation_mode=navigation_mode, distance=distance, @@ -459,22 +469,22 @@ def _validate_data_source(data_source: str): # get the available data/feature sources - if not already cached if _AVAILABLE_DATA_SOURCES is None: - url = f'{NLDI_API_BASE_URL}/' + url = f"{NLDI_API_BASE_URL}/" available_data_sources = _query_nldi( - url, {}, 'Error getting available data sources' + url, {}, "Error getting available data sources" ) - _AVAILABLE_DATA_SOURCES = [ds['source'] for ds in available_data_sources] + _AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources] if data_source not in _AVAILABLE_DATA_SOURCES: err_msg = ( f"Invalid data source '{data_source}'." 
- f' Available data sources are: {_AVAILABLE_DATA_SOURCES}' + f" Available data sources are: {_AVAILABLE_DATA_SOURCES}" ) raise ValueError(err_msg) def _validate_navigation_mode(navigation_mode: str): navigation_mode = navigation_mode.upper() - if navigation_mode not in ('UM', 'DM', 'UT', 'DD'): + if navigation_mode not in ("UM", "DM", "UT", "DD"): raise TypeError(f"Invalid navigation mode '{navigation_mode}'") @@ -482,15 +492,15 @@ def _validate_feature_source_comid( feature_source: Optional[str], feature_id: Optional[str], comid: Optional[int] ): if feature_source is not None and feature_id is None: - raise ValueError('feature_id is required if feature_source is provided') + raise ValueError("feature_id is required if feature_source is provided") if feature_id is not None and feature_source is None: - raise ValueError('feature_source is required if feature_id is provided') + raise ValueError("feature_source is required if feature_id is provided") if comid is not None and feature_source is not None: raise ValueError( - 'Specify only one origin type - comid and feature_source' - ' cannot be provided together' + "Specify only one origin type - comid and feature_source" + " cannot be provided together" ) if comid is None and feature_source is None: raise ValueError( - 'Specify one origin type - comid or feature_source is required' + "Specify one origin type - comid or feature_source is required" ) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index f6d24fa..60c132d 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -28,21 +28,21 @@ except ImportError: gpd = None -WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' -WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' -WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/' -PARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?' -ALLPARAMCODES_URL = 'https://help.waterdata.usgs.gov/code/parameter_cd_query?' +WATERDATA_BASE_URL = "https://nwis.waterdata.usgs.gov/" +WATERDATA_URL = WATERDATA_BASE_URL + "nwis/" +WATERSERVICE_URL = "https://waterservices.usgs.gov/nwis/" +PARAMCODES_URL = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?" +ALLPARAMCODES_URL = "https://help.waterdata.usgs.gov/code/parameter_cd_query?" 
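
These URL constants are what query_waterdata and query_waterservices below compose their request URLs from (url = WATERSERVICE_URL + service, etc.); a minimal smoke test of the reformatted module, borrowing the site number from the get_dv docstring further down (a sketch only — requires live access to waterservices.usgs.gov):

>>> import dataretrieval.nwis
>>> df, md = dataretrieval.nwis.get_dv(sites="01646500")  # latest daily values
>>> md.url.startswith("https://waterservices.usgs.gov/nwis/dv")
True
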
-WATERSERVICES_SERVICES = ['dv', 'iv', 'site', 'stat'] +WATERSERVICES_SERVICES = ["dv", "iv", "site", "stat"] WATERDATA_SERVICES = [ - 'qwdata', - 'gwlevels', - 'measurements', - 'peaks', - 'pmcodes', - 'water_use', - 'ratings', + "qwdata", + "gwlevels", + "measurements", + "peaks", + "pmcodes", + "water_use", + "ratings", ] _CRS = "EPSG:4236" @@ -73,34 +73,31 @@ def format_response( The formatted data frame """ - mi = kwargs.pop('multi_index', True) + mi = kwargs.pop("multi_index", True) - if service == 'peaks': + if service == "peaks": df = preformat_peaks_response(df) if gpd is not None: if "dec_lat_va" in list(df): - geoms = gpd.points_from_xy( - df.dec_long_va.values, - df.dec_lat_va.values - ) + geoms = gpd.points_from_xy(df.dec_long_va.values, df.dec_lat_va.values) df = gpd.GeoDataFrame(df, geometry=geoms, crs=_CRS) # check for multiple sites: - if 'datetime' not in df.columns: + if "datetime" not in df.columns: # XXX: consider making site_no index return df - elif len(df['site_no'].unique()) > 1 and mi: + elif len(df["site_no"].unique()) > 1 and mi: # setup multi-index - df.set_index(['site_no', 'datetime'], inplace=True) - if hasattr(df.index.levels[1], 'tzinfo') and df.index.levels[1].tzinfo is None: - df = df.tz_localize('UTC', level=1) + df.set_index(["site_no", "datetime"], inplace=True) + if hasattr(df.index.levels[1], "tzinfo") and df.index.levels[1].tzinfo is None: + df = df.tz_localize("UTC", level=1) else: - df.set_index(['datetime'], inplace=True) - if hasattr(df.index, 'tzinfo') and df.index.tzinfo is None: - df = df.tz_localize('UTC') + df.set_index(["datetime"], inplace=True) + if hasattr(df.index, "tzinfo") and df.index.tzinfo is None: + df = df.tz_localize("UTC") return df.sort_index() @@ -121,8 +118,8 @@ def preformat_peaks_response(df: pd.DataFrame) -> pd.DataFrame: The formatted data frame """ - df['datetime'] = pd.to_datetime(df.pop('peak_dt'), errors='coerce') - df.dropna(subset=['datetime'], inplace=True) + df["datetime"] = pd.to_datetime(df.pop("peak_dt"), errors="coerce") + df.dropna(subset=["datetime"], inplace=True) return df @@ -188,68 +185,72 @@ def get_qwdata( >>> # get water sample information for site 11447650 >>> df, md = dataretrieval.nwis.get_qwdata( - ... sites='11447650', start='2010-01-01', end='2010-02-01' + ... sites="11447650", start="2010-01-01", end="2010-02-01" ... ) """ - warnings.warn(('WARNING: Starting in March 2024, the NWIS qw data endpoint is ' - 'retiring and no longer receives updates. For more information, ' - 'refer to https://waterdata.usgs.gov.nwis/qwdata and ' - 'https://doi-usgs.github.io/dataRetrieval/articles/Status.html ' - 'or email CompTools@usgs.gov.')) + warnings.warn( + ( + "WARNING: Starting in March 2024, the NWIS qw data endpoint is " + "retiring and no longer receives updates. For more information, " + "refer to https://waterdata.usgs.gov.nwis/qwdata and " + "https://doi-usgs.github.io/dataRetrieval/articles/Status.html " + "or email CompTools@usgs.gov." 
+ ) + ) _check_sites_value_types(sites) - kwargs['site_no'] = kwargs.pop('site_no', sites) - kwargs['begin_date'] = kwargs.pop('begin_date', start) - kwargs['end_date'] = kwargs.pop('end_date', end) - kwargs['multi_index'] = multi_index + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + kwargs["multi_index"] = multi_index if wide_format: - kwargs['qw_sample_wide'] = 'qw_sample_wide' + kwargs["qw_sample_wide"] = "qw_sample_wide" payload = { - 'agency_cd': 'USGS', - 'format': 'rdb', - 'pm_cd_compare': 'Greater than', - 'inventory_output': '0', - 'rdb_inventory_output': 'file', - 'TZoutput': '0', - 'rdb_qw_attributes': 'expanded', - 'date_format': 'YYYY-MM-DD', - 'rdb_compression': 'value', - 'submitted_form': 'brief_list', + "agency_cd": "USGS", + "format": "rdb", + "pm_cd_compare": "Greater than", + "inventory_output": "0", + "rdb_inventory_output": "file", + "TZoutput": "0", + "rdb_qw_attributes": "expanded", + "date_format": "YYYY-MM-DD", + "rdb_compression": "value", + "submitted_form": "brief_list", } # check for parameter codes, and reformat query args - qwdata_parameter_code_field = 'parameterCd' + qwdata_parameter_code_field = "parameterCd" if kwargs.get(qwdata_parameter_code_field): parameter_codes = kwargs.pop(qwdata_parameter_code_field) parameter_codes = to_str(parameter_codes) - kwargs['multiple_parameter_cds'] = parameter_codes - kwargs['param_cd_operator'] = 'OR' + kwargs["multiple_parameter_cds"] = parameter_codes + kwargs["param_cd_operator"] = "OR" - search_criteria = kwargs.get('list_of_search_criteria') + search_criteria = kwargs.get("list_of_search_criteria") if search_criteria: - kwargs['list_of_search_criteria'] = '{},{}'.format( - search_criteria, 'multiple_parameter_cds' + kwargs["list_of_search_criteria"] = "{},{}".format( + search_criteria, "multiple_parameter_cds" ) else: - kwargs['list_of_search_criteria'] = 'multiple_parameter_cds' + kwargs["list_of_search_criteria"] = "multiple_parameter_cds" kwargs.update(payload) warnings.warn( - 'NWIS qw web services are being retired. ' - + 'See this note from the R package for more: ' - + 'https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html', + "NWIS qw web services are being retired. " + + "See this note from the R package for more: " + + "https://doi-usgs.github.io/dataRetrieval/articles/qwdata_changes.html", category=DeprecationWarning, ) - response = query_waterdata('qwdata', ssl_check=ssl_check, **kwargs) + response = query_waterdata("qwdata", ssl_check=ssl_check, **kwargs) df = _read_rdb(response.text) if datetime_index is True: - df = format_datetime(df, 'sample_dt', 'sample_tm', 'sample_start_time_datum_cd') + df = format_datetime(df, "sample_dt", "sample_tm", "sample_start_time_datum_cd") return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) @@ -294,27 +295,25 @@ def get_discharge_measurements( >>> # Get discharge measurements for site 05114000 >>> df, md = dataretrieval.nwis.get_discharge_measurements( - ... sites='05114000', start='2000-01-01', end='2000-01-30' + ... sites="05114000", start="2000-01-01", end="2000-01-30" ... ) >>> # Get discharge measurements for sites in Alaska >>> df, md = dataretrieval.nwis.get_discharge_measurements( - ... start='2012-01-09', end='2012-01-10', stateCd='AK' + ... start="2012-01-09", end="2012-01-10", stateCd="AK" ... 
) """ _check_sites_value_types(sites) - kwargs['site_no'] = kwargs.pop('site_no', sites) - kwargs['begin_date'] = kwargs.pop('begin_date', start) - kwargs['end_date'] = kwargs.pop('end_date', end) + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) - if 'format' not in kwargs: - kwargs['format'] = 'rdb' + if "format" not in kwargs: + kwargs["format"] = "rdb" - response = query_waterdata( - 'measurements', ssl_check=ssl_check, **kwargs - ) + response = query_waterdata("measurements", ssl_check=ssl_check, **kwargs) return _read_rdb(response.text), NWIS_Metadata(response, **kwargs) @@ -362,34 +361,34 @@ def get_discharge_peaks( >>> # Get discharge peaks for site 01491000 >>> df, md = dataretrieval.nwis.get_discharge_peaks( - ... sites='01491000', start='1980-01-01', end='1990-01-01' + ... sites="01491000", start="1980-01-01", end="1990-01-01" ... ) >>> # Get discharge peaks for sites in Hawaii >>> df, md = dataretrieval.nwis.get_discharge_peaks( - ... start='1980-01-01', end='1980-01-02', stateCd='HI' + ... start="1980-01-01", end="1980-01-02", stateCd="HI" ... ) """ _check_sites_value_types(sites) - kwargs['site_no'] = kwargs.pop('site_no', sites) - kwargs['begin_date'] = kwargs.pop('begin_date', start) - kwargs['end_date'] = kwargs.pop('end_date', end) - kwargs['multi_index'] = multi_index + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + kwargs["multi_index"] = multi_index - response = query_waterdata('peaks', format='rdb', ssl_check=ssl_check, **kwargs) + response = query_waterdata("peaks", format="rdb", ssl_check=ssl_check, **kwargs) df = _read_rdb(response.text) - return format_response(df, service='peaks', **kwargs), NWIS_Metadata( + return format_response(df, service="peaks", **kwargs), NWIS_Metadata( response, **kwargs ) def get_gwlevels( sites: Optional[Union[List[str], str]] = None, - start: str = '1851-01-01', + start: str = "1851-01-01", end: Optional[str] = None, multi_index: bool = True, datetime_index: bool = True, @@ -433,33 +432,33 @@ def get_gwlevels( .. 
doctest:: >>> # Get groundwater levels for site 434400121275801 - >>> df, md = dataretrieval.nwis.get_gwlevels(sites='434400121275801') + >>> df, md = dataretrieval.nwis.get_gwlevels(sites="434400121275801") """ _check_sites_value_types(sites) # Make kwargs backwards compatible with waterservices # vocabulary - if 'startDT' in kwargs: - kwargs['begin_date'] = kwargs.pop('startDT') - if 'endDT' in kwargs: - kwargs['end_date'] = kwargs.pop('endDT') - if 'sites' in kwargs: - kwargs['site_no'] = kwargs.pop('sites') - if 'stateCd'in kwargs: - kwargs['state_cd'] = kwargs.pop('stateCd') - - kwargs['begin_date'] = kwargs.pop('begin_date', start) - kwargs['end_date'] = kwargs.pop('end_date', end) - kwargs['site_no'] = kwargs.pop('site_no', sites) - kwargs['multi_index'] = multi_index - - response = query_waterdata('gwlevels', format = 'rdb', ssl_check=ssl_check, **kwargs) + if "startDT" in kwargs: + kwargs["begin_date"] = kwargs.pop("startDT") + if "endDT" in kwargs: + kwargs["end_date"] = kwargs.pop("endDT") + if "sites" in kwargs: + kwargs["site_no"] = kwargs.pop("sites") + if "stateCd" in kwargs: + kwargs["state_cd"] = kwargs.pop("stateCd") + + kwargs["begin_date"] = kwargs.pop("begin_date", start) + kwargs["end_date"] = kwargs.pop("end_date", end) + kwargs["site_no"] = kwargs.pop("site_no", sites) + kwargs["multi_index"] = multi_index + + response = query_waterdata("gwlevels", format="rdb", ssl_check=ssl_check, **kwargs) df = _read_rdb(response.text) if datetime_index is True: - df = format_datetime(df, 'lev_dt', 'lev_tm', 'lev_tz_cd') + df = format_datetime(df, "lev_dt", "lev_tm", "lev_tz_cd") return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) @@ -507,19 +506,19 @@ def get_stats( >>> # Get annual water statistics for a site >>> df, md = dataretrieval.nwis.get_stats( - ... sites='01646500', statReportType='annual', statYearType='water' + ... sites="01646500", statReportType="annual", statYearType="water" ... ) >>> # Get monthly statistics for a site >>> df, md = dataretrieval.nwis.get_stats( - ... sites='01646500', statReportType='monthly' + ... sites="01646500", statReportType="monthly" ... 
) """ _check_sites_value_types(sites) response = query_waterservices( - service='stat', sites=sites, ssl_check=ssl_check, **kwargs + service="stat", sites=sites, ssl_check=ssl_check, **kwargs ) return _read_rdb(response.text), NWIS_Metadata(response, **kwargs) @@ -546,24 +545,24 @@ def query_waterdata( request: ``requests.models.Response`` The response object from the API request to the web service """ - major_params = ['site_no', 'state_cd'] + major_params = ["site_no", "state_cd"] bbox_params = [ - 'nw_longitude_va', - 'nw_latitude_va', - 'se_longitude_va', - 'se_latitude_va', + "nw_longitude_va", + "nw_latitude_va", + "se_longitude_va", + "se_latitude_va", ] if not any(key in kwargs for key in major_params + bbox_params): - raise TypeError('Query must specify a major filter: site_no, stateCd, bBox') + raise TypeError("Query must specify a major filter: site_no, stateCd, bBox") elif any(key in kwargs for key in bbox_params) and not all( key in kwargs for key in bbox_params ): - raise TypeError('One or more lat/long coordinates missing or invalid.') + raise TypeError("One or more lat/long coordinates missing or invalid.") if service not in WATERDATA_SERVICES: - raise TypeError('Service not recognized') + raise TypeError("Service not recognized") url = WATERDATA_URL + service @@ -616,17 +615,17 @@ def query_waterservices( """ if not any( - key in kwargs for key in ['sites', 'stateCd', 'bBox', 'huc', 'countyCd'] + key in kwargs for key in ["sites", "stateCd", "bBox", "huc", "countyCd"] ): raise TypeError( - 'Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd' + "Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd" ) if service not in WATERSERVICES_SERVICES: - raise TypeError('Service not recognized') + raise TypeError("Service not recognized") - if 'format' not in kwargs: - kwargs['format'] = 'rdb' + if "format" not in kwargs: + kwargs["format"] = "rdb" url = WATERSERVICE_URL + service @@ -681,21 +680,24 @@ def get_dv( >>> # Get mean statistic daily values for site 04085427 >>> df, md = dataretrieval.nwis.get_dv( - ... sites='04085427', start='2012-01-01', end='2012-06-30', statCd='00003' + ... sites="04085427", + ... start="2012-01-01", + ... end="2012-06-30", + ... statCd="00003", ... ) >>> # Get the latest daily values for site 01646500 - >>> df, md = dataretrieval.nwis.get_dv(sites='01646500') + >>> df, md = dataretrieval.nwis.get_dv(sites="01646500") """ _check_sites_value_types(sites) - kwargs['startDT'] = kwargs.pop('startDT', start) - kwargs['endDT'] = kwargs.pop('endDT', end) - kwargs['sites'] = kwargs.pop('sites', sites) - kwargs['multi_index'] = multi_index + kwargs["startDT"] = kwargs.pop("startDT", start) + kwargs["endDT"] = kwargs.pop("endDT", end) + kwargs["sites"] = kwargs.pop("sites", sites) + kwargs["multi_index"] = multi_index - response = query_waterservices('dv', format='json', ssl_check=ssl_check, **kwargs) + response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs) df = _read_json(response.json()) return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs) @@ -787,27 +789,30 @@ def get_info(ssl_check: bool = True, **kwargs) -> Tuple[pd.DataFrame, BaseMetada .. 
doctest:: >>> # Get site information for a single site - >>> df, md = dataretrieval.nwis.get_info(sites='05114000') + >>> df, md = dataretrieval.nwis.get_info(sites="05114000") >>> # Get site information for multiple sites - >>> df, md = dataretrieval.nwis.get_info(sites=['05114000', '09423350']) + >>> df, md = dataretrieval.nwis.get_info(sites=["05114000", "09423350"]) """ - seriesCatalogOutput = kwargs.pop('seriesCatalogOutput', None) - if seriesCatalogOutput in ['True', 'TRUE', 'true', True]: - - warnings.warn(('WARNING: Starting in March 2024, the NWIS qw data endpoint is ' - 'retiring and no longer receives updates. For more information, ' - 'refer to https://waterdata.usgs.gov.nwis/qwdata and ' - 'https://doi-usgs.github.io/dataRetrieval/articles/Status.html ' - 'or email CompTools@usgs.gov.')) + seriesCatalogOutput = kwargs.pop("seriesCatalogOutput", None) + if seriesCatalogOutput in ["True", "TRUE", "true", True]: + warnings.warn( + ( + "WARNING: Starting in March 2024, the NWIS qw data endpoint is " + "retiring and no longer receives updates. For more information, " + "refer to https://waterdata.usgs.gov.nwis/qwdata and " + "https://doi-usgs.github.io/dataRetrieval/articles/Status.html " + "or email CompTools@usgs.gov." + ) + ) # convert bool to string if necessary - kwargs['seriesCatalogOutput'] = 'True' + kwargs["seriesCatalogOutput"] = "True" else: # cannot have both seriesCatalogOutput and the expanded format - kwargs['siteOutput'] = 'Expanded' + kwargs["siteOutput"] = "Expanded" - response = query_waterservices('site', ssl_check=ssl_check, **kwargs) + response = query_waterservices("site", ssl_check=ssl_check, **kwargs) return _read_rdb(response.text), NWIS_Metadata(response, **kwargs) @@ -860,22 +865,22 @@ def get_iv( >>> # Get instantaneous discharge data for site 05114000 >>> df, md = dataretrieval.nwis.get_iv( - ... sites='05114000', - ... start='2013-11-03', - ... end='2013-11-03', - ... parameterCd='00060', + ... sites="05114000", + ... start="2013-11-03", + ... end="2013-11-03", + ... parameterCd="00060", ... ) """ _check_sites_value_types(sites) - kwargs['startDT'] = kwargs.pop('startDT', start) - kwargs['endDT'] = kwargs.pop('endDT', end) - kwargs['sites'] = kwargs.pop('sites', sites) - kwargs['multi_index'] = multi_index + kwargs["startDT"] = kwargs.pop("startDT", start) + kwargs["endDT"] = kwargs.pop("endDT", end) + kwargs["sites"] = kwargs.pop("sites", sites) + kwargs["multi_index"] = multi_index response = query_waterservices( - service='iv', format='json', ssl_check=ssl_check, **kwargs + service="iv", format="json", ssl_check=ssl_check, **kwargs ) df = _read_json(response.json()) @@ -883,7 +888,7 @@ def get_iv( def get_pmcodes( - parameterCd: Union[str, List[str]] = 'All', + parameterCd: Union[str, List[str]] = "All", partial: bool = True, ssl_check: bool = True, ) -> Tuple[pd.DataFrame, BaseMetadata]: @@ -913,21 +918,23 @@ def get_pmcodes( .. doctest:: >>> # Get information about the '00060' pcode - >>> df, md = dataretrieval.nwis.get_pmcodes(parameterCd='00060', partial=False) + >>> df, md = dataretrieval.nwis.get_pmcodes( + ... parameterCd="00060", partial=False + ... ) >>> # Get information about all 'Discharge' pcodes >>> df, md = dataretrieval.nwis.get_pmcodes( - ... parameterCd='Discharge', partial=True + ... parameterCd="Discharge", partial=True ... 
) """ - payload = {'fmt': 'rdb'} + payload = {"fmt": "rdb"} url = PARAMCODES_URL if isinstance(parameterCd, str): # when a single code or name is given - if parameterCd.lower() == 'all': - payload.update({'group_cd': '%'}) + if parameterCd.lower() == "all": + payload.update({"group_cd": "%"}) url = ALLPARAMCODES_URL response = query(url, payload, ssl_check=ssl_check) return _read_rdb(response.text), NWIS_Metadata(response) @@ -937,7 +944,7 @@ def get_pmcodes( if not isinstance(parameterCd, list): raise TypeError( - 'Parameter information (code or name) must be type string or list' + "Parameter information (code or name) must be type string or list" ) # Querying with a list of parameters names, codes, or mixed @@ -945,25 +952,25 @@ def get_pmcodes( for param in parameterCd: if isinstance(param, str): if partial: - param = f'%{param}%' - payload.update({'parm_nm_cd': param}) + param = f"%{param}%" + payload.update({"parm_nm_cd": param}) response = query(url, payload, ssl_check=ssl_check) if len(response.text.splitlines()) < 10: # empty query raise TypeError( - 'One of the parameter codes or names entered does not' - 'return any information, please try a different value' + "One of the parameter codes or names entered does not" + "return any information, please try a different value" ) return_list.append(_read_rdb(response.text)) else: - raise TypeError('Parameter information (code or name) must be type string') + raise TypeError("Parameter information (code or name) must be type string") return pd.concat(return_list), NWIS_Metadata(response) def get_water_use( - years: Union[str, List[str]] = 'ALL', + years: Union[str, List[str]] = "ALL", state: Optional[str] = None, - counties: Union[str, List[str]] = 'ALL', - categories: Union[str, List[str]] = 'ALL', + counties: Union[str, List[str]] = "ALL", + categories: Union[str, List[str]] = "ALL", ssl_check: bool = True, ) -> Tuple[pd.DataFrame, BaseMetadata]: """ @@ -998,48 +1005,48 @@ def get_water_use( >>> # Get total population for RI from the NWIS water use service >>> df, md = dataretrieval.nwis.get_water_use( - ... years='2000', state='RI', categories='TP' + ... years="2000", state="RI", categories="TP" ... ) >>> # Get the national total water use for livestock in Bgal/day - >>> df, md = dataretrieval.nwis.get_water_use(years='2010', categories='L') + >>> df, md = dataretrieval.nwis.get_water_use(years="2010", categories="L") >>> # Get 2005 domestic water use for Apache County in Arizona >>> df, md = dataretrieval.nwis.get_water_use( - ... years='2005', state='Arizona', counties='001', categories='DO' + ... years="2005", state="Arizona", counties="001", categories="DO" ... 
) """ if years: if not isinstance(years, list) and not isinstance(years, str): - raise TypeError('years must be a string or a list of strings') + raise TypeError("years must be a string or a list of strings") if counties: if not isinstance(counties, list) and not isinstance(counties, str): - raise TypeError('counties must be a string or a list of strings') + raise TypeError("counties must be a string or a list of strings") if categories: if not isinstance(categories, list) and not isinstance(categories, str): - raise TypeError('categories must be a string or a list of strings') + raise TypeError("categories must be a string or a list of strings") payload = { - 'rdb_compression': 'value', - 'format': 'rdb', - 'wu_year': years, - 'wu_category': categories, - 'wu_county': counties, + "rdb_compression": "value", + "format": "rdb", + "wu_year": years, + "wu_category": categories, + "wu_county": counties, } - url = WATERDATA_URL + 'water_use' + url = WATERDATA_URL + "water_use" if state is not None: - url = WATERDATA_BASE_URL + state + '/nwis/water_use' - payload.update({'wu_area': 'county'}) + url = WATERDATA_BASE_URL + state + "/nwis/water_use" + payload.update({"wu_area": "county"}) response = query(url, payload, ssl_check=ssl_check) return _read_rdb(response.text), NWIS_Metadata(response) def get_ratings( site: Optional[str] = None, - file_type: str = 'base', + file_type: str = "base", ssl_check: bool = True, **kwargs, ) -> Tuple[pd.DataFrame, BaseMetadata]: @@ -1075,21 +1082,21 @@ def get_ratings( .. doctest:: >>> # Get the rating table for USGS streamgage 01594440 - >>> df, md = dataretrieval.nwis.get_ratings(site='01594440') + >>> df, md = dataretrieval.nwis.get_ratings(site="01594440") """ - site = kwargs.pop('site_no', site) + site = kwargs.pop("site_no", site) payload = {} - url = WATERDATA_BASE_URL + 'nwisweb/get_ratings/' + url = WATERDATA_BASE_URL + "nwisweb/get_ratings/" if site is not None: - payload.update({'site_no': site}) + payload.update({"site_no": site}) if file_type is not None: - if file_type not in ['base', 'corr', 'exsa']: + if file_type not in ["base", "corr", "exsa"]: raise ValueError( f'Unrecognized file_type: {file_type}, must be "base", "corr" or "exsa"' ) - payload.update({'file_type': file_type}) + payload.update({"file_type": file_type}) response = query(url, payload, ssl_check=ssl_check) return _read_rdb(response.text), NWIS_Metadata(response, site_no=site) @@ -1118,14 +1125,16 @@ def what_sites(ssl_check: bool = True, **kwargs) -> Tuple[pd.DataFrame, BaseMeta .. doctest:: >>> # get information about a single site - >>> df, md = dataretrieval.nwis.what_sites(sites='05114000') + >>> df, md = dataretrieval.nwis.what_sites(sites="05114000") >>> # get information about sites with phosphorus in Ohio - >>> df, md = dataretrieval.nwis.what_sites(stateCd='OH', parameterCd='00665') + >>> df, md = dataretrieval.nwis.what_sites( + ... stateCd="OH", parameterCd="00665" + ... ) """ - response = query_waterservices(service='site', ssl_check=ssl_check, **kwargs) + response = query_waterservices(service="site", ssl_check=ssl_check, **kwargs) df = _read_rdb(response.text) @@ -1140,7 +1149,7 @@ def get_record( wide_format: bool = True, datetime_index: bool = True, state: Optional[str] = None, - service: str = 'iv', + service: str = "iv", ssl_check: bool = True, **kwargs, ) -> pd.DataFrame: @@ -1197,54 +1206,58 @@ def get_record( .. 
doctest:: >>> # Get latest instantaneous data from site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='iv') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="iv") >>> # Get latest daily mean data from site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='dv') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="dv") >>> # Get all discrete sample data from site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='qwdata') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="qwdata") >>> # Get site description for site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='site') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="site") >>> # Get discharge measurements for site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='measurements') + >>> df = dataretrieval.nwis.get_record( + ... sites="01585200", service="measurements" + ... ) >>> # Get discharge peaks for site 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='peaks') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks") >>> # Get latest groundwater level for site 434400121275801 >>> df = dataretrieval.nwis.get_record( - ... sites='434400121275801', service='gwlevels' + ... sites="434400121275801", service="gwlevels" ... ) >>> # Get information about the discharge parameter code - >>> df = dataretrieval.nwis.get_record(service='pmcodes', parameterCd='00060') + >>> df = dataretrieval.nwis.get_record( + ... service="pmcodes", parameterCd="00060" + ... ) >>> # Get water use data for livestock nationally in 2010 >>> df = dataretrieval.nwis.get_record( - ... service='water_use', years='2010', categories='L' + ... service="water_use", years="2010", categories="L" ... ) >>> # Get rating table for USGS streamgage 01585200 - >>> df = dataretrieval.nwis.get_record(sites='01585200', service='ratings') + >>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings") >>> # Get annual statistics for USGS station 01646500 >>> df = dataretrieval.nwis.get_record( - ... sites='01646500', - ... service='stat', - ... statReportType='annual', - ... statYearType='water', + ... sites="01646500", + ... service="stat", + ... statReportType="annual", + ... statYearType="water", ... 
) """ _check_sites_value_types(sites) if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES: - raise TypeError(f'Unrecognized service: {service}') + raise TypeError(f"Unrecognized service: {service}") - if service == 'iv': + if service == "iv": df, _ = get_iv( sites=sites, startDT=start, @@ -1255,7 +1268,7 @@ def get_record( ) return df - elif service == 'dv': + elif service == "dv": df, _ = get_dv( sites=sites, startDT=start, @@ -1266,7 +1279,7 @@ def get_record( ) return df - elif service == 'qwdata': + elif service == "qwdata": df, _ = get_qwdata( site_no=sites, begin_date=start, @@ -1278,17 +1291,17 @@ def get_record( ) return df - elif service == 'site': + elif service == "site": df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs) return df - elif service == 'measurements': + elif service == "measurements": df, _ = get_discharge_measurements( site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs ) return df - elif service == 'peaks': + elif service == "peaks": df, _ = get_discharge_peaks( site_no=sites, begin_date=start, @@ -1299,7 +1312,7 @@ def get_record( ) return df - elif service == 'gwlevels': + elif service == "gwlevels": df, _ = get_gwlevels( sites=sites, startDT=start, @@ -1311,24 +1324,24 @@ def get_record( ) return df - elif service == 'pmcodes': + elif service == "pmcodes": df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs) return df - elif service == 'water_use': + elif service == "water_use": df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs) return df - elif service == 'ratings': + elif service == "ratings": df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs) return df - elif service == 'stat': + elif service == "stat": df, _ = get_stats(sites=sites, ssl_check=ssl_check, **kwargs) return df else: - raise TypeError(f'{service} service not yet implemented') + raise TypeError(f"{service} service not yet implemented") def _read_json(json): @@ -1348,10 +1361,10 @@ def _read_json(json): A custom metadata object """ - merged_df = pd.DataFrame(columns=['site_no', 'datetime']) + merged_df = pd.DataFrame(columns=["site_no", "datetime"]) site_list = [ - ts['sourceInfo']['siteCode'][0]['value'] for ts in json['value']['timeSeries'] + ts["sourceInfo"]["siteCode"][0]["value"] for ts in json["value"]["timeSeries"] ] # create a list of indexes for each change in site no @@ -1368,33 +1381,33 @@ def _read_json(json): # grab a block containing timeseries 0:21, # which are all from the same site - site_block = json['value']['timeSeries'][start:end] + site_block = json["value"]["timeSeries"][start:end] if not site_block: continue - site_no = site_block[0]['sourceInfo']['siteCode'][0]['value'] - site_df = pd.DataFrame(columns=['datetime']) + site_no = site_block[0]["sourceInfo"]["siteCode"][0]["value"] + site_df = pd.DataFrame(columns=["datetime"]) for timeseries in site_block: - param_cd = timeseries['variable']['variableCode'][0]['value'] + param_cd = timeseries["variable"]["variableCode"][0]["value"] # check whether min, max, mean record XXX - option = timeseries['variable']['options']['option'][0].get('value') + option = timeseries["variable"]["options"]["option"][0].get("value") # loop through each parameter in timeseries, then concat to the merged_df - for parameter in timeseries['values']: + for parameter in timeseries["values"]: col_name = param_cd - method = parameter['method'][0]['methodDescription'] + method = parameter["method"][0]["methodDescription"] # if len(timeseries['values']) > 1 and method: if method: # get 
method, format it, and append to column name - method = method.strip('[]()').lower() - col_name = f'{col_name}_{method}' + method = method.strip("[]()").lower() + col_name = f"{col_name}_{method}" if option: - col_name = f'{col_name}_{option}' + col_name = f"{col_name}_{option}" - record_json = parameter['value'] + record_json = parameter["value"] if not record_json: # no data in record @@ -1406,33 +1419,33 @@ def _read_json(json): # Lists can't be hashed, thus we cannot df.merge on a list column record_df = pd.read_json( StringIO(record_json), - orient='records', - dtype={'value': 'float64', 'qualifiers': 'unicode'}, + orient="records", + dtype={"value": "float64", "qualifiers": "unicode"}, convert_dates=False, ) - record_df['qualifiers'] = ( - record_df['qualifiers'].str.strip('[]').str.replace("'", '') + record_df["qualifiers"] = ( + record_df["qualifiers"].str.strip("[]").str.replace("'", "") ) record_df.rename( columns={ - 'value': col_name, - 'dateTime': 'datetime', - 'qualifiers': col_name + '_cd', + "value": col_name, + "dateTime": "datetime", + "qualifiers": col_name + "_cd", }, inplace=True, ) - site_df = site_df.merge(record_df, how='outer', on='datetime') + site_df = site_df.merge(record_df, how="outer", on="datetime") # end of site loop - site_df['site_no'] = site_no + site_df["site_no"] = site_no merged_df = pd.concat([merged_df, site_df]) # convert to datetime, normalizing the timezone to UTC when doing so - if 'datetime' in merged_df.columns: - merged_df['datetime'] = pd.to_datetime(merged_df['datetime'], utc=True) + if "datetime" in merged_df.columns: + merged_df["datetime"] = pd.to_datetime(merged_df["datetime"], utc=True) return merged_df @@ -1456,28 +1469,28 @@ def _read_rdb(rdb): for line in rdb.splitlines(): # ignore comment lines - if line.startswith('#'): + if line.startswith("#"): count = count + 1 else: break - fields = re.split('[\t]', rdb.splitlines()[count]) - fields = [field.replace(',', '') for field in fields] + fields = re.split("[\t]", rdb.splitlines()[count]) + fields = [field.replace(",", "") for field in fields] dtypes = { - 'site_no': str, - 'dec_long_va': float, - 'dec_lat_va': float, - 'parm_cd': str, - 'parameter_cd': str, + "site_no": str, + "dec_long_va": float, + "dec_lat_va": float, + "parm_cd": str, + "parameter_cd": str, } df = pd.read_csv( StringIO(rdb), - delimiter='\t', + delimiter="\t", skiprows=count + 2, names=fields, - na_values='NaN', + na_values="NaN", dtype=dtypes, ) @@ -1488,7 +1501,7 @@ def _read_rdb(rdb): def _check_sites_value_types(sites): if sites: if not isinstance(sites, list) and not isinstance(sites, str): - raise TypeError('sites must be a string or a list of strings') + raise TypeError("sites must be a string or a list of strings") class NWIS_Metadata(BaseMetadata): @@ -1532,10 +1545,10 @@ def __init__(self, response, **parameters) -> None: """ super().__init__(response) - comments = '' + comments = "" for line in response.text.splitlines(): - if line.startswith('#'): - comments += line.lstrip('#') + '\n' + if line.startswith("#"): + comments += line.lstrip("#") + "\n" if comments: self.comment = comments @@ -1551,23 +1564,23 @@ def site_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]: md: :obj:`dataretrieval.nwis.NWIS_Metadata` A NWIS_Metadata object """ - if 'site_no' in self._parameters: - return what_sites(sites=self._parameters['site_no']) + if "site_no" in self._parameters: + return what_sites(sites=self._parameters["site_no"]) - elif 'sites' in self._parameters: - return 
what_sites(sites=self._parameters['sites']) + elif "sites" in self._parameters: + return what_sites(sites=self._parameters["sites"]) - elif 'stateCd' in self._parameters: - return what_sites(stateCd=self._parameters['stateCd']) + elif "stateCd" in self._parameters: + return what_sites(stateCd=self._parameters["stateCd"]) - elif 'huc' in self._parameters: - return what_sites(huc=self._parameters['huc']) + elif "huc" in self._parameters: + return what_sites(huc=self._parameters["huc"]) - elif 'countyCd' in self._parameters: - return what_sites(countyCd=self._parameters['countyCd']) + elif "countyCd" in self._parameters: + return what_sites(countyCd=self._parameters["countyCd"]) - elif 'bBox' in self._parameters: - return what_sites(bBox=self._parameters['bBox']) + elif "bBox" in self._parameters: + return what_sites(bBox=self._parameters["bBox"]) else: return None # don't set metadata site_info attribute @@ -1575,5 +1588,5 @@ def site_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]: @property def variable_info(self) -> Optional[Tuple[pd.DataFrame, BaseMetadata]]: # define variable_info metadata based on parameterCd if available - if 'parameterCd' in self._parameters: - return get_pmcodes(parameterCd=self._parameters['parameterCd']) + if "parameterCd" in self._parameters: + return get_pmcodes(parameterCd=self._parameters["parameterCd"]) diff --git a/dataretrieval/streamstats.py b/dataretrieval/streamstats.py index f6508f5..1de0b74 100644 --- a/dataretrieval/streamstats.py +++ b/dataretrieval/streamstats.py @@ -10,7 +10,7 @@ import requests -def download_workspace(workspaceID, format=''): +def download_workspace(workspaceID, format=""): """Function to download streamstats workspace. Parameters @@ -29,8 +29,8 @@ def download_workspace(workspaceID, format=''): geodatabase or shape files. """ - payload = {'workspaceID': workspaceID, 'format': format} - url = 'https://streamstats.usgs.gov/streamstatsservices/download' + payload = {"workspaceID": workspaceID, "format": format} + url = "https://streamstats.usgs.gov/streamstatsservices/download" r = requests.get(url, params=payload) @@ -58,7 +58,7 @@ def get_sample_watershed(): from the streamstats JSON object. 
""" - return get_watershed('NY', -74.524, 43.939) + return get_watershed("NY", -74.524, 43.939) def get_watershed( @@ -70,7 +70,7 @@ def get_watershed( includeflowtypes=False, includefeatures=True, simplify=True, - format='geojson', + format="geojson", ): """Get watershed object based on location @@ -114,29 +114,29 @@ def get_watershed( """ payload = { - 'rcode': rcode, - 'xlocation': xlocation, - 'ylocation': ylocation, - 'crs': crs, - 'includeparameters': includeparameters, - 'includeflowtypes': includeflowtypes, - 'includefeatures': includefeatures, - 'simplify': simplify, + "rcode": rcode, + "xlocation": xlocation, + "ylocation": ylocation, + "crs": crs, + "includeparameters": includeparameters, + "includeflowtypes": includeflowtypes, + "includefeatures": includefeatures, + "simplify": simplify, } - url = 'https://streamstats.usgs.gov/streamstatsservices/watershed.geojson' + url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson" r = requests.get(url, params=payload) r.raise_for_status() - if format == 'geojson': + if format == "geojson": return r - if format == 'shape': + if format == "shape": # use Fiona to return a shape object pass - if format == 'object': + if format == "object": # return a python object pass @@ -150,10 +150,10 @@ class Watershed: @classmethod def from_streamstats_json(cls, streamstats_json): """Method that creates a Watershed object from a streamstats JSON.""" - cls.watershed_point = streamstats_json['featurecollection'][0]['feature'] - cls.watershed_polygon = streamstats_json['featurecollection'][1]['feature'] - cls.parameters = streamstats_json['parameters'] - cls._workspaceID = streamstats_json['workspaceID'] + cls.watershed_point = streamstats_json["featurecollection"][0]["feature"] + cls.watershed_polygon = streamstats_json["featurecollection"][1]["feature"] + cls.parameters = streamstats_json["parameters"] + cls._workspaceID = streamstats_json["workspaceID"] return cls def __init__(self, rcode, xlocation, ylocation): diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 20a9e1c..53e95ac 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -1,6 +1,7 @@ """ Useful utilities for data munging. """ + import warnings import pandas as pd @@ -10,7 +11,7 @@ from dataretrieval.codes import tz -def to_str(listlike, delimiter=','): +def to_str(listlike, delimiter=","): """Translates list-like objects into strings. Parameters @@ -31,10 +32,10 @@ def to_str(listlike, delimiter=','): -------- .. 
doctest:: - >>> dataretrieval.utils.to_str([1, 'a', 2]) + >>> dataretrieval.utils.to_str([1, "a", 2]) '1,a,2' - >>> dataretrieval.utils.to_str([0, 10, 42], delimiter='+') + >>> dataretrieval.utils.to_str([0, 10, 42], delimiter="+") '0+10+42' """ @@ -77,18 +78,18 @@ def format_datetime(df, date_field, time_field, tz_field): # create a datetime index from the columns in qwdata response df[tz_field] = df[tz_field].map(tz) - df['datetime'] = pd.to_datetime( - df[date_field] + ' ' + df[time_field] + ' ' + df[tz_field], - format='ISO8601', + df["datetime"] = pd.to_datetime( + df[date_field] + " " + df[time_field] + " " + df[tz_field], + format="ISO8601", utc=True, ) # if there are any incomplete dates, warn the user - if df['datetime'].isna().any(): - count = df['datetime'].isna().sum() + if df["datetime"].isna().any(): + count = df["datetime"].isna().sum() warnings.warn( - f'Warning: {count} incomplete dates found, ' - + 'consider setting datetime_index to False.', + f"Warning: {count} incomplete dates found, " + + "consider setting datetime_index to False.", UserWarning, ) @@ -140,20 +141,20 @@ def __init__(self, response) -> None: @property def site_info(self): raise NotImplementedError( - 'site_info must be implemented by utils.BaseMetadata children' + "site_info must be implemented by utils.BaseMetadata children" ) @property def variable_info(self): raise NotImplementedError( - 'variable_info must be implemented by utils.BaseMetadata children' + "variable_info must be implemented by utils.BaseMetadata children" ) def __repr__(self) -> str: - return f'{type(self).__name__}(url={self.url})' + return f"{type(self).__name__}(url={self.url})" -def query(url, payload, delimiter=',', ssl_check=True): +def query(url, payload, delimiter=",", ssl_check=True): """Send a query. Wrapper for requests.get that handles errors, converts listed @@ -184,18 +185,18 @@ def query(url, payload, delimiter=',', ssl_check=True): # payload[index] = (key, to_str(value)) # define the user agent for the query - user_agent = {'user-agent': f'python-dataretrieval/{dataretrieval.__version__}'} + user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"} response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check) if response.status_code == 400: raise ValueError( - f'Bad Request, check that your parameters are correct. URL: {response.url}' + f"Bad Request, check that your parameters are correct. URL: {response.url}" ) elif response.status_code == 404: raise ValueError( - 'Page Not Found Error. May be the result of an empty query. ' - + f'URL: {response.url}' + "Page Not Found Error. May be the result of an empty query. " + + f"URL: {response.url}" ) elif response.status_code == 414: _reason = response.reason @@ -209,12 +210,12 @@ def query(url, payload, delimiter=',', ssl_check=True): start=start, end=end) \n data_list.append(data) # append results to list""" raise ValueError( - 'Request URL too long. Modify your query to use fewer sites. ' - + f'API response reason: {_reason}. Pseudo-code example of how to ' - + f'split your query: \n {_example}' + "Request URL too long. Modify your query to use fewer sites. " + + f"API response reason: {_reason}. 
Pseudo-code example of how to " + + f"split your query: \n {_example}" ) - if response.text.startswith('No sites/data'): + if response.text.startswith("No sites/data"): raise NoSitesError(response.url) return response @@ -228,6 +229,6 @@ def __init__(self, url): def __str__(self): return ( - 'No sites/data found using the selection criteria specified in url: ' - '{url}' + "No sites/data found using the selection criteria specified in url: " + "{url}" ).format(url=self.url) diff --git a/dataretrieval/waterwatch.py b/dataretrieval/waterwatch.py index a8444a6..fc35ecb 100644 --- a/dataretrieval/waterwatch.py +++ b/dataretrieval/waterwatch.py @@ -3,10 +3,10 @@ import pandas as pd import requests -ResponseFormat = 'json' # json, xml +ResponseFormat = "json" # json, xml # WaterWatch won't receive any new features but it will continue to operate. -waterwatch_url = 'https://waterwatch.usgs.gov/webservices/' +waterwatch_url = "https://waterwatch.usgs.gov/webservices/" def _read_json(data: Dict) -> pd.DataFrame: @@ -14,7 +14,7 @@ def _read_json(data: Dict) -> pd.DataFrame: def get_flood_stage( - sites: List[str] = None, fmt: str = 'DF' + sites: List[str] = None, fmt: str = "DF" ) -> Union[pd.DataFrame, Dict]: """ Retrieves flood stages for a list of station numbers. @@ -53,16 +53,16 @@ def get_flood_stage( 50057000 16 20 24 30 """ - res = requests.get(waterwatch_url + 'floodstage', params={'format': ResponseFormat}) + res = requests.get(waterwatch_url + "floodstage", params={"format": ResponseFormat}) if res.ok: json_res = res.json() stages = { - site['site_no']: {k: v for k, v in site.items() if k != 'site_no'} - for site in json_res['sites'] + site["site_no"]: {k: v for k, v in site.items() if k != "site_no"} + for site in json_res["sites"] } else: - raise requests.RequestException(f'[{res.status_code}] - {res.reason}') + raise requests.RequestException(f"[{res.status_code}] - {res.reason}") if not sites: stations_stages = stages @@ -74,7 +74,7 @@ def get_flood_stage( except KeyError: stations_stages[site] = None - if fmt == 'dict': + if fmt == "dict": return stations_stages else: return _read_json(stations_stages) diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index b006a0e..0194c3e 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -8,11 +8,12 @@ - implement other services like Organization, Activity, etc. 
""" + from __future__ import annotations -from typing import TYPE_CHECKING import warnings from io import StringIO +from typing import TYPE_CHECKING import pandas as pd @@ -22,27 +23,27 @@ from pandas import DataFrame -result_profiles_wqx3 = ['basicPhysChem', 'fullPhysChem', 'narrow'] -result_profiles_legacy = ['resultPhysChem', 'biological', 'narrowResult'] -activity_profiles_legacy = ['activityAll'] -services_wqx3 = ['Activity', 'Result', 'Station'] +result_profiles_wqx3 = ["basicPhysChem", "fullPhysChem", "narrow"] +result_profiles_legacy = ["biological", "narrowResult","resultPhysChem"] +activity_profiles_legacy = ["activityAll"] +services_wqx3 = ["Activity", "Result", "Station"] services_legacy = [ - 'Activity', - 'ActivityMetric', - 'BiologicalMetric', - 'Organization', - 'Project', - 'ProjectMonitoringLocationWeighting', - 'Result', - 'ResultDetectionQuantitationLimit', - 'Station', - ] + "Activity", + "ActivityMetric", + "BiologicalMetric", + "Organization", + "Project", + "ProjectMonitoringLocationWeighting", + "Result", + "ResultDetectionQuantitationLimit", + "Station", +] def get_results( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Query the WQP for results. @@ -73,7 +74,7 @@ def get_results( countycode : string US county FIPS code. huc : string - Eight-digit hydrologic unit (HUC), delimited by semicolons. + Eight-digit hydrologic unit (HUC), delimited by semicolons. bBox : string Search bounding box (Example: bBox=-92.8,44.2,-88.9,46.0) lat : string @@ -110,15 +111,15 @@ def get_results( >>> # Get results within a radial distance of a point >>> df, md = dataretrieval.wqp.get_results( - ... lat='44.2', long='-88.9', within='0.5' + ... lat="44.2", long="-88.9", within="0.5" ... ) >>> # Get results within a bounding box - >>> df, md = dataretrieval.wqp.get_results(bBox='-92.8,44.2,-88.9,46.0') + >>> df, md = dataretrieval.wqp.get_results(bBox="-92.8,44.2,-88.9,46.0") >>> # Get results using a new WQX3.0 profile >>> df, md = dataretrieval.wqp.get_results( - ... legacy=False, siteid='UTAHDWQ_WQX-4993795', dataProfile='narrow' + ... legacy=False, siteid="UTAHDWQ_WQX-4993795", dataProfile="narrow" ... ) """ @@ -136,16 +137,16 @@ def get_results( url = wqp_url("Result") else: - if 'dataProfile' in kwargs: - if kwargs['dataProfile'] not in result_profiles_wqx3: + if "dataProfile" in kwargs: + if kwargs["dataProfile"] not in result_profiles_wqx3: raise TypeError( f"dataProfile {kwargs['dataProfile']} is not a valid WQX3.0" f"profile. Valid options are {result_profiles_wqx3}.", - ) + ) else: kwargs["dataProfile"] = "fullPhysChem" - url = wqx3_url('Result') + url = wqx3_url("Result") response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) @@ -154,9 +155,9 @@ def get_results( def what_sites( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for sites within a region with specific data. @@ -192,7 +193,7 @@ def what_sites( >>> # Get sites within a radial distance of a point >>> df, md = dataretrieval.wqp.what_sites( - ... lat='44.2', long='-88.9', within='2.5' + ... lat="44.2", long="-88.9", within="2.5" ... 
) """ @@ -200,21 +201,21 @@ def what_sites( kwargs = _check_kwargs(kwargs) if legacy is True: - url = wqp_url('Station') + url = wqp_url("Station") else: - url = wqx3_url('Station') + url = wqx3_url("Station") - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=',') + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) def what_organizations( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for organizations within a region with specific data. @@ -254,14 +255,14 @@ def what_organizations( kwargs = _check_kwargs(kwargs) if legacy is True: - url = wqp_url('Organization') + url = wqp_url("Organization") else: - print('WQX3.0 profile not available, returning legacy profile.') - url = wqp_url('Organization') + print("WQX3.0 profile not available, returning legacy profile.") + url = wqp_url("Organization") - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=',') + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) @@ -298,29 +299,29 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): .. code:: >>> # Get projects within a HUC region - >>> df, md = dataretrieval.wqp.what_projects(huc='19') + >>> df, md = dataretrieval.wqp.what_projects(huc="19") """ kwargs = _check_kwargs(kwargs) if legacy is True: - url = wqp_url('Project') + url = wqp_url("Project") else: - print('WQX3.0 profile not available, returning legacy profile.') - url = wqp_url('Project') + print("WQX3.0 profile not available, returning legacy profile.") + url = wqp_url("Project") - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=',') + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) def what_activities( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for activities within a region with specific data. @@ -355,16 +356,18 @@ def what_activities( >>> # Get activities within Washington D.C. >>> # during a specific time period >>> df, md = dataretrieval.wqp.what_activities( - ... statecode='US:11', startDateLo='12-30-2019', startDateHi='01-01-2020' + ... statecode="US:11", + ... startDateLo="12-30-2019", + ... startDateHi="01-01-2020", ... ) >>> # Get activities within Washington D.C. >>> # using the WQX3.0 profile during a specific time period >>> df, md = dataretrieval.wqp.what_activities( ... legacy=False, - ... statecode='US:11', - ... startDateLo='12-30-2019', - ... startDateHi='01-01-2020' + ... statecode="US:11", + ... startDateLo="12-30-2019", + ... startDateHi="01-01-2020", ... ) """ @@ -383,9 +386,9 @@ def what_activities( def what_detection_limits( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for result detection limits within a region with specific data. 
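All of the legacy what_* helpers above share one code path: check the kwargs, build the service URL, send a semicolon-delimited query, and parse the CSV response into a DataFrame plus metadata object. A minimal sketch of that shared flow, using names defined in this module and in dataretrieval.utils (the generic wrapper name what_generic itself is hypothetical):

    # Sketch of the pattern shared by the legacy what_* helpers; the
    # wrapper name what_generic is hypothetical, all other names are
    # defined in dataretrieval.wqp or dataretrieval.utils.
    from io import StringIO

    import pandas as pd

    from dataretrieval.utils import query
    from dataretrieval.wqp import WQP_Metadata, _check_kwargs, wqp_url

    def what_generic(service, ssl_check=True, **kwargs):
        kwargs = _check_kwargs(kwargs)  # rejects unsupported kwargs, e.g. mimeType="geojson"
        url = wqp_url(service)  # e.g. "Project" or "BiologicalMetric"
        response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
        df = pd.read_csv(StringIO(response.text), delimiter=",")
        return df, WQP_Metadata(response)

    # illustrative call: df, md = what_generic("Project", statecode="US:38")
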
@@ -421,10 +424,10 @@ def what_detection_limits( >>> # Get detection limits for Nitrite measurements in Rhode Island >>> # between specific dates >>> df, md = dataretrieval.wqp.what_detection_limits( - ... statecode='US:44', - ... characteristicName='Nitrite', - ... startDateLo='01-01-2021', - ... startDateHi='02-20-2021', + ... statecode="US:44", + ... characteristicName="Nitrite", + ... startDateLo="01-01-2021", + ... startDateHi="02-20-2021", ... ) """ @@ -432,22 +435,22 @@ def what_detection_limits( kwargs = _check_kwargs(kwargs) if legacy is True: - url = wqp_url('ResultDetectionQuantitationLimit') + url = wqp_url("ResultDetectionQuantitationLimit") else: - print('WQX3.0 profile not available, returning legacy profile.') - url = wqp_url('ResultDetectionQuantitationLimit') + print("WQX3.0 profile not available, returning legacy profile.") + url = wqp_url("ResultDetectionQuantitationLimit") - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=',') + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) def what_habitat_metrics( - ssl_check=True, - legacy=True, - **kwargs, + ssl_check=True, + legacy=True, + **kwargs, ) -> tuple[DataFrame, WQP_Metadata]: """Search WQP for habitat metrics within a region with specific data. @@ -480,21 +483,21 @@ def what_habitat_metrics( .. code:: >>> # Get habitat metrics for a state (Rhode Island in this case) - >>> df, md = dataretrieval.wqp.what_habitat_metrics(statecode='US:44') + >>> df, md = dataretrieval.wqp.what_habitat_metrics(statecode="US:44") """ kwargs = _check_kwargs(kwargs) if legacy is True: - url = wqp_url('BiologicalMetric') + url = wqp_url("BiologicalMetric") else: - print('WQX3.0 profile not available, returning legacy profile.') - url = wqp_url('BiologicalMetric') + print("WQX3.0 profile not available, returning legacy profile.") + url = wqp_url("BiologicalMetric") - response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check) + response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) - df = pd.read_csv(StringIO(response.text), delimiter=',') + df = pd.read_csv(StringIO(response.text), delimiter=",") return df, WQP_Metadata(response) @@ -533,7 +536,9 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): >>> # Get project weights for a state (North Dakota in this case) >>> # within a set time period >>> df, md = dataretrieval.wqp.what_project_weights( - ... statecode='US:38', startDateLo='01-01-2006', startDateHi='01-01-2009' + ... statecode="US:38", + ... startDateLo="01-01-2006", + ... startDateHi="01-01-2009", ... 
)
 
     """
 
@@ -541,14 +546,14 @@
     kwargs = _check_kwargs(kwargs)
 
     if legacy is True:
-        url = wqp_url('ProjectMonitoringLocationWeighting')
+        url = wqp_url("ProjectMonitoringLocationWeighting")
     else:
-        print('WQX3.0 profile not available, returning legacy profile.')
-        url = wqp_url('ProjectMonitoringLocationWeighting')
+        print("WQX3.0 profile not available, returning legacy profile.")
+        url = wqp_url("ProjectMonitoringLocationWeighting")
 
-    response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check)
+    response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=',')
+    df = pd.read_csv(StringIO(response.text), delimiter=",")
 
     return df, WQP_Metadata(response)
 
@@ -587,7 +592,9 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):
     >>> # Get activity metrics for a state (North Dakota in this case)
     >>> # within a set time period
     >>> df, md = dataretrieval.wqp.what_activity_metrics(
-    ...     statecode='US:38', startDateLo='07-01-2006', startDateHi='12-01-2006'
+    ...     statecode="US:38",
+    ...     startDateLo="07-01-2006",
+    ...     startDateHi="12-01-2006",
     ... )
 
     """
@@ -595,14 +602,14 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):
     kwargs = _check_kwargs(kwargs)
 
     if legacy is True:
-        url = wqp_url('ActivityMetric')
+        url = wqp_url("ActivityMetric")
     else:
-        print('WQX3.0 profile not available, returning legacy profile.')
-        url = wqp_url('ActivityMetric')
+        print("WQX3.0 profile not available, returning legacy profile.")
+        url = wqp_url("ActivityMetric")
 
-    response = query(url, payload=kwargs, delimiter=';', ssl_check=ssl_check)
+    response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check)
 
-    df = pd.read_csv(StringIO(response.text), delimiter=',')
+    df = pd.read_csv(StringIO(response.text), delimiter=",")
 
     return df, WQP_Metadata(response)
 
@@ -610,31 +617,31 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs):
 
 def wqp_url(service):
     """Construct the WQP URL for a given service."""
-    base_url = 'https://www.waterqualitydata.us/data/'
+    base_url = "https://www.waterqualitydata.us/data/"
 
     _warn_legacy_use()
 
     if service not in services_legacy:
         raise TypeError(
-            'Legacy service not recognized. Valid options are',
-            f'{services_legacy}.',
-            )
+            "Legacy service not recognized. Valid options are "
+            f"{services_legacy}.",
+        )
 
-    return f'{base_url}{service}/Search?'
+    return f"{base_url}{service}/Search?"
 
 
 def wqx3_url(service):
     """Construct the WQP URL for a given WQX 3.0 service."""
-    base_url = 'https://www.waterqualitydata.us/wqx3/'
+    base_url = "https://www.waterqualitydata.us/wqx3/"
 
     _warn_wqx3_use()
 
     if service not in services_wqx3:
         raise TypeError(
-            'WQX3.0 service not recognized. Valid options are',
-            f'{services_wqx3}.',
-            )
+            "WQX3.0 service not recognized. Valid options are "
+            f"{services_wqx3}.",
+        )
 
-    return f'{base_url}{service}/search?'
+    return f"{base_url}{service}/search?"
 
 
 class WQP_Metadata(BaseMetadata):
@@ -688,8 +695,7 @@ def site_info(self):
 
 
 def _check_kwargs(kwargs):
-    """Private function to check kwargs for unsupported parameters.
-    """
+    """Private function to check kwargs for unsupported parameters."""
    mimetype = kwargs.get("mimeType")
    if mimetype == "geojson":
        raise NotImplementedError("GeoJSON not yet supported. Set 'mimeType=csv'.")
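Both URL builders follow the same guard-then-format shape: emit the relevant warning, validate the service name against the module-level service list, then fill in the endpoint template. A short usage sketch, assuming only the definitions above; the commented output is what the string templates would produce:

    # Sketch: exercising wqp_url and wqx3_url as defined above; both
    # call their warning helper before validating the service name.
    from dataretrieval.wqp import wqp_url, wqx3_url

    print(wqp_url("Station"))  # https://www.waterqualitydata.us/data/Station/Search?
    print(wqx3_url("Result"))  # https://www.waterqualitydata.us/wqx3/Result/search?

    try:
        wqp_url("Ratings")  # not in services_legacy
    except TypeError as err:
        print(err)  # message lists the valid legacy services
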
diff --git a/pyproject.toml b/pyproject.toml
index 3c2d47e..a276f11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,3 +61,18 @@ profile = "black"
 
 [tool.black]
 skip-string-normalization = true
+
+[tool.ruff.format]
+quote-style = "double"
+docstring-code-format = true
+docstring-code-line-length = 72
+
+[tool.ruff.lint]
+preview = true
+# Ruff's default rules are "E4", "E7", "E9", and "F" (pycodestyle and Pyflakes)
+extend-select = [
+    "B", "I", "Q",  # bugbear, import sorting, quote style
+    "W291", "W292", "W293", "W605",  # trailing whitespace, EOF newline, blank-line whitespace, invalid escapes
+    "E231", "E252", "E261", "E262", "E303", "E501",  # spacing, comment style, blank lines, line length
+]
+
diff --git a/tests/iii.py b/tests/iii.py
new file mode 100755
index 0000000..f460af6
--- /dev/null
+++ b/tests/iii.py
@@ -0,0 +1,201 @@
+import datetime
+
+import pytest
+from pandas import DataFrame
+
+from dataretrieval.wqp import (
+    _alter_kwargs,
+    get_results,
+    what_activities,
+    what_activity_metrics,
+    what_detection_limits,
+    what_habitat_metrics,
+    what_organizations,
+    what_project_weights,
+    what_projects,
+    what_sites,
+)
+
+
+def test_get_ratings(requests_mock):
+    """Tests water quality portal results query"""
+    request_url = (
+        "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762"
+        "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011"
+        "&zip=no&mimeType=csv"
+    )
+    response_file_path = "data/wqp_results.txt"
+    mock_request(requests_mock, request_url, response_file_path)
+    df, md = get_results(
+        siteid="WIDNR_WQX-10032762",
+        characteristicName="Specific conductance",
+        startDateLo="05-01-2011",
+        startDateHi="09-30-2011",
+    )
+    assert type(df) is DataFrame
+    assert df.size == 315
+    assert md.url == request_url
+    assert isinstance(md.query_time, datetime.timedelta)
+    assert md.header == {"mock_header": "value"}
+    assert md.comment is None
+
+
+def test_what_sites(requests_mock):
+    """Tests Water quality portal sites query"""
+    request_url = (
+        "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride&zip=no"
+        "&mimeType=csv"
+    )
+    response_file_path = "data/wqp_sites.txt"
+    mock_request(requests_mock, request_url, response_file_path)
+    df, md = what_sites(statecode="US:34", characteristicName="Chloride")
+    assert type(df) is DataFrame
+    assert df.size == 239868
+    assert md.url == request_url
+    assert isinstance(md.query_time, datetime.timedelta)
+    assert md.header == {"mock_header": "value"}
+    assert md.comment is None
+
+
+def test_what_organizations(requests_mock):
+    """Tests Water quality portal organizations query"""
+    request_url = (
+        "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride&zip=no"
+        "&mimeType=csv"
+    )
+    response_file_path = "data/wqp_organizations.txt"
+    mock_request(requests_mock, request_url, response_file_path)
+    df, md = what_organizations(statecode="US:34", characteristicName="Chloride")
+    assert type(df) is DataFrame
+    assert df.size == 576
+    assert md.url == request_url
+    assert isinstance(md.query_time, datetime.timedelta)
+    assert md.header == {"mock_header": "value"}
+    assert md.comment is None
+
+
+def test_what_projects(requests_mock):
+    """Tests Water quality portal projects query"""
+    request_url = (
+        "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride&zip=no"
+        "&mimeType=csv"
+    )
+    response_file_path = "data/wqp_projects.txt"
+    mock_request(requests_mock, request_url, response_file_path)
+    df, md = what_projects(statecode="US:34", characteristicName="Chloride")
+    assert 
type(df) is DataFrame + assert df.size == 530 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def test_what_activities(requests_mock): + """Tests Water quality portal activities query""" + request_url = ( + "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" + "&mimeType=csv" + ) + response_file_path = "data/wqp_activities.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = what_activities(statecode="US:34", characteristicName="Chloride") + assert type(df) is DataFrame + assert df.size == 5087443 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def test_what_detection_limits(requests_mock): + """Tests Water quality portal detection limits query""" + request_url = ( + "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" + "&mimeType=csv" + ) + response_file_path = "data/wqp_detection_limits.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = what_detection_limits(statecode="US:34", characteristicName="Chloride") + assert type(df) is DataFrame + assert df.size == 98770 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def test_what_habitat_metrics(requests_mock): + """Tests Water quality portal habitat metrics query""" + request_url = ( + "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" + "&mimeType=csv" + ) + response_file_path = "data/wqp_habitat_metrics.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = what_habitat_metrics(statecode="US:34", characteristicName="Chloride") + assert type(df) is DataFrame + assert df.size == 48114 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def test_what_project_weights(requests_mock): + """Tests Water quality portal project weights query""" + request_url = ( + "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" + "&mimeType=csv" + ) + response_file_path = "data/wqp_project_weights.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = what_project_weights(statecode="US:34", characteristicName="Chloride") + assert type(df) is DataFrame + assert df.size == 33098 + assert md.url == request_url + assert isinstance(md.query_time, datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def test_what_activity_metrics(requests_mock): + """Tests Water quality portal activity metrics query""" + request_url = ( + "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride&zip=no" + "&mimeType=csv" + ) + response_file_path = "data/wqp_activity_metrics.txt" + mock_request(requests_mock, request_url, response_file_path) + df, md = what_activity_metrics(statecode="US:34", characteristicName="Chloride") + assert type(df) is DataFrame + assert df.size == 378 + assert md.url == request_url + assert isinstance(md.query_time, 
datetime.timedelta) + assert md.header == {"mock_header": "value"} + assert md.comment is None + + +def mock_request(requests_mock, request_url, file_path): + with open(file_path) as text: + requests_mock.get( + request_url, text=text.read(), headers={"mock_header": "value"} + ) + + +class TestAlterKwargs: + """Tests for keyword alteration.""" + + def test_alter_kwargs_zip(self): + """Tests that zip kwarg is altered correctly and warning is thrown.""" + kwargs = {"zip": "yes", "mimeType": "csv"} + with pytest.warns(UserWarning): + kwargs = _alter_kwargs(kwargs) + assert kwargs == {"zip": "no", "mimeType": "csv"} + + def test_alter_kwargs_mimetype(self): + """Tests that mimetype kwarg is altered correctly and warning is thrown.""" + kwargs = {"zip": "no", "mimeType": "geojson"} + with pytest.warns(UserWarning): + kwargs = _alter_kwargs(kwargs) + assert kwargs == {"zip": "no", "mimeType": "csv"} diff --git a/tests/nadp_test.py b/tests/nadp_test.py index b747828..123e9e0 100644 --- a/tests/nadp_test.py +++ b/tests/nadp_test.py @@ -1,5 +1,7 @@ """Tests for NADP functions.""" + import os + import dataretrieval.nadp as nadp @@ -15,14 +17,15 @@ class TestMDNmap: def test_get_annual_MDN_map_zip(self, tmp_path): """Test the get_annual_MDN_map function zip return.""" z_path = nadp.get_annual_MDN_map( - measurement_type='conc', year='2010', path=tmp_path) - exp_path = os.path.join(tmp_path, 'Hg_conc_2010.zip') + measurement_type="conc", year="2010", path=tmp_path + ) + exp_path = os.path.join(tmp_path, "Hg_conc_2010.zip") # assert path matches expectation assert z_path == str(exp_path) # assert unpacked zip exists as a directory assert os.path.exists(exp_path[:-4]) # assert tif exists in directory - assert os.path.exists(os.path.join(z_path[:-4], 'conc_Hg_2010.tif')) + assert os.path.exists(os.path.join(z_path[:-4], "conc_Hg_2010.tif")) class TestNTNmap: @@ -31,11 +34,12 @@ class TestNTNmap: def test_get_annual_NTN_map_zip(self, tmp_path): """Test the get_annual_NTN_map function zip return.""" z_path = nadp.get_annual_NTN_map( - measurement_type='Precip', year='2015', path=tmp_path) - exp_path = os.path.join(tmp_path, 'Precip_2015.zip') + measurement_type="Precip", year="2015", path=tmp_path + ) + exp_path = os.path.join(tmp_path, "Precip_2015.zip") # assert path matches expectation assert z_path == str(exp_path) # assert unpacked zip exists as a directory assert os.path.exists(exp_path[:-4]) # assert tif exists in directory - assert os.path.exists(os.path.join(z_path[:-4], 'Precip_2015.tif')) + assert os.path.exists(os.path.join(z_path[:-4], "Precip_2015.tif")) diff --git a/tests/nldi_test.py b/tests/nldi_test.py index d0fb4a2..c4d6675 100644 --- a/tests/nldi_test.py +++ b/tests/nldi_test.py @@ -10,48 +10,48 @@ def mock_request_data_sources(requests_mock): - request_url = f'{NLDI_API_BASE_URL}/' + request_url = f"{NLDI_API_BASE_URL}/" available_data_sources = [ - {'source': 'ca_gages'}, - {'source': 'census2020-nhdpv2'}, - {'source': 'epa_nrsa'}, - {'source': 'geoconnex-demo'}, - {'source': 'gfv11_pois'}, - {'source': 'huc12pp'}, - {'source': 'huc12pp_102020'}, - {'source': 'nmwdi-st'}, - {'source': 'npdes'}, - {'source': 'nwisgw'}, - {'source': 'nwissite'}, - {'source': 'ref_gage'}, - {'source': 'vigil'}, - {'source': 'wade'}, - {'source': 'WQP'}, - {'source': 'comid'}, + {"source": "ca_gages"}, + {"source": "census2020-nhdpv2"}, + {"source": "epa_nrsa"}, + {"source": "geoconnex-demo"}, + {"source": "gfv11_pois"}, + {"source": "huc12pp"}, + {"source": "huc12pp_102020"}, + {"source": "nmwdi-st"}, + 
{"source": "npdes"}, + {"source": "nwisgw"}, + {"source": "nwissite"}, + {"source": "ref_gage"}, + {"source": "vigil"}, + {"source": "wade"}, + {"source": "WQP"}, + {"source": "comid"}, ] requests_mock.get( - request_url, json=available_data_sources, headers={'mock_header': 'value'} + request_url, json=available_data_sources, headers={"mock_header": "value"} ) def mock_request(requests_mock, request_url, file_path): with open(file_path) as text: requests_mock.get( - request_url, text=text.read(), headers={'mock_header': 'value'} + request_url, text=text.read(), headers={"mock_header": "value"} ) def test_get_basin(requests_mock): """Tests NLDI get basin query""" request_url = ( - f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/basin' - f'?simplified=true&splitCatchment=false' + f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/basin" + f"?simplified=true&splitCatchment=false" ) - response_file_path = 'data/nldi_get_basin.json' + response_file_path = "data/nldi_get_basin.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) - gdf = get_basin(feature_source='WQP', feature_id='USGS-054279485') + gdf = get_basin(feature_source="WQP", feature_id="USGS-054279485") assert isinstance(gdf, GeoDataFrame) assert gdf.size == 1 @@ -59,15 +59,15 @@ def test_get_basin(requests_mock): def test_get_flowlines(requests_mock): """Tests NLDI get flowlines query using feature source as the origin""" request_url = ( - f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/flowlines' - f'?distance=5&trimStart=false' + f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/flowlines" + f"?distance=5&trimStart=false" ) - response_file_path = 'data/nldi_get_flowlines.json' + response_file_path = "data/nldi_get_flowlines.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) gdf = get_flowlines( - feature_source='WQP', feature_id='USGS-054279485', navigation_mode='UM' + feature_source="WQP", feature_id="USGS-054279485", navigation_mode="UM" ) assert isinstance(gdf, GeoDataFrame) assert gdf.size == 2 @@ -76,13 +76,13 @@ def test_get_flowlines(requests_mock): def test_get_flowlines_by_comid(requests_mock): """Tests NLDI get flowlines query using comid as the origin""" request_url = ( - f'{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/flowlines?distance=50' + f"{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/flowlines?distance=50" ) - response_file_path = 'data/nldi_get_flowlines_by_comid.json' + response_file_path = "data/nldi_get_flowlines_by_comid.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) - gdf = get_flowlines(navigation_mode='UM', comid=13294314, distance=50) + gdf = get_flowlines(navigation_mode="UM", comid=13294314, distance=50) assert isinstance(gdf, GeoDataFrame) assert gdf.size == 16 @@ -92,17 +92,17 @@ def test_features_by_feature_source_with_navigation(requests_mock): with navigation mode """ request_url = ( - f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50' + f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50" ) - response_file_path = 'data/nldi_get_features_by_feature_source_with_nav_mode.json' + response_file_path = "data/nldi_get_features_by_feature_source_with_nav_mode.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) gdf = get_features( - feature_source='WQP', - feature_id='USGS-054279485', - data_source='nwissite', - 
navigation_mode='UM', + feature_source="WQP", + feature_id="USGS-054279485", + data_source="nwissite", + navigation_mode="UM", distance=50, ) assert isinstance(gdf, GeoDataFrame) @@ -113,27 +113,27 @@ def test_features_by_feature_source_without_navigation(requests_mock): """Tests NLDI get features query using feature source as the origin without navigation mode """ - request_url = f'{NLDI_API_BASE_URL}/WQP/USGS-054279485' + request_url = f"{NLDI_API_BASE_URL}/WQP/USGS-054279485" response_file_path = ( - 'data/nldi_get_features_by_feature_source_without_nav_mode.json' + "data/nldi_get_features_by_feature_source_without_nav_mode.json" ) mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) - gdf = get_features(feature_source='WQP', feature_id='USGS-054279485') + gdf = get_features(feature_source="WQP", feature_id="USGS-054279485") assert isinstance(gdf, GeoDataFrame) assert gdf.size == 10 def test_get_features_by_comid(requests_mock): """Tests NLDI get features query using comid as the origin""" - request_url = f'{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/WQP?distance=5' - response_file_path = 'data/nldi_get_features_by_comid.json' + request_url = f"{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/WQP?distance=5" + response_file_path = "data/nldi_get_features_by_comid.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) gdf = get_features( - comid=13294314, data_source='WQP', navigation_mode='UM', distance=5 + comid=13294314, data_source="WQP", navigation_mode="UM", distance=5 ) assert isinstance(gdf, GeoDataFrame) assert gdf.size == 405 @@ -142,9 +142,9 @@ def test_get_features_by_comid(requests_mock): def test_get_features_by_lat_long(requests_mock): """Tests NLDI get features query using lat/long as the origin""" request_url = ( - f'{NLDI_API_BASE_URL}/comid/position?coords=POINT%28-89.509%2043.087%29' + f"{NLDI_API_BASE_URL}/comid/position?coords=POINT%28-89.509%2043.087%29" ) - response_file_path = 'data/nldi_get_features_by_lat_long.json' + response_file_path = "data/nldi_get_features_by_lat_long.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) @@ -155,124 +155,124 @@ def test_get_features_by_lat_long(requests_mock): def test_search_for_basin(requests_mock): """Tests NLDI search query for basin""" - request_url = f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/basin' - response_file_path = 'data/nldi_get_basin.json' + request_url = f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/basin" + response_file_path = "data/nldi_get_basin.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) search_results = search( - feature_source='WQP', feature_id='USGS-054279485', find='basin' + feature_source="WQP", feature_id="USGS-054279485", find="basin" ) assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'Polygon' - assert len(search_results['features'][0]['geometry']['coordinates'][0]) == 122 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "Polygon" + assert len(search_results["features"][0]["geometry"]["coordinates"][0]) == 122 def test_search_for_flowlines(requests_mock): """Tests NLDI search query for flowlines""" - request_url = f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/flowlines' - 
response_file_path = 'data/nldi_get_flowlines.json' + request_url = f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/flowlines" + response_file_path = "data/nldi_get_flowlines.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) search_results = search( - feature_source='WQP', - feature_id='USGS-054279485', - navigation_mode='UM', - find='flowlines', + feature_source="WQP", + feature_id="USGS-054279485", + navigation_mode="UM", + find="flowlines", ) assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'LineString' - assert len(search_results['features'][0]['geometry']['coordinates']) == 27 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "LineString" + assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27 def test_search_for_flowlines_by_comid(requests_mock): """Tests NLDI search query for flowlines by comid""" - request_url = f'{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/flowlines' - response_file_path = 'data/nldi_get_flowlines_by_comid.json' + request_url = f"{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/flowlines" + response_file_path = "data/nldi_get_flowlines_by_comid.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) - search_results = search(comid=13294314, navigation_mode='UM', find='flowlines') + search_results = search(comid=13294314, navigation_mode="UM", find="flowlines") assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'LineString' - assert len(search_results['features'][0]['geometry']['coordinates']) == 27 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "LineString" + assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27 def test_search_for_features_by_feature_source_with_navigation(requests_mock): """Tests NLDI search query for features by feature source""" request_url = ( - f'{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50' + f"{NLDI_API_BASE_URL}/WQP/USGS-054279485/navigation/UM/nwissite?distance=50" ) - response_file_path = 'data/nldi_get_features_by_feature_source_with_nav_mode.json' + response_file_path = "data/nldi_get_features_by_feature_source_with_nav_mode.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) search_results = search( - feature_source='WQP', - feature_id='USGS-054279485', - data_source='nwissite', - navigation_mode='UM', - find='features', + feature_source="WQP", + feature_id="USGS-054279485", + data_source="nwissite", + navigation_mode="UM", + find="features", ) assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'Point' - assert len(search_results['features']) == 9 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "Point" + assert len(search_results["features"]) == 9 def test_search_for_features_by_feature_source_without_navigation(requests_mock): """Tests NLDI search query for features by feature source""" - request_url = f'{NLDI_API_BASE_URL}/WQP/USGS-054279485' 
+ request_url = f"{NLDI_API_BASE_URL}/WQP/USGS-054279485" response_file_path = ( - 'data/nldi_get_features_by_feature_source_without_nav_mode.json' + "data/nldi_get_features_by_feature_source_without_nav_mode.json" ) mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) search_results = search( - feature_source='WQP', feature_id='USGS-054279485', find='features' + feature_source="WQP", feature_id="USGS-054279485", find="features" ) assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'Point' - assert len(search_results['features']) == 1 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "Point" + assert len(search_results["features"]) == 1 def test_search_for_features_by_comid(requests_mock): """Tests NLDI search query for features by comid""" - request_url = f'{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/WQP?distance=5' - response_file_path = 'data/nldi_get_features_by_comid.json' + request_url = f"{NLDI_API_BASE_URL}/comid/13294314/navigation/UM/WQP?distance=5" + response_file_path = "data/nldi_get_features_by_comid.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) search_results = search( comid=13294314, - data_source='WQP', - navigation_mode='UM', - find='features', + data_source="WQP", + navigation_mode="UM", + find="features", distance=5, ) assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'Point' - assert len(search_results['features']) == 45 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "Point" + assert len(search_results["features"]) == 45 def test_search_for_features_by_lat_long(requests_mock): """Tests NLDI search query for features by lat/long""" request_url = ( - f'{NLDI_API_BASE_URL}/comid/position?coords=POINT%28-89.509%2043.087%29' + f"{NLDI_API_BASE_URL}/comid/position?coords=POINT%28-89.509%2043.087%29" ) - response_file_path = 'data/nldi_get_features_by_lat_long.json' + response_file_path = "data/nldi_get_features_by_lat_long.json" mock_request_data_sources(requests_mock) mock_request(requests_mock, request_url, response_file_path) - search_results = search(lat=43.087, long=-89.509, find='features') + search_results = search(lat=43.087, long=-89.509, find="features") assert isinstance(search_results, dict) - assert search_results['features'][0]['type'] == 'Feature' - assert search_results['features'][0]['geometry']['type'] == 'LineString' - assert len(search_results['features'][0]['geometry']['coordinates']) == 27 + assert search_results["features"][0]["type"] == "Feature" + assert search_results["features"][0]["geometry"]["type"] == "LineString" + assert len(search_results["features"][0]["geometry"]["coordinates"]) == 27 diff --git a/tests/nwis_test.py b/tests/nwis_test.py index c44a987..3cfcef0 100644 --- a/tests/nwis_test.py +++ b/tests/nwis_test.py @@ -5,23 +5,28 @@ import pandas as pd import pytest -from dataretrieval.nwis import NWIS_Metadata -from dataretrieval.nwis import get_info, get_record, preformat_peaks_response, get_iv, what_sites +from dataretrieval.nwis import ( + NWIS_Metadata, + get_info, + get_iv, + get_record, + preformat_peaks_response, + what_sites, +) -START_DATE = '2018-01-24' 
-END_DATE = '2018-01-25' +START_DATE = "2018-01-24" +END_DATE = "2018-01-25" -DATETIME_COL = 'datetime' -SITENO_COL = 'site_no' +DATETIME_COL = "datetime" +SITENO_COL = "site_no" def test_measurements_service(): - """Test measurement service - """ - start = '2018-01-24' - end = '2018-01-25' - service = 'measurements' - site = '03339000' + """Test measurement service""" + start = "2018-01-24" + end = "2018-01-25" + service = "measurements" + site = "03339000" df = get_record(site, start, end, service=service) return df @@ -29,60 +34,56 @@ def test_measurements_service(): def test_measurements_service_answer(): df = test_measurements_service() # check parsing - assert df.iloc[0]['measurement_nu'] == 801 + assert df.iloc[0]["measurement_nu"] == 801 def test_iv_service(): - """Unit test of instantaneous value service - """ + """Unit test of instantaneous value service""" start = START_DATE end = END_DATE - service = 'iv' - site = ['03339000', '05447500', '03346500'] + service = "iv" + site = ["03339000", "05447500", "03346500"] return get_record(site, start, end, service=service) def test_iv_service_answer(): df = test_iv_service() # check multiindex function - assert df.index.names == [SITENO_COL, DATETIME_COL], "iv service returned incorrect index: {}".format(df.index.names) + assert df.index.names == [ + SITENO_COL, + DATETIME_COL, + ], "iv service returned incorrect index: {}".format(df.index.names) def test_preformat_peaks_response(): # make a data frame with a "peak_dt" datetime column # it will have some nan and none values - data = {"peak_dt": ["2000-03-22", - np.nan, - None], - "peak_va": [1000, - 2000, - 3000] - } + data = {"peak_dt": ["2000-03-22", np.nan, None], "peak_va": [1000, 2000, 3000]} # turn data into dataframe df = pd.DataFrame(data) # run preformat function df = preformat_peaks_response(df) # assertions - assert 'datetime' in df.columns - assert df['datetime'].isna().sum() == 0 + assert "datetime" in df.columns + assert df["datetime"].isna().sum() == 0 @pytest.mark.parametrize("site_input_type_list", [True, False]) def test_get_record_site_value_types(site_input_type_list): """Test that get_record method for valid input types for the 'sites' parameter.""" - start = '2018-01-24' - end = '2018-01-25' - service = 'measurements' - site = '03339000' + start = "2018-01-24" + end = "2018-01-25" + service = "measurements" + site = "03339000" if site_input_type_list: sites = [site] else: sites = site df = get_record(sites=sites, start=start, end=end, service=service) - assert df.iloc[0]['measurement_nu'] == 801 + assert df.iloc[0]["measurement_nu"] == 801 -if __name__ == '__main__': +if __name__ == "__main__": test_measurements_service_answer() test_iv_service_answer() @@ -91,23 +92,25 @@ def test_get_record_site_value_types(site_input_type_list): # these specific queries represent some edge-cases and the tests to address # incomplete date-time information + def test_inc_date_01(): """Test based on GitHub Issue #47 - lack of timestamp for measurement.""" site = "403451073585601" # make call expecting a warning to be thrown due to incomplete dates with pytest.warns(UserWarning): - df = get_record(site, "1980-01-01", "1990-01-01", service='gwlevels') + df = get_record(site, "1980-01-01", "1990-01-01", service="gwlevels") # assert that there are indeed incomplete dates assert any(pd.isna(df.index) == True) # assert that the datetime index is there - assert df.index.name == 'datetime' + assert df.index.name == "datetime" # make call without defining a datetime index and check that it 
isn't there - df2 = get_record(site, "1980-01-01", "1990-01-01", service='gwlevels', - datetime_index=False) + df2 = get_record( + site, "1980-01-01", "1990-01-01", service="gwlevels", datetime_index=False + ) # assert shape of both dataframes is the same (contain the same data) assert df.shape == df2.shape # assert that the datetime index is not there - assert df2.index.name != 'datetime' + assert df2.index.name != "datetime" def test_inc_date_02(): @@ -115,18 +118,19 @@ def test_inc_date_02(): site = "180049066381200" # make call expecting a warning to be thrown due to incomplete dates with pytest.warns(UserWarning): - df = get_record(site, "1900-01-01", "2013-01-01", service='gwlevels') + df = get_record(site, "1900-01-01", "2013-01-01", service="gwlevels") # assert that there are indeed incomplete dates assert any(pd.isna(df.index) == True) # assert that the datetime index is there - assert df.index.name == 'datetime' + assert df.index.name == "datetime" # make call without defining a datetime index and check that it isn't there - df2 = get_record(site, "1900-01-01", "2013-01-01", service='gwlevels', - datetime_index=False) + df2 = get_record( + site, "1900-01-01", "2013-01-01", service="gwlevels", datetime_index=False + ) # assert shape of both dataframes is the same (contain the same data) assert df.shape == df2.shape # assert that the datetime index is not there - assert df2.index.name != 'datetime' + assert df2.index.name != "datetime" def test_inc_date_03(): @@ -134,30 +138,32 @@ def test_inc_date_03(): site = "290000095192602" # make call expecting a warning to be thrown due to incomplete dates with pytest.warns(UserWarning): - df = get_record(site, "1975-01-01", "2000-01-01", service='gwlevels') + df = get_record(site, "1975-01-01", "2000-01-01", service="gwlevels") # assert that there are indeed incomplete dates assert any(pd.isna(df.index) == True) # assert that the datetime index is there - assert df.index.name == 'datetime' + assert df.index.name == "datetime" # make call without defining a datetime index and check that it isn't there - df2 = get_record(site, "1975-01-01", "2000-01-01", service='gwlevels', - datetime_index=False) + df2 = get_record( + site, "1975-01-01", "2000-01-01", service="gwlevels", datetime_index=False + ) # assert shape of both dataframes is the same (contain the same data) assert df.shape == df2.shape # assert that the datetime index is not there - assert df2.index.name != 'datetime' + assert df2.index.name != "datetime" class TestTZ: """Tests relating to GitHub Issue #60.""" - sites, _ = what_sites(stateCd='MD') + + sites, _ = what_sites(stateCd="MD") def test_multiple_tz_01(self): """Test based on GitHub Issue #60 - error merging different time zones.""" # this test fails before issue #60 is fixed iv, _ = get_iv(sites=self.sites.site_no.values[:25].tolist()) # assert that the datetime column exists - assert 'datetime' in iv.index.names + assert "datetime" in iv.index.names # assert that it is a datetime type assert isinstance(iv.index[0][1], datetime.datetime) @@ -166,7 +172,7 @@ def test_multiple_tz_02(self): # this test passes before issue #60 is fixed iv, _ = get_iv(sites=self.sites.site_no.values[:20].tolist()) # assert that the datetime column exists - assert 'datetime' in iv.index.names + assert "datetime" in iv.index.names # assert that it is a datetime type assert isinstance(iv.index[0][1], datetime.datetime) @@ -176,58 +182,59 @@ class TestSiteseriesCatalogOutput: def test_seriesCatalogOutput_get_record(self): """Test setting 
seriesCatalogOutput to true with get_record.""" - data = get_record(huc='20', parameterCd='00060', - service='site', seriesCatalogOutput='True') + data = get_record( + huc="20", parameterCd="00060", service="site", seriesCatalogOutput="True" + ) # assert that expected data columns are present - assert 'begin_date' in data.columns - assert 'end_date' in data.columns - assert 'count_nu' in data.columns + assert "begin_date" in data.columns + assert "end_date" in data.columns + assert "count_nu" in data.columns def test_seriesCatalogOutput_get_info(self): """Test setting seriesCatalogOutput to true with get_info.""" - data, _ = get_info( - huc='20', parameterCd='00060', seriesCatalogOutput='TRUE') + data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput="TRUE") # assert that expected data columns are present - assert 'begin_date' in data.columns - assert 'end_date' in data.columns - assert 'count_nu' in data.columns + assert "begin_date" in data.columns + assert "end_date" in data.columns + assert "count_nu" in data.columns def test_seriesCatalogOutput_bool(self): """Test setting seriesCatalogOutput with a boolean.""" - data, _ = get_info( - huc='20', parameterCd='00060', seriesCatalogOutput=True) + data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput=True) # assert that expected data columns are present - assert 'begin_date' in data.columns - assert 'end_date' in data.columns - assert 'count_nu' in data.columns + assert "begin_date" in data.columns + assert "end_date" in data.columns + assert "count_nu" in data.columns def test_expandedrdb_get_record(self): """Test default expanded_rdb format with get_record.""" - data = get_record(huc='20', parameterCd='00060', - service='site', seriesCatalogOutput='False') + data = get_record( + huc="20", parameterCd="00060", service="site", seriesCatalogOutput="False" + ) # assert that seriesCatalogOutput columns are not present - assert 'begin_date' not in data.columns - assert 'end_date' not in data.columns - assert 'count_nu' not in data.columns + assert "begin_date" not in data.columns + assert "end_date" not in data.columns + assert "count_nu" not in data.columns def test_expandedrdb_get_info(self): """Test default expanded_rdb format with get_info.""" - data, _ = get_info(huc='20', parameterCd='00060') + data, _ = get_info(huc="20", parameterCd="00060") # assert that seriesCatalogOutput columns are not present - assert 'begin_date' not in data.columns - assert 'end_date' not in data.columns - assert 'count_nu' not in data.columns + assert "begin_date" not in data.columns + assert "end_date" not in data.columns + assert "count_nu" not in data.columns def test_empty_timeseries(): """Test based on empty case from GitHub Issue #26.""" - df = get_record(sites='011277906', service='iv', - start='2010-07-20', end='2010-07-20') + df = get_record( + sites="011277906", service="iv", start="2010-07-20", end="2010-07-20" + ) assert df.empty is True class TestMetaData: - """Tests of NWIS metadata setting, + """Tests of NWIS metadata setting, Notes ----- @@ -241,16 +248,16 @@ def test_set_metadata_info_site(self): # mock the query response response = mock.MagicMock() # make metadata call - md = NWIS_Metadata(response, sites='01491000') + md = NWIS_Metadata(response, sites="01491000") # assert that site_info is implemented assert md.site_info - + def test_set_metadata_info_site_no(self): """Test metadata info is set when site_no parameter is supplied.""" # mock the query response response = mock.MagicMock() # make metadata call - md 
= NWIS_Metadata(response, site_no='01491000') + md = NWIS_Metadata(response, site_no="01491000") # assert that site_info is implemented assert md.site_info @@ -259,7 +266,7 @@ def test_set_metadata_info_stateCd(self): # mock the query response response = mock.MagicMock() # make metadata call - md = NWIS_Metadata(response, stateCd='RI') + md = NWIS_Metadata(response, stateCd="RI") # assert that site_info is implemented assert md.site_info @@ -268,7 +275,7 @@ def test_set_metadata_info_huc(self): # mock the query response response = mock.MagicMock() # make metadata call - md = NWIS_Metadata(response, huc='01') + md = NWIS_Metadata(response, huc="01") # assert that site_info is implemented assert md.site_info @@ -277,7 +284,7 @@ def test_set_metadata_info_bbox(self): # mock the query response response = mock.MagicMock() # make metadata call - md = NWIS_Metadata(response, bBox='-92.8,44.2,-88.9,46.0') + md = NWIS_Metadata(response, bBox="-92.8,44.2,-88.9,46.0") # assert that site_info is implemented assert md.site_info @@ -286,6 +293,6 @@ def test_set_metadata_info_countyCd(self): # mock the query response response = mock.MagicMock() # make metadata call - md = NWIS_Metadata(response, countyCd='01001') + md = NWIS_Metadata(response, countyCd="01001") # assert that site_info is implemented assert md.site_info diff --git a/tests/utils_test.py b/tests/utils_test.py index 76522c5..a99f91e 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -1,8 +1,11 @@ """Unit tests for functions in utils.py""" + +import unittest.mock as mock + import pytest -from dataretrieval import utils + import dataretrieval.nwis as nwis -import unittest.mock as mock +from dataretrieval import utils class Test_query: @@ -14,7 +17,7 @@ def test_url_too_long(self): Test based on GitHub Issue #64 """ # all sites in MD - sites, _ = nwis.what_sites(stateCd='MD') + sites, _ = nwis.what_sites(stateCd="MD") # expected error message _msg = "Request URL too long. Modify your query to use fewer sites. 
API response reason: Request-URI Too Long" # raise error by trying to query them all, so URL is way too long @@ -23,15 +26,18 @@ def test_url_too_long(self): def test_header(self): """Test checking header info with user-agent is part of query.""" - url = 'https://waterservices.usgs.gov/nwis/dv' - payload = {'format': 'json', - 'startDT': '2010-10-01', - 'endDT': '2010-10-10', - 'sites': '01646500', - 'multi_index': True} + url = "https://waterservices.usgs.gov/nwis/dv" + payload = { + "format": "json", + "startDT": "2010-10-01", + "endDT": "2010-10-10", + "sites": "01646500", + "multi_index": True, + } response = utils.query(url, payload) assert response.status_code == 200 # GET was successful - assert 'user-agent' in response.request.headers + assert "user-agent" in response.request.headers + class Test_BaseMetadata: """Tests of BaseMetadata""" @@ -39,15 +45,14 @@ class Test_BaseMetadata: def test_init_with_response(self): response = mock.MagicMock() md = utils.BaseMetadata(response) - + ## Test parameters initialized from the API response assert md.url is not None assert md.query_time is not None assert md.header is not None - ## Test NotImplementedError parameters + ## Test NotImplementedError parameters with pytest.raises(NotImplementedError): md.site_info with pytest.raises(NotImplementedError): md.variable_info - diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py index 10eda33..323b605 100755 --- a/tests/waterservices_test.py +++ b/tests/waterservices_test.py @@ -18,7 +18,7 @@ get_water_use, query_waterdata, query_waterservices, - what_sites + what_sites, ) from dataretrieval.utils import NoSitesError @@ -31,39 +31,49 @@ def test_query_waterdata_validation(): """Tests the validation parameters of the query_waterservices method""" with pytest.raises(TypeError) as type_error: - query_waterdata(service='pmcodes', format='rdb') - assert 'Query must specify a major filter: site_no, stateCd, bBox' == str(type_error.value) + query_waterdata(service="pmcodes", format="rdb") + assert "Query must specify a major filter: site_no, stateCd, bBox" == str( + type_error.value + ) with pytest.raises(TypeError) as type_error: - query_waterdata(service=None, site_no='sites') - assert 'Service not recognized' == str(type_error.value) + query_waterdata(service=None, site_no="sites") + assert "Service not recognized" == str(type_error.value) with pytest.raises(TypeError) as type_error: - query_waterdata(service='pmcodes', nw_longitude_va='something') - assert 'One or more lat/long coordinates missing or invalid.' == str(type_error.value) + query_waterdata(service="pmcodes", nw_longitude_va="something") + assert "One or more lat/long coordinates missing or invalid." 
== str( + type_error.value + ) def test_query_waterservices_validation(): """Tests the validation parameters of the query_waterservices method""" with pytest.raises(TypeError) as type_error: - query_waterservices(service='dv', format='rdb') - assert 'Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd' == str(type_error.value) + query_waterservices(service="dv", format="rdb") + assert ( + "Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd" + == str(type_error.value) + ) with pytest.raises(TypeError) as type_error: - query_waterservices(service=None, sites='sites') - assert 'Service not recognized' == str(type_error.value) + query_waterservices(service=None, sites="sites") + assert "Service not recognized" == str(type_error.value) def test_query_validation(requests_mock): - request_url = "https://waterservices.usgs.gov/nwis/stat?sites=bad_site_id&format=rdb" + request_url = ( + "https://waterservices.usgs.gov/nwis/stat?sites=bad_site_id&format=rdb" + ) requests_mock.get(request_url, status_code=400) with pytest.raises(ValueError) as type_error: get_stats(sites="bad_site_id") assert request_url in str(type_error) request_url = "https://waterservices.usgs.gov/nwis/stat?sites=123456&format=rdb" - requests_mock.get(request_url, - text="No sites/data found using the selection criteria specified") + requests_mock.get( + request_url, text="No sites/data found using the selection criteria specified" + ) with pytest.raises(NoSitesError) as no_sites_error: get_stats(sites="123456") assert request_url in str(no_sites_error) @@ -72,19 +82,23 @@ def test_query_validation(requests_mock): def test_get_record_validation(): """Tests the validation parameters of the get_record method""" with pytest.raises(TypeError) as type_error: - get_record(sites=['01491000'], service='not_a_service') - assert 'Unrecognized service: not_a_service' == str(type_error.value) + get_record(sites=["01491000"], service="not_a_service") + assert "Unrecognized service: not_a_service" == str(type_error.value) def test_get_dv(requests_mock): """Tests get_dv method correctly generates the request url and returns the result in a DataFrame""" format = "json" - site = '01491000%2C01645000' - request_url = 'https://waterservices.usgs.gov/nwis/dv?format={}' \ - '&startDT=2020-02-14&endDT=2020-02-15&sites={}'.format(format, site) - response_file_path = 'data/waterservices_dv.txt' + site = "01491000%2C01645000" + request_url = ( + "https://waterservices.usgs.gov/nwis/dv?format={}" + "&startDT=2020-02-14&endDT=2020-02-15&sites={}".format(format, site) + ) + response_file_path = "data/waterservices_dv.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_dv(sites=["01491000", "01645000"], start='2020-02-14', end='2020-02-15') + df, md = get_dv( + sites=["01491000", "01645000"], start="2020-02-14", end="2020-02-15" + ) if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -97,16 +111,18 @@ def test_get_dv(requests_mock): def test_get_dv_site_value_types(requests_mock, site_input_type_list): """Tests get_dv method for valid input types for the 'sites' parameter""" _format = "json" - site = '01491000' - request_url = 'https://waterservices.usgs.gov/nwis/dv?format={}' \ - '&startDT=2020-02-14&endDT=2020-02-15&sites={}'.format(_format, site) - response_file_path = 'data/waterservices_dv.txt' + site = "01491000" + request_url = ( + "https://waterservices.usgs.gov/nwis/dv?format={}" + 
"&startDT=2020-02-14&endDT=2020-02-15&sites={}".format(_format, site) + ) + response_file_path = "data/waterservices_dv.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] else: sites = site - df, md = get_dv(sites=sites, start='2020-02-14', end='2020-02-15') + df, md = get_dv(sites=sites, start="2020-02-14", end="2020-02-15") if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -116,12 +132,16 @@ def test_get_dv_site_value_types(requests_mock, site_input_type_list): def test_get_iv(requests_mock): """Tests get_iv method correctly generates the request url and returns the result in a DataFrame""" format = "json" - site = '01491000%2C01645000' - request_url = 'https://waterservices.usgs.gov/nwis/iv?format={}' \ - '&startDT=2019-02-14&endDT=2020-02-15&sites={}'.format(format, site) - response_file_path = 'data/waterservices_iv.txt' + site = "01491000%2C01645000" + request_url = ( + "https://waterservices.usgs.gov/nwis/iv?format={}" + "&startDT=2019-02-14&endDT=2020-02-15&sites={}".format(format, site) + ) + response_file_path = "data/waterservices_iv.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_iv(sites=["01491000", "01645000"], start='2019-02-14', end='2020-02-15') + df, md = get_iv( + sites=["01491000", "01645000"], start="2019-02-14", end="2020-02-15" + ) if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -134,16 +154,18 @@ def test_get_iv(requests_mock): def test_get_iv_site_value_types(requests_mock, site_input_type_list): """Tests get_iv method for valid input type for the 'sites' parameter""" _format = "json" - site = '01491000' - request_url = 'https://waterservices.usgs.gov/nwis/iv?format={}' \ - '&startDT=2019-02-14&endDT=2020-02-15&sites={}'.format(_format, site) - response_file_path = 'data/waterservices_iv.txt' + site = "01491000" + request_url = ( + "https://waterservices.usgs.gov/nwis/iv?format={}" + "&startDT=2019-02-14&endDT=2020-02-15&sites={}".format(_format, site) + ) + response_file_path = "data/waterservices_iv.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] else: sites = site - df, md = get_iv(sites=sites, start='2019-02-14', end='2020-02-15') + df, md = get_iv(sites=sites, start="2019-02-14", end="2020-02-15") if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 563380 @@ -157,10 +179,12 @@ def test_get_info(requests_mock): """ size = 24 format = "rdb" - site = '01491000%2C01645000' + site = "01491000%2C01645000" parameter_cd = "00618" - request_url = 'https://waterservices.usgs.gov/nwis/site?sites={}¶meterCd={}&siteOutput=Expanded&format={}'.format(site, parameter_cd, format) - response_file_path = 'data/waterservices_site.txt' + request_url = "https://waterservices.usgs.gov/nwis/site?sites={}¶meterCd={}&siteOutput=Expanded&format={}".format( + site, parameter_cd, format + ) + response_file_path = "data/waterservices_site.txt" mock_request(requests_mock, request_url, response_file_path) df, md = get_info(sites=["01491000", "01645000"], parameterCd="00618") if not isinstance(df, DataFrame): @@ -183,12 +207,16 @@ def test_get_qwdata(requests_mock): """Tests get_qwdata method correctly generates the request url and returns the result in a DataFrame""" format = "rdb" - site = '01491000%2C01645000' - request_url = 
'https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}' \ - '&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than' \ - '&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded' \ - '&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list'.format(site, format) - response_file_path = 'data/waterdata_qwdata.txt' + site = "01491000%2C01645000" + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" + "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" + "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" + "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( + site, format + ) + ) + response_file_path = "data/waterdata_qwdata.txt" mock_request(requests_mock, request_url, response_file_path) with pytest.warns(DeprecationWarning): df, md = get_qwdata(sites=["01491000", "01645000"]) @@ -213,12 +241,16 @@ def test_get_qwdata(requests_mock): def test_get_qwdata_site_value_types(requests_mock, site_input_type_list): """Tests get_qwdata method for valid input types for the 'sites' parameter""" _format = "rdb" - site = '01491000' - request_url = 'https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}' \ - '&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than' \ - '&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded' \ - '&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list'.format(site, _format) - response_file_path = 'data/waterdata_qwdata.txt' + site = "01491000" + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}" + "&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than" + "&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded" + "&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list".format( + site, _format + ) + ) + response_file_path = "data/waterdata_qwdata.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] @@ -233,10 +265,12 @@ def test_get_qwdata_site_value_types(requests_mock, site_input_type_list): def test_get_gwlevels(requests_mock): """Tests get_gwlevels method correctly generates the request url and returns the result in a DataFrame.""" format = "rdb" - site = '434400121275801' - request_url = 'https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01' \ - '&site_no={}'.format(format, site) - response_file_path = 'data/waterdata_gwlevels.txt' + site = "434400121275801" + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01" + "&site_no={}".format(format, site) + ) + response_file_path = "data/waterdata_gwlevels.txt" mock_request(requests_mock, request_url, response_file_path) df, md = get_gwlevels(sites=site) if not isinstance(df, DataFrame): @@ -250,10 +284,12 @@ def test_get_gwlevels(requests_mock): def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list): """Tests get_gwlevels method for valid input types for the 'sites' parameter.""" _format = "rdb" - site = '434400121275801' - request_url = 'https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01' \ - '&site_no={}'.format(_format, site) - response_file_path = 'data/waterdata_gwlevels.txt' + site = "434400121275801" + request_url = ( + 
"https://nwis.waterdata.usgs.gov/nwis/gwlevels?format={}&begin_date=1851-01-01" + "&site_no={}".format(_format, site) + ) + response_file_path = "data/waterdata_gwlevels.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] @@ -268,12 +304,14 @@ def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list): def test_get_discharge_peaks(requests_mock): """Tests get_discharge_peaks method correctly generates the request url and returns the result in a DataFrame""" format = "rdb" - site = '01594440' - request_url = 'https://nwis.waterdata.usgs.gov/nwis/peaks?format={}&site_no={}' \ - '&begin_date=2000-02-14&end_date=2020-02-15'.format(format, site) - response_file_path = 'data/waterservices_peaks.txt' + site = "01594440" + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/peaks?format={}&site_no={}" + "&begin_date=2000-02-14&end_date=2020-02-15".format(format, site) + ) + response_file_path = "data/waterservices_peaks.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_discharge_peaks(sites=[site], start='2000-02-14', end='2020-02-15') + df, md = get_discharge_peaks(sites=[site], start="2000-02-14", end="2020-02-15") if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -286,17 +324,19 @@ def test_get_discharge_peaks_sites_value_types(requests_mock, site_input_type_li """Tests get_discharge_peaks for valid input types of the 'sites' parameter""" _format = "rdb" - site = '01594440' - request_url = 'https://nwis.waterdata.usgs.gov/nwis/peaks?format={}&site_no={}' \ - '&begin_date=2000-02-14&end_date=2020-02-15'.format(_format, site) - response_file_path = 'data/waterservices_peaks.txt' + site = "01594440" + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/peaks?format={}&site_no={}" + "&begin_date=2000-02-14&end_date=2020-02-15".format(_format, site) + ) + response_file_path = "data/waterservices_peaks.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] else: sites = site - df, md = get_discharge_peaks(sites=sites, start='2000-02-14', end='2020-02-15') + df, md = get_discharge_peaks(sites=sites, start="2000-02-14", end="2020-02-15") if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -308,11 +348,15 @@ def test_get_discharge_measurements(requests_mock): DataFrame""" format = "rdb" site = "01594440" - request_url = 'https://nwis.waterdata.usgs.gov/nwis/measurements?site_no={}' \ - '&begin_date=2000-02-14&end_date=2020-02-15&format={}'.format(site, format) - response_file_path = 'data/waterdata_measurements.txt' + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/measurements?site_no={}" + "&begin_date=2000-02-14&end_date=2020-02-15&format={}".format(site, format) + ) + response_file_path = "data/waterdata_measurements.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_discharge_measurements(sites=[site], start='2000-02-14', end='2020-02-15') + df, md = get_discharge_measurements( + sites=[site], start="2000-02-14", end="2020-02-15" + ) if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") @@ -321,19 +365,25 @@ def test_get_discharge_measurements(requests_mock): @pytest.mark.parametrize("site_input_type_list", [True, False]) -def test_get_discharge_measurements_sites_value_types(requests_mock, site_input_type_list): +def 
test_get_discharge_measurements_sites_value_types( + requests_mock, site_input_type_list +): """Tests get_discharge_measurements method for valid input types for 'sites' parameter""" format = "rdb" site = "01594440" - request_url = 'https://nwis.waterdata.usgs.gov/nwis/measurements?site_no={}' \ - '&begin_date=2000-02-14&end_date=2020-02-15&format={}'.format(site, format) - response_file_path = 'data/waterdata_measurements.txt' + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/measurements?site_no={}" + "&begin_date=2000-02-14&end_date=2020-02-15&format={}".format(site, format) + ) + response_file_path = "data/waterdata_measurements.txt" mock_request(requests_mock, request_url, response_file_path) if site_input_type_list: sites = [site] else: sites = site - df, md = get_discharge_measurements(sites=sites, start='2000-02-14', end='2020-02-15') + df, md = get_discharge_measurements( + sites=sites, start="2000-02-14", end="2020-02-15" + ) if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 2130 @@ -344,9 +394,9 @@ def test_get_pmcodes(requests_mock): DataFrame""" format = "rdb" request_url = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?fmt=rdb&parm_nm_cd=%2500618%25" - response_file_path = 'data/waterdata_pmcodes.txt' + response_file_path = "data/waterdata_pmcodes.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_pmcodes(parameterCd='00618') + df, md = get_pmcodes(parameterCd="00618") if not isinstance(df, DataFrame): raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 13 @@ -354,13 +404,15 @@ def test_get_pmcodes(requests_mock): @pytest.mark.parametrize("parameterCd_input_type_list", [True, False]) -def test_get_pmcodes_parameterCd_value_types(requests_mock, parameterCd_input_type_list): +def test_get_pmcodes_parameterCd_value_types( + requests_mock, parameterCd_input_type_list +): """Tests get_pmcodes method for valid input types for the 'parameterCd' parameter""" _format = "rdb" - parameterCd = '00618' + parameterCd = "00618" request_url = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?fmt={}&parm_nm_cd=%25{}%25" request_url = request_url.format(_format, parameterCd) - response_file_path = 'data/waterdata_pmcodes.txt' + response_file_path = "data/waterdata_pmcodes.txt" mock_request(requests_mock, request_url, response_file_path) if parameterCd_input_type_list: parameterCd = [parameterCd] @@ -376,9 +428,11 @@ def test_get_water_use_national(requests_mock): """Tests get_discharge_measurements method correctly generates the request url and returns the result in a DataFrame""" format = "rdb" - request_url = 'https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL' \ - '&wu_category=ALL&wu_county=ALL'.format(format) - response_file_path = 'data/water_use_national.txt' + request_url = ( + "https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL" + "&wu_category=ALL&wu_county=ALL".format(format) + ) + response_file_path = "data/water_use_national.txt" mock_request(requests_mock, request_url, response_file_path) df, md = get_water_use() if not isinstance(df, DataFrame): @@ -392,9 +446,11 @@ def test_get_water_use_national_year_value_types(requests_mock, year_input_type_ """Tests get_water_use method for valid input types for the 'years' parameter""" _format = "rdb" year = "ALL" - request_url = 
'https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL' \
-                  '&wu_category=ALL&wu_county=ALL'.format(_format)
-    response_file_path = 'data/water_use_national.txt'
+    request_url = (
+        "https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL"
+        "&wu_category=ALL&wu_county=ALL".format(_format)
+    )
+    response_file_path = "data/water_use_national.txt"
     mock_request(requests_mock, request_url, response_file_path)
     if year_input_type_list:
         years = [year]
@@ -406,13 +462,17 @@ def test_get_water_use_national_year_value_types(requests_mock, year_input_type_
 @pytest.mark.parametrize("county_input_type_list", [True, False])
-def test_get_water_use_national_county_value_types(requests_mock, county_input_type_list):
+def test_get_water_use_national_county_value_types(
+    requests_mock, county_input_type_list
+):
     """Tests get_water_use method for valid input types for the 'counties' parameter"""
     _format = "rdb"
     county = "ALL"
-    request_url = 'https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL' \
-                  '&wu_category=ALL&wu_county=ALL'.format(_format)
-    response_file_path = 'data/water_use_national.txt'
+    request_url = (
+        "https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL"
+        "&wu_category=ALL&wu_county=ALL".format(_format)
+    )
+    response_file_path = "data/water_use_national.txt"
     mock_request(requests_mock, request_url, response_file_path)
     if county_input_type_list:
         counties = [county]
@@ -425,13 +485,17 @@ def test_get_water_use_national_county_value_types(requests_mock, county_input_t
 @pytest.mark.parametrize("category_input_type_list", [True, False])
-def test_get_water_use_national_county_value_types(requests_mock, category_input_type_list):
+def test_get_water_use_national_category_value_types(
+    requests_mock, category_input_type_list
+):
     """Tests get_water_use method for valid input types for the 'categories' parameter"""
     _format = "rdb"
     category = "ALL"
-    request_url = 'https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL' \
-                  '&wu_category=ALL&wu_county=ALL'.format(_format)
-    response_file_path = 'data/water_use_national.txt'
+    request_url = (
+        "https://nwis.waterdata.usgs.gov/nwis/water_use?rdb_compression=value&format={}&wu_year=ALL"
+        "&wu_category=ALL&wu_county=ALL".format(_format)
+    )
+    response_file_path = "data/water_use_national.txt"
     mock_request(requests_mock, request_url, response_file_path)
     if category_input_type_list:
         categories = [category]
@@ -447,9 +511,11 @@ def test_get_water_use_allegheny(requests_mock):
     """Tests get_discharge_measurements method correctly generates the request url and returns the result in a
     DataFrame"""
     format = "rdb"
-    request_url = 'https://nwis.waterdata.usgs.gov/PA/nwis/water_use?rdb_compression=value&format=rdb&wu_year=ALL' \
-                  '&wu_category=ALL&wu_county=003&wu_area=county'
-    response_file_path = 'data/water_use_allegheny.txt'
+    request_url = (
+        "https://nwis.waterdata.usgs.gov/PA/nwis/water_use?rdb_compression=value&format=rdb&wu_year=ALL"
+        "&wu_category=ALL&wu_county=003&wu_area=county"
+    )
+    response_file_path = "data/water_use_allegheny.txt"
     mock_request(requests_mock, request_url, response_file_path)
     df, md = get_water_use(state="PA", counties="003")
     if not isinstance(df, DataFrame):
@@ -463,15 +529,19 @@ def test_get_ratings_validation():
     site = "01594440"
     with pytest.raises(ValueError) as value_error:
         get_ratings(site=site, file_type="BAD")
-    assert 'Unrecognized 
file_type: BAD, must be "base", "corr" or "exsa"' in str(value_error)
+    assert 'Unrecognized file_type: BAD, must be "base", "corr" or "exsa"' in str(
+        value_error
+    )


 def test_get_ratings(requests_mock):
     """Tests get_ratings method correctly generates the request url and returns the result in a DataFrame"""
     format = "rdb"
     site = "01594440"
-    request_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no={}&file_type=base".format(site)
-    response_file_path = 'data/waterservices_ratings.txt'
+    request_url = "https://nwis.waterdata.usgs.gov/nwisweb/get_ratings/?site_no={}&file_type=base".format(
+        site
+    )
+    response_file_path = "data/waterservices_ratings.txt"
     mock_request(requests_mock, request_url, response_file_path)
     df, md = get_ratings(site_no=site)
     if not isinstance(df, DataFrame):
@@ -485,14 +555,20 @@ def test_what_sites(requests_mock):
     """Tests what_sites method correctly generates the request url and returns the result in a DataFrame"""
     size = 2472
     format = "rdb"
-    parameter_cd = '00010%2C00060'
-    parameter_cd_list = ["00010","00060"]
-    request_url = "https://waterservices.usgs.gov/nwis/site?bBox=-83.0%2C36.5%2C-81.0%2C38.5" \
-                  "&parameterCd={}&hasDataTypeCd=dv&format={}".format(parameter_cd, format)
-    response_file_path = 'data/nwis_sites.txt'
+    parameter_cd = "00010%2C00060"
+    parameter_cd_list = ["00010", "00060"]
+    request_url = (
+        "https://waterservices.usgs.gov/nwis/site?bBox=-83.0%2C36.5%2C-81.0%2C38.5"
+        "&parameterCd={}&hasDataTypeCd=dv&format={}".format(parameter_cd, format)
+    )
+    response_file_path = "data/nwis_sites.txt"
     mock_request(requests_mock, request_url, response_file_path)
-    df, md = what_sites(bBox=[-83.0,36.5,-81.0,38.5], parameterCd=parameter_cd_list, hasDataTypeCd="dv")
+    df, md = what_sites(
+        bBox=[-83.0, 36.5, -81.0, 38.5],
+        parameterCd=parameter_cd_list,
+        hasDataTypeCd="dv",
+    )

     if not isinstance(df, DataFrame):
         raise AssertionError(f"{type(df)} is not DataFrame base class type")
@@ -515,8 +591,10 @@ def test_what_sites(requests_mock):
 def test_get_stats(requests_mock):
     """Tests get_stats method correctly generates the request url and returns the result in a DataFrame"""
     format = "rdb"
-    request_url = "https://waterservices.usgs.gov/nwis/stat?sites=01491000%2C01645000&format={}".format(format)
-    response_file_path = 'data/waterservices_stats.txt'
+    request_url = "https://waterservices.usgs.gov/nwis/stat?sites=01491000%2C01645000&format={}".format(
+        format
+    )
+    response_file_path = "data/waterservices_stats.txt"
     mock_request(requests_mock, request_url, response_file_path)
     df, md = get_stats(sites=["01491000", "01645000"])

@@ -530,9 +608,11 @@ def test_get_stats(requests_mock):
     """Tests get_stats method for valid input types for the 'sites' parameter"""
     _format = "rdb"
-    site = '01491000'
-    request_url = "https://waterservices.usgs.gov/nwis/stat?sites={}&format={}".format(site, _format)
-    response_file_path = 'data/waterservices_stats.txt'
+    site = "01491000"
+    request_url = "https://waterservices.usgs.gov/nwis/stat?sites={}&format={}".format(
+        site, _format
+    )
+    response_file_path = "data/waterservices_stats.txt"
     mock_request(requests_mock, request_url, response_file_path)
     if site_input_type_list:
         sites = [site]
@@ -546,7 +626,9 @@ def test_get_stats_site_value_types(requests_mock, site_input_type_list):

 def mock_request(requests_mock, request_url, file_path):
     with open(file_path) as text:
-        requests_mock.get(request_url, text=text.read(), headers={"mock_header": 
"value"}) + requests_mock.get( + request_url, text=text.read(), headers={"mock_header": "value"} + ) def assert_metadata(requests_mock, request_url, md, site, parameter_cd, format): @@ -554,20 +636,22 @@ def assert_metadata(requests_mock, request_url, md, site, parameter_cd, format): assert isinstance(md.query_time, datetime.timedelta) assert md.header == {"mock_header": "value"} if site is not None: - site_request_url = "https://waterservices.usgs.gov/nwis/site?sites={}&format=rdb".format(site) - with open('data/waterservices_site.txt') as text: + site_request_url = ( + "https://waterservices.usgs.gov/nwis/site?sites={}&format=rdb".format(site) + ) + with open("data/waterservices_site.txt") as text: requests_mock.get(site_request_url, text=text.read()) site_info, _ = md.site_info if not isinstance(site_info, DataFrame): - raise AssertionError( - f"{type(site_info)} is not DataFrame base class type" - ) + raise AssertionError(f"{type(site_info)} is not DataFrame base class type") if parameter_cd is None: assert md.variable_info is None else: for param in parameter_cd: - pcode_request_url = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?fmt=rdb&parm_nm_cd=%25{}%25".format(param) - with open('data/waterdata_pmcodes.txt') as text: + pcode_request_url = "https://help.waterdata.usgs.gov/code/parameter_cd_nm_query?fmt=rdb&parm_nm_cd=%25{}%25".format( + param + ) + with open("data/waterdata_pmcodes.txt") as text: requests_mock.get(pcode_request_url, text=text.read()) variable_info, _ = md.variable_info assert type(variable_info) is DataFrame diff --git a/tests/wqp_test.py b/tests/wqp_test.py index affb096..acf48c3 100755 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -1,33 +1,37 @@ -import pytest -import requests import datetime +import pytest from pandas import DataFrame from dataretrieval.wqp import ( + _check_kwargs, get_results, - what_sites, - what_organizations, - what_projects, what_activities, + what_activity_metrics, what_detection_limits, what_habitat_metrics, + what_organizations, what_project_weights, - what_activity_metrics, - _check_kwargs, + what_projects, + what_sites, ) def test_get_results(requests_mock): """Tests water quality portal ratings query""" - request_url = "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762" \ - "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" \ - "&mimeType=csv" - response_file_path = 'data/wqp_results.txt' + request_url = ( + "https://www.waterqualitydata.us/data/Result/Search?siteid=WIDNR_WQX-10032762" + "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" + "&mimeType=csv" + ) + response_file_path = "data/wqp_results.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_results(siteid='WIDNR_WQX-10032762', - characteristicName = 'Specific conductance', - startDateLo='05-01-2011', startDateHi='09-30-2011') + df, md = get_results( + siteid="WIDNR_WQX-10032762", + characteristicName="Specific conductance", + startDateLo="05-01-2011", + startDateHi="09-30-2011", + ) assert type(df) is DataFrame assert df.size == 315 assert md.url == request_url @@ -38,15 +42,21 @@ def test_get_results(requests_mock): def test_get_results_WQX3(requests_mock): """Tests water quality portal results query with new WQX3.0 profile""" - request_url = "https://www.waterqualitydata.us/wqx3/Result/search?siteid=WIDNR_WQX-10032762" \ - "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" \ - 
"&mimeType=csv" \ - "&dataProfile=fullPhysChem" - response_file_path = 'data/wqp3_results.txt' + request_url = ( + "https://www.waterqualitydata.us/wqx3/Result/search?siteid=WIDNR_WQX-10032762" + "&characteristicName=Specific+conductance&startDateLo=05-01-2011&startDateHi=09-30-2011" + "&mimeType=csv" + "&dataProfile=fullPhysChem" + ) + response_file_path = "data/wqp3_results.txt" mock_request(requests_mock, request_url, response_file_path) - df, md = get_results(legacy=False, siteid='WIDNR_WQX-10032762', - characteristicName = 'Specific conductance', - startDateLo='05-01-2011', startDateHi='09-30-2011') + df, md = get_results( + legacy=False, + siteid="WIDNR_WQX-10032762", + characteristicName="Specific conductance", + startDateLo="05-01-2011", + startDateHi="09-30-2011", + ) assert type(df) is DataFrame assert df.size == 900 assert md.url == request_url @@ -57,9 +67,11 @@ def test_get_results_WQX3(requests_mock): def test_what_sites(requests_mock): """Tests Water quality portal sites query""" - request_url = "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_sites.txt' + request_url = ( + "https://www.waterqualitydata.us/data/Station/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_sites.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_sites(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -72,9 +84,11 @@ def test_what_sites(requests_mock): def test_what_organizations(requests_mock): """Tests Water quality portal organizations query""" - request_url = "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_organizations.txt' + request_url = ( + "https://www.waterqualitydata.us/data/Organization/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_organizations.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_organizations(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -87,9 +101,11 @@ def test_what_organizations(requests_mock): def test_what_projects(requests_mock): """Tests Water quality portal projects query""" - request_url = "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_projects.txt' + request_url = ( + "https://www.waterqualitydata.us/data/Project/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_projects.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_projects(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -102,9 +118,11 @@ def test_what_projects(requests_mock): def test_what_activities(requests_mock): """Tests Water quality portal activities query""" - request_url = "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_activities.txt' + request_url = ( + "https://www.waterqualitydata.us/data/Activity/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_activities.txt" mock_request(requests_mock, request_url, response_file_path) df, md = 
what_activities(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -117,9 +135,11 @@ def test_what_activities(requests_mock): def test_what_detection_limits(requests_mock): """Tests Water quality portal detection limits query""" - request_url = "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_detection_limits.txt' + request_url = ( + "https://www.waterqualitydata.us/data/ResultDetectionQuantitationLimit/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_detection_limits.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_detection_limits(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -132,9 +152,11 @@ def test_what_detection_limits(requests_mock): def test_what_habitat_metrics(requests_mock): """Tests Water quality portal habitat metrics query""" - request_url = "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_habitat_metrics.txt' + request_url = ( + "https://www.waterqualitydata.us/data/BiologicalMetric/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_habitat_metrics.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_habitat_metrics(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -147,9 +169,11 @@ def test_what_habitat_metrics(requests_mock): def test_what_project_weights(requests_mock): """Tests Water quality portal project weights query""" - request_url = "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_project_weights.txt' + request_url = ( + "https://www.waterqualitydata.us/data/ProjectMonitoringLocationWeighting/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_project_weights.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_project_weights(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -162,9 +186,11 @@ def test_what_project_weights(requests_mock): def test_what_activity_metrics(requests_mock): """Tests Water quality portal activity metrics query""" - request_url = "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride" \ - "&mimeType=csv" - response_file_path = 'data/wqp_activity_metrics.txt' + request_url = ( + "https://www.waterqualitydata.us/data/ActivityMetric/Search?statecode=US%3A34&characteristicName=Chloride" + "&mimeType=csv" + ) + response_file_path = "data/wqp_activity_metrics.txt" mock_request(requests_mock, request_url, response_file_path) df, md = what_activity_metrics(statecode="US:34", characteristicName="Chloride") assert type(df) is DataFrame @@ -177,7 +203,9 @@ def test_what_activity_metrics(requests_mock): def mock_request(requests_mock, request_url, file_path): with open(file_path) as text: - requests_mock.get(request_url, text=text.read(), headers={"mock_header": "value"}) + requests_mock.get( + request_url, text=text.read(), headers={"mock_header": "value"} + ) def test_check_kwargs():