forked from zachghiaccio/icesat2-snowex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathground_data_processing.py
104 lines (85 loc) · 3.55 KB
/
ground_data_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 6 13:39:20 2022
@author: zfair
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from pyproj import Proj, transform
import ulmo
#---------------#
def process_raw_data(df, plot=True):
    """
    Takes unprocessed site data and makes it more easily readable for further
    analysis. Currently works for the magnaprobe data gathered from Alaska 2022.

    Parameters
    ----------
    df : DataFrame
        Raw magnaprobe data from one of the SnowEx Alaska sites, containing
        split lat/lon coordinate columns and snow depth in centimeters.
    plot : bool, optional
        If True (default), show a quick scatter plot of snow depth over
        easting/northing, for debugging/quick views.

    Returns
    -------
    df : DataFrame
        DataFrame containing processed magnaprobe data, including snow depth
        (in meters), condensed lat/lon coordinates, and easting/northing
        estimates (UTM zone 6N, EPSG:32606).
    """
    # Include only rows with data; copy so the column assignments below do
    # not trigger chained-assignment warnings on a view of the caller's frame.
    df = df.iloc[2:].copy()

    # Convert numeric data to floats
    df['DepthCm'] = pd.to_numeric(df['DepthCm'])
    df['altitudeB'] = pd.to_numeric(df['altitudeB'])
    df[['latitude_a', 'LatitudeDDDDD']] = df[['latitude_a', 'LatitudeDDDDD']].apply(pd.to_numeric)
    df[['Longitude_a', 'LongitudeDDDDD']] = df[['Longitude_a', 'LongitudeDDDDD']].apply(pd.to_numeric)

    # Combine coordinates into single columns
    df['Latitude'] = df['latitude_a'] + df['LatitudeDDDDD']
    df['Longitude'] = df['Longitude_a'] + df['LongitudeDDDDD']

    # Convert snow depth to meters
    df['Depth'] = df['DepthCm'] / 100.

    # Estimate easting/northing coordinates. Transformer replaces the
    # deprecated (and since-removed) Proj(init=...)/pyproj.transform API;
    # always_xy=True keeps the (lon, lat) argument order the old call used.
    from pyproj import Transformer
    to_utm = Transformer.from_crs('EPSG:4326', 'EPSG:32606', always_xy=True)
    df['Easting'], df['Northing'] = to_utm.transform(df['Longitude'].values,
                                                     df['Latitude'].values)

    # Rename time and elevation columns for consistency with other scripts
    df = df.rename(columns={'TIMESTAMP': 'UTCdoy',
                            'altitudeB': 'Elevation'})

    # Plotting, for debugging/quick views
    if plot:
        plt.scatter(df['Easting'], df['Northing'], c=df['Depth'])
        plt.xlabel('easting [m]')
        plt.ylabel('northing [m]')
        plt.show()

    return df
#---------------#
def snotel_fetch(sitecode, variablecode='SNOTEL:SNWD_D', start_date='1950-10-01', end_date='2020-12-31'):
    """
    Fetch a SNOTEL time series for one site from the CUAHSI hydroportal.

    Parameters
    ----------
    sitecode : str
        CUAHSI/SNOTEL site code to query.
    variablecode : str, optional
        CUAHSI variable code; defaults to daily snow depth ('SNOTEL:SNWD_D').
    start_date, end_date : str, optional
        Date range to request, as 'YYYY-MM-DD' strings.

    Returns
    -------
    values_df : DataFrame or None
        Datetime-indexed DataFrame of values with -9999 nodata replaced by
        NaN and lower-quality records removed, or None if the request failed.
    """
    values_df = None
    wsdlurl = 'https://hydroportal.cuahsi.org/Snotel/cuahsi_1_1.asmx?WSDL'
    try:
        # Request data from the server
        site_values = ulmo.cuahsi.wof.get_values(wsdlurl, sitecode, variablecode, start=start_date, end=end_date)
        # Convert to a Pandas DataFrame
        values_df = pd.DataFrame.from_dict(site_values['values'])
        # Parse the datetime values to Pandas Timestamp objects
        values_df['datetime'] = pd.to_datetime(values_df['datetime'], utc=True)
        # Set the DataFrame index to the Timestamps
        values_df = values_df.set_index('datetime')
        # Convert values to float and replace -9999 nodata values with NaN
        values_df['value'] = pd.to_numeric(values_df['value']).replace(-9999, np.nan)
        # Remove any records flagged with lower quality
        values_df = values_df[values_df['quality_control_level_code'] == '1']
    except Exception as e:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit pass
        # through, and report the reason instead of failing silently.
        print("Unable to fetch %s: %s" % (variablecode, e))
    return values_df
#---------------#
def add_dowy(df, col=None):
    """
    Add day-of-year ('doy') and day-of-water-year ('dowy') columns in place.

    Parameters
    ----------
    df : DataFrame
        If col is None, df must have a DatetimeIndex; otherwise df[col]
        must be a datetime column.
    col : str, optional
        Name of a datetime column to use instead of the index.

    Returns
    -------
    df : DataFrame
        The same DataFrame, with 'doy' and 'dowy' columns added.
    """
    if col is None:
        df['doy'] = df.index.dayofyear
    else:
        # A datetime Series needs the .dt accessor; the original
        # df[col].dayofyear raises AttributeError.
        df['doy'] = df[col].dt.dayofyear
    # Water year starts Oct 1. Sept 30 is doy 273 in non-leap years
    # (doy 274 in leap years, so dowy is off by one there -- kept as-is
    # to match the original scheme).
    df['dowy'] = df['doy'] - 273
    df.loc[df['dowy'] <= 0, 'dowy'] += 365
    return df