-
Notifications
You must be signed in to change notification settings - Fork 0
/
tabla2paper_referenciada10m.py
161 lines (123 loc) · 5.02 KB
/
tabla2paper_referenciada10m.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
En esta rutina armamos la tabla 2 del paper donde se comparan los
datos de viento de la boya contra los datos de CCMPv2 y ERA-Interim.
En este caso recalibramos los datos de la boya a 10m
Dani Risaro
Diciembre 2019
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def reject_outliers(data, m=2):
    """
    Keep only the values that lie close to the mean of the data.

    Parameters:
        data: array-like supporting arithmetic and boolean-mask indexing
            (the caller passes a series of differences).
        m: number of standard deviations (as computed by ``np.std``)
            used as the rejection threshold. Defaults to 2.
    Returns:
        The elements of ``data`` whose absolute deviation from the mean
        is strictly smaller than ``m`` standard deviations.
    """
    center = np.mean(data)
    threshold = m * np.std(data)
    deviation = abs(data - center)
    return data[deviation < threshold]
def remove_outliers_diference(data_x, data_y, m=2):
    """
    Drop from a pair of data sets the samples whose difference is an outlier.

    The element-wise difference ``data_x - data_y`` is filtered with
    ``reject_outliers``; the index labels that survive the filter are then
    used to select the matching samples from both inputs.

    Parameters:
        data_x: pandas object A (the script passes a Series).
        data_y: pandas object B, indexed like ``data_x``.
        m: number of standard deviations of the difference beyond which
            a sample is removed. Defaults to 2, the value that was
            previously hard-coded.
    Returns:
        Tuple ``(data_x_without_out, data_y_without_out)`` holding the
        samples of each input whose difference was not rejected.
    """
    dif = data_x - data_y
    kept_labels = reject_outliers(dif, m=m).index
    # .loc makes the selection explicitly label-based (row labels), which is
    # what the index returned by the filter contains.
    data_x_without_out = data_x.loc[kept_labels]
    data_y_without_out = data_y.loc[kept_labels]
    return data_x_without_out, data_y_without_out
def ref_10m(wnd_speed, z):
    """
    Adjust a wind speed measured at height ``z`` to a height of 10 m.

    A roughness length is estimated from the measured speed and then used
    in a logarithmic-profile ratio to scale the speed to 10 m. According to
    the original author's note, the parametrization is the one given in the
    supplement of Atlas (2011).

    Parameters:
        wnd_speed: wind speed at height ``z`` (scalar, numpy array or
            pandas Series).
        z: measurement height, in the same length unit as the 10 used for
            the reference height.
    Returns:
        Wind speed referenced to 10 m, with the same shape as
        ``wnd_speed``. A wind speed of exactly zero yields NaN because the
        estimated roughness length collapses to zero.
    """
    # Coefficients of the parametrization (see the Atlas 2011 supplement).
    c0 = 3.7
    c1 = 1.165
    a = 0.032  # NOTE(review): presumably a Charnock-type coefficient - confirm
    k = 0.40   # NOTE(review): presumably the von Karman constant - confirm
    g = 9.81   # NOTE(review): presumably gravity in m/s^2 - confirm

    # The argument of the logarithm has to be dimensionless, so both g and z
    # belong in the denominator: a*k^2*U^2 / (g*z). The previous form
    # ".../g*z" multiplied by z because of operator precedence.
    zeta_cero = z * np.exp(
        -c0 + c1 * np.log(a * k**2 * wnd_speed**2 / (g * z)))

    # Logarithmic-profile ratio between the 10 m level and the sensor level.
    wnd_speed_10m = (np.log(10 / zeta_cero) / np.log(z / zeta_cero)) * wnd_speed
    return wnd_speed_10m
z = 4  # the buoy measures the wind at 4 m

# Table to fill: one row per site, one column per statistic.
sites = ['A', 'B', 'C1', 'C2', 'D']
parameters = ['Latitude (S)',
              'Longitude (W)',
              'Date',
              'Period of data (days)',
              'Mean wsp buoy (m/s)',
              'Std wsp buoy (m/s)',
              'R (CCMPv2, buoy)',
              'R (ERA-Interim, buoy)',
              'R (CCMPv2, buoy) without outliers',
              'R (ERA-Interim, buoy) without outliers']
df = pd.DataFrame(index=sites, columns=parameters)

# Set up the iteration: the i-th entry of every file list belongs to sites[i].
directory = '/home/daniu/Documentos/datos_boya/'
buoyfiles = ['boya_2015/datos_boya_1hora.csv',
             'boya_2006/datos_boya_1hora.csv',
             'boya_2006_corto/datos_boya_1hora.csv',
             'boya_2005/datos_boya_1hora.csv',
             'boya_2016/datos_boya_1hora.csv']
ccmpfiles = ['boya_2015/datos_ccmp_6horas.csv',
             'boya_2006/datos_ccmp_6horas.csv',
             'boya_2006_corto/datos_ccmp_6horas.csv',
             'boya_2005/datos_ccmp_6horas.csv',
             'boya_2016/datos_ccmp_6horas.csv']
eraifiles = ['boya_2015/datos_era_interim_6horas.csv',
             'boya_2006/datos_era_interim_6horas.csv',
             'boya_2006_corto/datos_era_interim_6horas.csv',
             'boya_2005/datos_era_interim_6horas.csv',
             'boya_2016/datos_era_interim_6horas.csv']

for i, site in enumerate(sites):
    ibuoyfile = directory + buoyfiles[i]
    iccmpfile = directory + ccmpfiles[i]
    ieraifile = directory + eraifiles[i]

    ccmpv2 = pd.read_csv(iccmpfile, index_col=0)
    erai = pd.read_csv(ieraifile, index_col=0)
    buoy_hor = pd.read_csv(ibuoyfile, header=[0], index_col=0, delimiter='\t')

    # Keep one buoy record out of every six. The last site starts at row 3
    # instead of row 0.
    # NOTE(review): presumably this aligns the hourly buoy record with the
    # 6-hourly products and the last deployment starts 3 rows off - confirm.
    boya = buoy_hor.iloc[3::6] if i == 4 else buoy_hor.iloc[::6]

    # comparison with CCMP (buoy speed referenced to 10 m)
    x_boya = ref_10m(boya['int'], z)
    y_ccmp = ccmpv2['speed']
    x_boya.index = y_ccmp.index  # reindex just in case they are not in the same format
    x_boya_without_out, y_ccmp_without_out = remove_outliers_diference(x_boya, y_ccmp)
    R_with_out_ccmp = x_boya.corr(y_ccmp)
    R_without_out_ccmp = x_boya_without_out.corr(y_ccmp_without_out)

    # comparison with ERA-Interim (buoy speed referenced to 10 m)
    x_boya = ref_10m(boya['int'], z)
    y_erai = erai['speed']
    x_boya.index = y_erai.index  # reindex just in case they are not in the same format
    x_boya_without_out, y_erai_without_out = remove_outliers_diference(x_boya, y_erai)
    R_with_out_erai = x_boya.corr(y_erai)
    R_without_out_erai = x_boya_without_out.corr(y_erai_without_out)

    # lat - lon position and length of the record
    pos_lat = boya.lat.values[0]
    pos_lon = boya.lon.values[0]
    ndays = len(buoy_hor) / 24

    # date range
    date = buoy_hor.index[0] + ' to ' + buoy_hor.index[-1]

    # mean and std from buoy (10 m referenced speed)
    mean_buoy = x_boya.mean()
    std_buoy = x_boya.std()

    # fill the table; np.round is used instead of the .round method because
    # Series.corr may return a plain Python float NaN, which has no .round
    df.loc[site, 'Latitude (S)'] = np.round(pos_lat, 2)
    df.loc[site, 'Longitude (W)'] = np.round(pos_lon, 2)
    df.loc[site, 'Date'] = date
    df.loc[site, 'Period of data (days)'] = int(ndays)
    df.loc[site, 'Mean wsp buoy (m/s)'] = np.round(mean_buoy, 2)
    df.loc[site, 'Std wsp buoy (m/s)'] = np.round(std_buoy, 2)
    df.loc[site, 'R (CCMPv2, buoy)'] = np.round(R_with_out_ccmp, 2)
    df.loc[site, 'R (ERA-Interim, buoy)'] = np.round(R_with_out_erai, 2)
    df.loc[site, 'R (CCMPv2, buoy) without outliers'] = np.round(R_without_out_ccmp, 2)
    df.loc[site, 'R (ERA-Interim, buoy) without outliers'] = np.round(R_without_out_erai, 2)

print(df)
df.to_csv('/home/daniu/Documentos/tablas/tabla_buoy_observations_ref.csv', sep=',')