Skip to content

Commit

Permalink
Fix truncated text and decode SPCL obs
Browse files Browse the repository at this point in the history
Text within GEMPAK files should no longer be truncated, as the correct
number of bytes is now read. This affected surface and sounding files.

SPCL (non-hourly) surface observations were originally only stored as
text. Methods were added to decode them into JSON along with the hourly
observations. The `nearest_time` method now should be able to select
the closest match from both hourly and non-hourly observations.

Fixes Unidata#2112
  • Loading branch information
nawendt committed Sep 10, 2022
1 parent 72bb8cc commit fd52640
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 62 deletions.
263 changes: 204 additions & 59 deletions src/metpy/io/gempak.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@

from ._tools import IOBuffer, NamedStruct, open_as_needed
from .. import constants
from ..calc import (scale_height, specific_humidity_from_dewpoint, thickness_hydrostatic,
from ..calc import (altimeter_to_sea_level_pressure, scale_height,
specific_humidity_from_dewpoint, thickness_hydrostatic,
virtual_temperature)
from ..io.metar import parse_metar
from ..package_tools import Exporter
from ..plots.mapping import CFProjection
from ..units import units

exporter = Exporter(globals())
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -169,6 +172,11 @@ class DataSource(Enum):
])


def _check_nan(value, missing=-9999):
"""Check for nan values and replace with missing."""
return missing if math.isnan(value) else value


def convert_degc_to_k(val, missing=-9999):
    """Convert a scalar from degC to K, passing the missing sentinel through.

    Parameters
    ----------
    val : number
        Temperature value to convert.
    missing : number, optional
        Sentinel marking missing data. Defaults to -9999.

    Returns
    -------
    `val` plus ``constants.nounit.zero_degc`` when `val` differs from `missing`;
    otherwise `val` unchanged.
    """
    if val != missing:
        return val + constants.nounit.zero_degc
    # Missing values are handed back untouched so the sentinel survives conversion
    return val
Expand Down Expand Up @@ -480,6 +488,86 @@ def _interp_parameters(vlev, adata, bdata, missing=-9999):
return outdata


def _wx_to_wnum(wx1, wx2, wx3, missing=-9999):
"""Convert METAR present weather code to GEMPAK weather number.
Notes
-----
See GEMAPK function PT_WNMT.
"""
metar_codes = [
'BR', 'DS', 'DU', 'DZ', 'FC', 'FG', 'FU', 'GR', 'GS',
'HZ', 'IC', 'PL', 'PO', 'RA', 'SA', 'SG', 'SN', 'SQ',
'SS', 'TS', 'UP', 'VA', '+DS', '-DZ', '+DZ', '+FC',
'-GS', '+GS', '-PL', '+PL', '-RA', '+RA', '-SG',
'+SG', '-SN', '+SN', '+SS', 'BCFG', 'BLDU', 'BLPY',
'BLSA', 'BLSN', 'DRDU', 'DRSA', 'DRSN', 'FZDZ', 'FZFG',
'FZRA', 'MIFG', 'PRFG', 'SHGR', 'SHGS', 'SHPL', 'SHRA',
'SHSN', 'TSRA', '+BLDU', '+BLSA', '+BLSN', '-FZDZ',
'+FZDZ', '+FZFG', '-FZRA', '+FZRA', '-SHGS', '+SHGS',
'-SHPL', '+SHPL', '-SHRA', '+SHRA', '-SHSN', '+SHSN',
'-TSRA', '+TSRA'
]

gempak_wnum = [
9, 33, 8, 2, -2, 9, 7, 4, 25, 6, 36, 23, 40, 1, 35, 24, 3, 10,
35, 5, 41, 11, 68, 17, 18, -1, 61, 62, 57, 58, 13, 14, 59, 60, 20,
21, 69, 9, 33, 34, 35, 32, 33, 35, 32, 19, 30, 15, 31, 9, 27, 67,
63, 16, 22, 66, 68, 69, 70, 53, 54, 30, 49, 50, 67, 67, 75, 76, 51,
52, 55, 56, 77, 78
]

if wx1 in metar_codes:
wn1 = gempak_wnum[metar_codes.index(wx1)]
else:
wn1 = 0

if wx2 in metar_codes:
wn2 = gempak_wnum[metar_codes.index(wx2)]
else:
wn2 = 0

if wx3 in metar_codes:
wn3 = gempak_wnum[metar_codes.index(wx3)]
else:
wn3 = 0

if all(w >= 0 for w in [wn1, wn2, wn3]):
wnum = wn3 * 80 * 80 + wn2 * 80 + wn1
else:
wnum = min([wn1, wn2, wn3])
if wnum == 0:
wnum = missing

return wnum


def _convert_clouds(cover, height, missing=-9999):
"""Convert METAR cloud cover to GEMPAK code.
Notes
-----
See GEMPAK function BR_CMTN.
"""
cover_text = ['CLR', 'SCT', 'BKN', 'OVC', 'VV', 'FEW', 'SKC']
if not isinstance(cover, str):
return missing

code = 0
if cover in cover_text:
code = cover_text.index(cover) + 1

if code == 7:
code = 1

if not math.isnan(height):
code += height
if height == 0:
code *= -1

return code


class GempakFile():
"""Base class for GEMPAK files.
Expand Down Expand Up @@ -1393,6 +1481,10 @@ def _unpack_merged(self, sndno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))

if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -1456,6 +1548,10 @@ def _unpack_unmerged(self, sndno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))

if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -1679,7 +1775,7 @@ def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
hght,
drct,
sped
]:
] or hght <= zold:
skip = True
elif abs(zold - hght) < 1:
skip = True
Expand All @@ -1689,8 +1785,6 @@ def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
]:
merged['DRCT'][ilev - 1] = drct
merged['SPED'][ilev - 1] = sped
elif hght <= zold:
skip = True
elif hght >= znxt:
while more and hght > znxt:
zold = znxt
Expand Down Expand Up @@ -2135,7 +2229,7 @@ def snxarray(self, station_id=None, station_number=None,
if snd is None or 'PRES' not in snd:
continue
station_pressure = snd['PRES'][0]
radat_text = {}
wmo_text = {}
attrs = {
'station_id': snd.pop('STID'),
'station_number': snd.pop('STNM'),
Expand All @@ -2148,15 +2242,15 @@ def snxarray(self, station_id=None, station_number=None,
}

if 'TXTA' in snd:
radat_text['txta'] = snd.pop('TXTA')
wmo_text['txta'] = snd.pop('TXTA')
if 'TXTB' in snd:
radat_text['txtb'] = snd.pop('TXTB')
wmo_text['txtb'] = snd.pop('TXTB')
if 'TXTC' in snd:
radat_text['txtc'] = snd.pop('TXTC')
wmo_text['txtc'] = snd.pop('TXTC')
if 'TXPB' in snd:
radat_text['txpb'] = snd.pop('TXPB')
if radat_text:
attrs['RADAT'] = radat_text
wmo_text['txpb'] = snd.pop('TXPB')
if wmo_text:
attrs['WMO_CODES'] = wmo_text

dt = datetime.combine(snd.pop('DATE'), snd.pop('TIME'))
press = np.array(snd.pop('PRES'))
Expand Down Expand Up @@ -2212,9 +2306,36 @@ def __init__(self, file, *args, **kwargs):
if self.surface_type == 'standard':
for irow, row_head in enumerate(self.row_headers):
for icol, col_head in enumerate(self.column_headers):
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(row_head.DATE, row_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
col_head.SLON,
col_head.SELV,
col_head.STAT,
col_head.COUN,
)
)
elif self.surface_type == 'ship':
irow = 0
for icol, col_head in enumerate(self.column_headers):
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)
Expand All @@ -2224,7 +2345,7 @@ def __init__(self, file, *args, **kwargs):
Surface(
irow,
icol,
datetime.combine(row_head.DATE, row_head.TIME),
datetime.combine(col_head.DATE, col_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
Expand All @@ -2234,56 +2355,32 @@ def __init__(self, file, *args, **kwargs):
col_head.COUN,
)
)
elif self.surface_type == 'ship':
irow = 0
for icol, col_head in enumerate(self.column_headers):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
col_head.SLON,
col_head.SELV,
col_head.STAT,
col_head.COUN,
)
)
elif self.surface_type == 'climate':
for icol, col_head in enumerate(self.column_headers):
for irow, row_head in enumerate(self.row_headers):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
row_head.STID + row_head.STD2,
row_head.STNM,
row_head.SLAT,
row_head.SLON,
row_head.SELV,
row_head.STAT,
row_head.COUN,
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
row_head.STID + row_head.STD2,
row_head.STNM,
row_head.SLAT,
row_head.SLON,
row_head.SELV,
row_head.STAT,
row_head.COUN,
)
)
)
else:
raise TypeError('Unknown surface type {}'.format(self.surface_type))

Expand Down Expand Up @@ -2360,6 +2457,10 @@ def _unpack_climate(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))

if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -2427,6 +2528,10 @@ def _unpack_ship(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))

if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -2494,6 +2599,10 @@ def _unpack_standard(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))

if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand All @@ -2516,6 +2625,40 @@ def _unpack_standard(self, sfcno):
stations.append(station)
return stations

@staticmethod
def _decode_special_observation(station, missing=-9999):
    """Decode raw special observation text into GEMPAK surface parameters.

    The METAR text stored under ``station['SPCL']`` is parsed with `parse_metar` and
    the decoded values are written back into `station`, which is modified in place.

    Parameters
    ----------
    station : dict-like
        Observation holding at least the 'SPCL', 'DATE' and 'TIME' entries.
    missing : number, optional
        Stored in place of parsed values that are NaN. Defaults to -9999.

    Returns
    -------
    The same `station` object with 'TIME', 'PMSL', 'ALTI', 'TMPC', 'DWPC', 'SKNT',
    'DRCT', 'GUST', 'WNUM', 'CHC1', 'CHC2', 'CHC3' and 'VSBY' set.
    """
    text = station['SPCL']
    dt = datetime.combine(station['DATE'], station['TIME'])
    # The stored date supplies the year and month passed to the METAR parser
    parsed = parse_metar(text, dt.year, dt.month)

    station['TIME'] = parsed.date_time.time()

    # NaN never compares equal to itself, so a membership test such as
    # ``math.nan in [...]`` only matches the identical float object. Check each input
    # with isnan so a NaN from any source marks sea level pressure as missing.
    pmsl_inputs = (parsed.altimeter, parsed.elevation, parsed.temperature)
    if any(math.isnan(value) for value in pmsl_inputs):
        station['PMSL'] = missing
    else:
        station['PMSL'] = altimeter_to_sea_level_pressure(
            units.Quantity(parsed.altimeter, 'inHg'),
            units.Quantity(parsed.elevation, 'm'),
            units.Quantity(parsed.temperature, 'degC')
        ).to('hPa').m

    station['ALTI'] = _check_nan(parsed.altimeter, missing)
    station['TMPC'] = _check_nan(parsed.temperature, missing)
    station['DWPC'] = _check_nan(parsed.dewpoint, missing)
    station['SKNT'] = _check_nan(parsed.wind_speed, missing)
    station['DRCT'] = _check_nan(float(parsed.wind_direction), missing)
    station['GUST'] = _check_nan(parsed.wind_gust, missing)
    station['WNUM'] = float(_wx_to_wnum(parsed.current_wx1, parsed.current_wx2,
                                        parsed.current_wx3, missing))
    station['CHC1'] = _convert_clouds(parsed.skyc1, parsed.skylev1, missing)
    station['CHC2'] = _convert_clouds(parsed.skyc2, parsed.skylev2, missing)
    station['CHC3'] = _convert_clouds(parsed.skyc3, parsed.skylev3, missing)

    if math.isnan(parsed.visibility):
        station['VSBY'] = missing
    else:
        # NOTE(review): 1609.344 is metres per statute mile, so this presumably turns
        # a visibility in metres into whole miles -- confirm against parse_metar.
        station['VSBY'] = float(round(parsed.visibility / 1609.344))

    return station

def nearest_time(self, date_time, station_id=None, station_number=None):
"""Get nearest observation to given time for selected stations.
Expand Down Expand Up @@ -2696,6 +2839,8 @@ def sfjson(self, station_id=None, station_number=None,

stnarr = []
for stn in data:
if 'SPCL' in stn:
stn = self._decode_special_observation(stn, self.prod_desc.missing_float)
props = {'date_time': datetime.combine(stn.pop('DATE'), stn.pop('TIME')),
'station_id': stn.pop('STID') + stn.pop('STD2'),
'station_number': stn.pop('STNM'),
Expand Down
Loading

0 comments on commit fd52640

Please sign in to comment.