Fix truncated text and decode SPCL obs

Text within GEMPAK files should no longer be truncated, as the correct number of bytes is now read. This affected surface and sounding files. SPCL (non-hourly) surface observations were originally only stored as text. Methods were added to decode them into JSON along with the hourly observations. The `nearest_time` method should now be able to select the closest match from both hourly and non-hourly observations. Fixes Unidata#2112
sgdecker · Sep 10, 2022 · fd52640 · fd52640
1 parent 72bb8cc
commit fd52640
Show file tree

Hide file tree

Showing 7 changed files with 269 additions and 62 deletions.
diff --git a/src/metpy/io/gempak.py b/src/metpy/io/gempak.py
@@ -21,10 +21,13 @@
 
 from ._tools import IOBuffer, NamedStruct, open_as_needed
 from .. import constants
-from ..calc import (scale_height, specific_humidity_from_dewpoint, thickness_hydrostatic,
+from ..calc import (altimeter_to_sea_level_pressure, scale_height,
+                    specific_humidity_from_dewpoint, thickness_hydrostatic,
                     virtual_temperature)
+from ..io.metar import parse_metar
 from ..package_tools import Exporter
 from ..plots.mapping import CFProjection
+from ..units import units
 
 exporter = Exporter(globals())
 log = logging.getLogger(__name__)
@@ -169,6 +172,11 @@ class DataSource(Enum):
 ])
 
 
+def _check_nan(value, missing=-9999):
+    """Return `missing` in place of a NaN `value`; pass any other value through."""
+    if math.isnan(value):
+        return missing
+    return value
+
+
 def convert_degc_to_k(val, missing=-9999):
     """Convert scalar values from degC to K, handling missing values."""
     return val + constants.nounit.zero_degc if val != missing else val
@@ -480,6 +488,86 @@ def _interp_parameters(vlev, adata, bdata, missing=-9999):
     return outdata
 
 
+def _wx_to_wnum(wx1, wx2, wx3, missing=-9999):
+    """Combine up to three METAR present weather codes into a GEMPAK weather number.
+
+    Each code is translated through a lookup table; a value that is not in the table
+    counts as 0. When none of the three numbers is negative they are packed as base-80
+    digits, with `wx1` in the lowest position. Otherwise the smallest (negative) number
+    is returned by itself.
+
+    Notes
+    -----
+    See GEMPAK function PT_WNMT.
+    """
+    metar_codes = [
+        'BR', 'DS', 'DU', 'DZ', 'FC', 'FG', 'FU', 'GR', 'GS',
+        'HZ', 'IC', 'PL', 'PO', 'RA', 'SA', 'SG', 'SN', 'SQ',
+        'SS', 'TS', 'UP', 'VA', '+DS', '-DZ', '+DZ', '+FC',
+        '-GS', '+GS', '-PL', '+PL', '-RA', '+RA', '-SG',
+        '+SG', '-SN', '+SN', '+SS', 'BCFG', 'BLDU', 'BLPY',
+        'BLSA', 'BLSN', 'DRDU', 'DRSA', 'DRSN', 'FZDZ', 'FZFG',
+        'FZRA', 'MIFG', 'PRFG', 'SHGR', 'SHGS', 'SHPL', 'SHRA',
+        'SHSN', 'TSRA', '+BLDU', '+BLSA', '+BLSN', '-FZDZ',
+        '+FZDZ', '+FZFG', '-FZRA', '+FZRA', '-SHGS', '+SHGS',
+        '-SHPL', '+SHPL', '-SHRA', '+SHRA', '-SHSN', '+SHSN',
+        '-TSRA', '+TSRA'
+    ]
+
+    gempak_wnum = [
+        9, 33, 8, 2, -2, 9, 7, 4, 25, 6, 36, 23, 40, 1, 35, 24, 3, 10,
+        35, 5, 41, 11, 68, 17, 18, -1, 61, 62, 57, 58, 13, 14, 59, 60, 20,
+        21, 69, 9, 33, 34, 35, 32, 33, 35, 32, 19, 30, 15, 31, 9, 27, 67,
+        63, 16, 22, 66, 68, 69, 70, 53, 54, 30, 49, 50, 67, 67, 75, 76, 51,
+        52, 55, 56, 77, 78
+    ]
+
+    def _lookup(code):
+        # Values absent from the table (including non-string placeholders) map to 0.
+        if code in metar_codes:
+            return gempak_wnum[metar_codes.index(code)]
+        return 0
+
+    first, second, third = (_lookup(code) for code in (wx1, wx2, wx3))
+    lowest = min([first, second, third])
+
+    if lowest >= 0:
+        return third * 80 * 80 + second * 80 + first
+
+    # `lowest` is negative on this path, so the zero check is purely defensive.
+    return missing if lowest == 0 else lowest
+
+
+def _convert_clouds(cover, height, missing=-9999):
+    """Translate a METAR sky cover and height pair into a GEMPAK cloud code.
+
+    A `cover` that is not a string yields `missing`. Otherwise the cover abbreviation is
+    mapped to a numeric code (0 when unrecognized) and, unless `height` is NaN, the
+    height is added to it. A height of exactly zero flips the sign of the result.
+
+    Notes
+    -----
+    See GEMPAK function BR_CMTN.
+    """
+    if not isinstance(cover, str):
+        return missing
+
+    # SKC shares the code used for CLR.
+    cover_codes = {'CLR': 1, 'SCT': 2, 'BKN': 3, 'OVC': 4, 'VV': 5, 'FEW': 6, 'SKC': 1}
+    code = cover_codes.get(cover, 0)
+
+    if math.isnan(height):
+        return code
+
+    code += height
+    return code * -1 if height == 0 else code
+
+
 class GempakFile():
     """Base class for GEMPAK files.
 
@@ -1393,6 +1481,10 @@ def _unpack_merged(self, sndno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -1456,6 +1548,10 @@ def _unpack_unmerged(self, sndno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -1679,7 +1775,7 @@ def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
                 hght,
                 drct,
                 sped
-            ]:
+            ] or hght <= zold:
                 skip = True
             elif abs(zold - hght) < 1:
                 skip = True
@@ -1689,8 +1785,6 @@ def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
                 ]:
                     merged['DRCT'][ilev - 1] = drct
                     merged['SPED'][ilev - 1] = sped
-            elif hght <= zold:
-                skip = True
             elif hght >= znxt:
                 while more and hght > znxt:
                     zold = znxt
@@ -2135,7 +2229,7 @@ def snxarray(self, station_id=None, station_number=None,
             if snd is None or 'PRES' not in snd:
                 continue
             station_pressure = snd['PRES'][0]
-            radat_text = {}
+            wmo_text = {}
             attrs = {
                 'station_id': snd.pop('STID'),
                 'station_number': snd.pop('STNM'),
@@ -2148,15 +2242,15 @@ def snxarray(self, station_id=None, station_number=None,
             }
 
             if 'TXTA' in snd:
-                radat_text['txta'] = snd.pop('TXTA')
+                wmo_text['txta'] = snd.pop('TXTA')
             if 'TXTB' in snd:
-                radat_text['txtb'] = snd.pop('TXTB')
+                wmo_text['txtb'] = snd.pop('TXTB')
             if 'TXTC' in snd:
-                radat_text['txtc'] = snd.pop('TXTC')
+                wmo_text['txtc'] = snd.pop('TXTC')
             if 'TXPB' in snd:
-                radat_text['txpb'] = snd.pop('TXPB')
-            if radat_text:
-                attrs['RADAT'] = radat_text
+                wmo_text['txpb'] = snd.pop('TXPB')
+            if wmo_text:
+                attrs['WMO_CODES'] = wmo_text
 
             dt = datetime.combine(snd.pop('DATE'), snd.pop('TIME'))
             press = np.array(snd.pop('PRES'))
@@ -2212,9 +2306,36 @@ def __init__(self, file, *args, **kwargs):
         if self.surface_type == 'standard':
             for irow, row_head in enumerate(self.row_headers):
                 for icol, col_head in enumerate(self.column_headers):
+                    for iprt in range(len(self.parts)):
+                        pointer = (self.prod_desc.data_block_ptr
+                                   + (irow * self.prod_desc.columns * self.prod_desc.parts)
+                                   + (icol * self.prod_desc.parts + iprt))
+
+                        self._buffer.jump_to(self._start, _word_to_position(pointer))
+                        data_ptr = self._buffer.read_int(4, self.endian, False)
+
+                        if data_ptr:
+                            self._sfinfo.append(
+                                Surface(
+                                    irow,
+                                    icol,
+                                    datetime.combine(row_head.DATE, row_head.TIME),
+                                    col_head.STID + col_head.STD2,
+                                    col_head.STNM,
+                                    col_head.SLAT,
+                                    col_head.SLON,
+                                    col_head.SELV,
+                                    col_head.STAT,
+                                    col_head.COUN,
+                                )
+                            )
+        elif self.surface_type == 'ship':
+            irow = 0
+            for icol, col_head in enumerate(self.column_headers):
+                for iprt in range(len(self.parts)):
                     pointer = (self.prod_desc.data_block_ptr
                                + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                               + (icol * self.prod_desc.parts))
+                               + (icol * self.prod_desc.parts + iprt))
 
                     self._buffer.jump_to(self._start, _word_to_position(pointer))
                     data_ptr = self._buffer.read_int(4, self.endian, False)
@@ -2224,7 +2345,7 @@ def __init__(self, file, *args, **kwargs):
                             Surface(
                                 irow,
                                 icol,
-                                datetime.combine(row_head.DATE, row_head.TIME),
+                                datetime.combine(col_head.DATE, col_head.TIME),
                                 col_head.STID + col_head.STD2,
                                 col_head.STNM,
                                 col_head.SLAT,
@@ -2234,56 +2355,32 @@ def __init__(self, file, *args, **kwargs):
                                 col_head.COUN,
                             )
                         )
-        elif self.surface_type == 'ship':
-            irow = 0
-            for icol, col_head in enumerate(self.column_headers):
-                pointer = (self.prod_desc.data_block_ptr
-                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                           + (icol * self.prod_desc.parts))
-
-                self._buffer.jump_to(self._start, _word_to_position(pointer))
-                data_ptr = self._buffer.read_int(4, self.endian, False)
-
-                if data_ptr:
-                    self._sfinfo.append(
-                        Surface(
-                            irow,
-                            icol,
-                            datetime.combine(col_head.DATE, col_head.TIME),
-                            col_head.STID + col_head.STD2,
-                            col_head.STNM,
-                            col_head.SLAT,
-                            col_head.SLON,
-                            col_head.SELV,
-                            col_head.STAT,
-                            col_head.COUN,
-                        )
-                    )
         elif self.surface_type == 'climate':
             for icol, col_head in enumerate(self.column_headers):
                 for irow, row_head in enumerate(self.row_headers):
-                    pointer = (self.prod_desc.data_block_ptr
-                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
-                               + (icol * self.prod_desc.parts))
-
-                    self._buffer.jump_to(self._start, _word_to_position(pointer))
-                    data_ptr = self._buffer.read_int(4, self.endian, False)
-
-                    if data_ptr:
-                        self._sfinfo.append(
-                            Surface(
-                                irow,
-                                icol,
-                                datetime.combine(col_head.DATE, col_head.TIME),
-                                row_head.STID + row_head.STD2,
-                                row_head.STNM,
-                                row_head.SLAT,
-                                row_head.SLON,
-                                row_head.SELV,
-                                row_head.STAT,
-                                row_head.COUN,
+                    for iprt in range(len(self.parts)):
+                        pointer = (self.prod_desc.data_block_ptr
+                                   + (irow * self.prod_desc.columns * self.prod_desc.parts)
+                                   + (icol * self.prod_desc.parts + iprt))
+
+                        self._buffer.jump_to(self._start, _word_to_position(pointer))
+                        data_ptr = self._buffer.read_int(4, self.endian, False)
+
+                        if data_ptr:
+                            self._sfinfo.append(
+                                Surface(
+                                    irow,
+                                    icol,
+                                    datetime.combine(col_head.DATE, col_head.TIME),
+                                    row_head.STID + row_head.STD2,
+                                    row_head.STNM,
+                                    row_head.SLAT,
+                                    row_head.SLON,
+                                    row_head.SELV,
+                                    row_head.STAT,
+                                    row_head.COUN,
+                                )
                             )
-                        )
         else:
             raise TypeError('Unknown surface type {}'.format(self.surface_type))
 
@@ -2360,6 +2457,10 @@ def _unpack_climate(self, sfcno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -2427,6 +2528,10 @@ def _unpack_ship(self, sfcno):
                 if fmt_code is None:
                     raise NotImplementedError('No methods for data type {}'
                                               .format(part.data_type))
+
+                if fmt_code == 's':
+                    lendat *= BYTES_PER_WORD
+
                 packed_buffer = (
                     self._buffer.read_struct(
                         struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -2494,6 +2599,10 @@ def _unpack_standard(self, sfcno):
                     if fmt_code is None:
                         raise NotImplementedError('No methods for data type {}'
                                                   .format(part.data_type))
+
+                    if fmt_code == 's':
+                        lendat *= BYTES_PER_WORD
+
                     packed_buffer = (
                         self._buffer.read_struct(
                             struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
@@ -2516,6 +2625,40 @@ def _unpack_standard(self, sfcno):
                 stations.append(station)
         return stations
 
+    @staticmethod
+    def _decode_special_observation(station, missing=-9999):
+        """Decode raw special observation text.
+
+        The text stored under ``station['SPCL']`` is handed to `parse_metar` and the
+        decoded values are written back into `station`, which is modified in place.
+
+        Parameters
+        ----------
+        station : mapping
+            Station record providing the 'SPCL', 'DATE', and 'TIME' entries.
+        missing : float, optional
+            Value stored in place of any quantity the parser reports as NaN.
+
+        Returns
+        -------
+        mapping
+            The same `station` object, with 'TIME' replaced by the parsed report time
+            and the decoded PMSL, ALTI, TMPC, DWPC, SKNT, DRCT, GUST, WNUM, CHC1-3,
+            and VSBY entries set.
+        """
+        text = station['SPCL']
+        dt = datetime.combine(station['DATE'], station['TIME'])
+        parsed = parse_metar(text, dt.year, dt.month)
+
+        station['TIME'] = parsed.date_time.time()
+
+        # NaN never compares equal to itself, so a list membership test only matches
+        # the identical `math.nan` object; test each value with `isnan` instead.
+        pmsl_inputs = (parsed.altimeter, parsed.elevation, parsed.temperature)
+        if any(math.isnan(value) for value in pmsl_inputs):
+            station['PMSL'] = missing
+        else:
+            station['PMSL'] = altimeter_to_sea_level_pressure(
+                units.Quantity(parsed.altimeter, 'inHg'),
+                units.Quantity(parsed.elevation, 'm'),
+                units.Quantity(parsed.temperature, 'degC')
+            ).to('hPa').m
+        station['ALTI'] = _check_nan(parsed.altimeter, missing)
+        station['TMPC'] = _check_nan(parsed.temperature, missing)
+        station['DWPC'] = _check_nan(parsed.dewpoint, missing)
+        station['SKNT'] = _check_nan(parsed.wind_speed, missing)
+        station['DRCT'] = _check_nan(float(parsed.wind_direction), missing)
+        station['GUST'] = _check_nan(parsed.wind_gust, missing)
+        station['WNUM'] = float(_wx_to_wnum(parsed.current_wx1, parsed.current_wx2,
+                                            parsed.current_wx3, missing))
+        station['CHC1'] = _convert_clouds(parsed.skyc1, parsed.skylev1, missing)
+        station['CHC2'] = _convert_clouds(parsed.skyc2, parsed.skylev2, missing)
+        station['CHC3'] = _convert_clouds(parsed.skyc3, parsed.skylev3, missing)
+        if math.isnan(parsed.visibility):
+            station['VSBY'] = missing
+        else:
+            # 1609.344 is the number of metres in a statute mile; presumably the
+            # parser reports visibility in metres -- confirm against parse_metar.
+            station['VSBY'] = float(round(parsed.visibility / 1609.344))
+
+        return station
+
     def nearest_time(self, date_time, station_id=None, station_number=None):
         """Get nearest observation to given time for selected stations.
 
@@ -2696,6 +2839,8 @@ def sfjson(self, station_id=None, station_number=None,
 
         stnarr = []
         for stn in data:
+            if 'SPCL' in stn:
+                stn = self._decode_special_observation(stn, self.prod_desc.missing_float)
             props = {'date_time': datetime.combine(stn.pop('DATE'), stn.pop('TIME')),
                      'station_id': stn.pop('STID') + stn.pop('STD2'),
                      'station_number': stn.pop('STNM'),