From 83876e9fe6d851eb793e84d8a69b40e3be03b886 Mon Sep 17 00:00:00 2001 From: Joe Ranalli Date: Tue, 1 Oct 2024 13:17:42 -0400 Subject: [PATCH] Baseline fixed generation of cloud field following Lave scaling method --- src/solarspatialtools/cloudfield.py | 153 ++++++++++++++++++---------- 1 file changed, 100 insertions(+), 53 deletions(-) diff --git a/src/solarspatialtools/cloudfield.py b/src/solarspatialtools/cloudfield.py index 20c6123..4914c5b 100644 --- a/src/solarspatialtools/cloudfield.py +++ b/src/solarspatialtools/cloudfield.py @@ -128,18 +128,33 @@ def stacked_field(vs, size, weights=None, scales=(1, 2, 3, 4, 5, 6, 7), plot=Fal return field -def _clip_field(field, kt=0.5, plot=False): +def _clip_field(field, clear_frac=0.5, plot=False): + """ + Find the value in the field that will produce an X% clear sky mask. The + mask is 1 where clear, and 0 where cloudy. + + Parameters + ---------- + field + clear_frac + plot + + Returns + ------- + + """ # Zero where clouds, 1 where clear # clipping needs to be based on pixel fraction, which thus needs to be # done on quantile because the field has a normal distribution - quant = np.quantile(field, kt) + quant = np.quantile(field, clear_frac) # Find that quantile and cap it field_out = np.ones_like(field) field_out[field > quant] = 0 - assert (np.isclose(kt, np.sum(field_out) / field.size, rtol=1e-3)) + # Test to make sure that we're close to matching the desired fraction + assert (np.isclose(clear_frac, np.sum(field_out) / field.size, rtol=1e-3)) if plot: plt.imshow(field_out, extent=(0, field.shape[1], 0, field.shape[0])) @@ -147,26 +162,39 @@ def _clip_field(field, kt=0.5, plot=False): return field_out -def _find_edges(size, plot=False): - edges = np.abs(sobel(out_field)) +def _find_edges(base_mask, size, plot=False): + """ + Find the edges of the field using a sobel filter and then smooth it with a + Parameters + ---------- + size + plot + + Returns + ------- + + """ + + # This gets us roughly 50% overlapping with 
mask and 50% outside + edges = np.abs(sobel(base_mask)) smoothed = uniform_filter(edges, size=size) # We want to binarize it smoothed[smoothed < 1e-5] = 0 # Zero out the small floating point values # Calculate a threshold based on quantile, because otherwise we get the whole clouds baseline = np.quantile(smoothed[smoothed>0], 0.5) - smoothed = smoothed > baseline + smoothed_binary = smoothed > baseline if plot: # Compare the edges and uniform filtered edges side by side fig, axs = plt.subplots(1, 2, figsize=(10, 5)) axs[0].imshow(edges, extent=(0, ysiz, 0, xsiz)) axs[0].set_title('Edges') - axs[1].imshow(smoothed, extent=(0, ysiz, 0, xsiz)) + axs[1].imshow(smoothed_binary, extent=(0, ysiz, 0, xsiz)) axs[1].set_title('Uniform Filtered Edges') plt.show() - return edges, smoothed + return edges, smoothed_binary def shift_mean_lave(field, ktmean, max_overshoot=1.4, ktmin=0.2, min_quant=0.005, max_quant=0.995, plot=True): @@ -186,7 +214,7 @@ def shift_mean_lave(field, ktmean, max_overshoot=1.4, ktmin=0.2, min_quant=0.005 # ##### Apply multiplier to shift mean to ktmean ##### # Rescale the mean - tgtsum = np.prod(np.shape(field_out)) * ktmean # Mean scaled over whole field + tgtsum = field_out.size * ktmean # Mean scaled over whole field diff_sum = tgtsum - np.sum(field_out == 1) # Shifting to exclude fully clear values tgt_mean = diff_sum / np.sum(field_out < 1) # Recalculating the expected mean of the cloudy-only aareas current_cloud_mean = np.mean(field_out[field_out < 1]) # Actual cloud mean @@ -213,15 +241,18 @@ def shift_mean_lave(field, ktmean, max_overshoot=1.4, ktmin=0.2, min_quant=0.005 return field_out -def lave_scaling_exact(field, ktmean, max_overshoot=1.4, ktmin=0.2, min_quant=0.005, max_quant=0.995, plot=True): +def lave_scaling_exact(field, clear_mask, edge_mask, ktmean, ktmax=1.4, kt1pct=0.2, max_quant=0.99, plot=True): # ##### Shift values of kt to range from 0.2 - 1 # Calc the "max" and "min", excluding clear values - field_min = 
np.quantile(field[field < 1], .99) + field_max = np.quantile(field[clear_mask == 0], max_quant) + print(f"Field Max: {field_max}") + print(f"kt1pct: {kt1pct}") - # Scale it between ktmin and max_overshoot - clouds3 = 1 - field*0.8/field_min + # Create a flipped version of the distribution that scales between slightly below kt1pct and basically (1-field_min) + # I think the intent here would be to make it vary between kt1pct and 1, but that's not quite what it does. + clouds3 = 1 - field*(1-kt1pct)/field_max # # Clip limits to sensible boundaries @@ -229,49 +260,61 @@ def lave_scaling_exact(field, ktmean, max_overshoot=1.4, ktmin=0.2, min_quant=0. clouds3[clouds3 < 0] = 0 # ##### Apply multiplier to shift mean to ktmean ##### - mn = np.mean(clouds3) - minmn = np.min(clouds3)/mn - maxmn = np.max(clouds3/mn-minmn) - - ce = 1+ (clouds3/mn-minmn)/maxmn*(1.4-1) - - # Rescale the mean - tgtsum = np.prod(np.shape(field_out)) * ktmean # Mean scaled over whole field - diff_sum = tgtsum - np.sum(field_out == 1) # Shifting to exclude fully clear values - tgt_mean = diff_sum / np.sum(field_out < 1) # Recalculating the expected mean of the cloudy-only aareas - current_cloud_mean = np.mean(field_out[field_out < 1]) # Actual cloud mean + mean_c3 = np.mean(clouds3) + nmin_c3 = np.min(clouds3)/mean_c3 + nrange_c3 = np.max(clouds3)/mean_c3-nmin_c3 + ce = 1+ (clouds3/mean_c3-nmin_c3)/nrange_c3*(ktmax-1) + + # Rescale one more time to make the mean of clouds3 match the ktmean from the timeseries + cloud_mask = np.bitwise_or(clear_mask>0, edge_mask) == 0 # Where is it neither clear nor edge + tgtsum = field.size * ktmean # Mean scaled over whole field + diff_sum = tgtsum - np.sum(clear_mask) - np.sum(ce[np.bitwise_and(edge_mask > 0, clear_mask==0)]) # Shifting target to exclude fully clear values and the cloud enhancement + tgt_cloud_mean = diff_sum / np.sum(cloud_mask) # Find average required in areas where it's neither cloud nor edge + current_cloud_mean = np.mean(clouds3[cloud_mask]) 
# Actual cloud mean if diff_sum > 0: - field_out[field_out!=1] = tgt_mean / current_cloud_mean * field_out[field_out!=1] + clouds4 = tgt_cloud_mean / current_cloud_mean * clouds3 + else: + clouds4 = clouds3.copy() - # print(diff_sum) - # print(current_cloud_mean) - print(f"Desired Mean: {ktmean}, actual global mean {np.mean(field_out)}.") + clouds5 = clouds4.copy() + + # Edges then clear means that the clearsky overrides the edge enhancement + clouds5[edge_mask] = ce[edge_mask > 0] + clouds5[clear_mask > 0] = 1 + print(f"Desired Mean: {ktmean}, actual global mean {np.mean(clouds5)}.") if plot: - plt.hist(field_out[field_out<1].flatten(), bins=100) - plt.show() + plt.hist(ce.flatten(), bins=100) + plt.hist(clouds3.flatten(), bins=100, alpha=0.5) + plt.hist(clouds4.flatten(), bins=100, alpha=0.5) + plt.hist(clouds5.flatten(), bins=100, alpha=0.5) + plt.hist(field.flatten(), bins=100, alpha=0.5) + plt.legend(["Cloud Enhancement", "1st Scaled Cloud Distribution", "2nd Scaled Cloud Distribution", "Fully Remapped Distribution", + "Original Field Distribution"]) - # plot field and field_out side by side fig, axs = plt.subplots(1, 2, figsize=(10, 5)) axs[0].imshow(field, extent=(0, ysiz, 0, xsiz)) axs[0].set_title('Original Field') - axs[1].imshow(field_out, extent=(0, ysiz, 0, xsiz)) + axs[1].imshow(clouds5, extent=(0, ysiz, 0, xsiz)) axs[1].set_title('Shifted Field') plt.show() - return field_out + + return clouds5 -def get_settings_from_timeseries(kt_ts, plot=True): +def get_settings_from_timeseries(kt_ts, clear_threshold=0.95, plot=True): # Get the mean and standard deviation of the time series - ktmean = np.mean(kt_ts) + ktmean = np.mean(kt_ts) # represents mean of kt ktstd = np.std(kt_ts) - ktmax = np.max(kt_ts) + ktmax = np.max(kt_ts) # represents peak cloud enhancement ktmin = np.min(kt_ts) - # Get the fraction of clear sky - frac_clear = np.sum(kt_ts > 0.95) / np.prod(np.shape(kt_ts)) + kt1pct = np.nanquantile(kt_ts, 0.01) # represents "lowest" kt + + # Get the 
fraction of clear sky with a threshold + frac_clear = np.sum(kt_ts > clear_threshold) / kt_ts.size vs = variability_score(kt) * 1e4 @@ -291,7 +334,7 @@ def get_settings_from_timeseries(kt_ts, plot=True): tmscales = [i+1 for i, _ in enumerate(tmscales[:-1])] weights = _calc_wavelet_weights(waves) - return ktmean, ktstd, ktmin, ktmax, frac_clear, vs, weights, tmscales + return ktmean, kt1pct, ktmax, frac_clear, vs, weights, tmscales @@ -305,23 +348,26 @@ def get_settings_from_timeseries(kt_ts, plot=True): twin = pd.date_range('2013-09-08 9:15:00', '2013-09-08 10:15:00', freq='1s', tz='UTC') data = pd.read_hdf(datafn, mode="r", key="data") data = data[40] - plt.plot(data) - plt.show() + # plt.plot(data) + # plt.show() pos = pd.read_hdf(datafn, mode="r", key="latlon") loc = pvlib.location.Location(np.mean(pos['lat']), np.mean(pos['lon'])) cs_ghi = loc.get_clearsky(data.index, model='simplified_solis')['ghi'] - cs_ghi = 1000/max(cs_ghi) * cs_ghi + cs_ghi = 1000/max(cs_ghi) * cs_ghi # Rescale (possible scaling effect on kt) kt = pvlib.irradiance.clearsky_index(data, cs_ghi, 2) - plt.plot(data) - plt.plot(cs_ghi) - plt.show() + # plt.plot(data) + # plt.plot(cs_ghi) + # plt.show() + # + # plt.plot(kt) + # plt.show() - plt.plot(kt) - plt.show() + # plt.hist(kt, bins=100) + # plt.show() - ktmean, ktstd, ktmin, ktmax, frac_clear, vs, weights, scales = get_settings_from_timeseries(kt, plot=False) + ktmean, kt1pct, ktmax, frac_clear, vs, weights, scales = get_settings_from_timeseries(kt, plot=False) print(f"Clear Fraction is: {frac_clear}") @@ -332,21 +378,22 @@ def get_settings_from_timeseries(kt_ts, plot=True): cfield = stacked_field(vs, (xsiz, ysiz), weights, scales) - mask_field = stacked_field(vs, (xsiz, ysiz), weights, scales) - mask_field = _clip_field(mask_field, frac_clear, plot=False) + clear_mask = stacked_field(vs, (xsiz, ysiz), weights, scales) + clear_mask = _clip_field(clear_mask, frac_clear, plot=False) # 0 is cloudy, 1 is clear + # Clear Everywhere out_field = 
np.ones_like(cfield) # Where it's cloudy, mask in the clouds - out_field[mask_field == 0] = cfield[mask_field == 0] + out_field[clear_mask == 0] = cfield[clear_mask == 0] # plt.imshow(out_field, extent=(0, ysiz, 0, xsiz)) # plt.show() - edges, smoothed = _find_edges(3) + edges, smoothed = _find_edges(clear_mask, 3, plot=False) # field_final = shift_mean_lave(out_field, ktmean) - lave_scaling_exact(out_field, ktmean) + field_final = lave_scaling_exact(cfield, clear_mask, smoothed, ktmean, ktmax, kt1pct, plot=False) plt.plot(field_final[1,:]) plt.show()