Skip to content

Commit 22b4fa0

Browse files
cassiestuurmanRui WeiCassie Stuurman
authored and
GitHub Enterprise
committed
Delivery 1.3.1 updates (#92)
* Compute and output node-level WSE uncertainties from Bayes reconst * Fix wse and slope random uncertainties/unit conversions * Move unc scaling factors into b and c vectors * Keep degraded area pixels that overlie prior water mask * Move prior water check up to pixel mask * Fix logic for finding degraded_and_no_prior_water * Add w_opt and w_opt_r_u to river product * Output wse_opt only for populated nodes * Fix reconst random uncert comment * Update area flg to use all pixels * Do not merge non-contiguous land-near-water pix into dominant label * Consider degraded pixels for area flag * Change default scaling to 5 instead of 20 if ext_dist is missing * Reduce initial pixassgn search width to 1/3 max width * KCV-465_area_wse_fix (#91) Add the computation of the total area of pixels that were used in the WSE calculation for area_wse * Add specular ringing frac to product * put >0 inside the parentheses for is_area_suspect mask * Pull updated reach plotters from pix subset * Use reach mask when computing reach width * Do not change degraded class qual to suspect where there's prior water * Do not publish sring_frac or w_opt to product * Check that mask has any good to avoid warning --------- Co-authored-by: Rui Wei <rui.wei@jpl.nasa.gov> Co-authored-by: Cassie Stuurman <stuurman@frode.jpl.nasa.gov>
1 parent a00e81f commit 22b4fa0

File tree

4 files changed

+195
-62
lines changed

4 files changed

+195
-62
lines changed

src/RiverObs/RiverNode.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,11 @@ def percentile(self, var, q, goodvar='good'):
203203
def sum(self, var, goodvar='good'):
204204
"""Return the sum of all variable values (e.g., for area)."""
205205
good = getattr(self, goodvar)
206-
Sum = np.sum(getattr(self, var)[good])
207-
return Sum
206+
if good.any():
207+
Sum = np.sum(getattr(self, var)[good])
208+
return Sum
209+
else:
210+
return 0
208211

209212
def cdf(self, var, goodvar='good'):
210213
"""Get the cdf for a variable."""

src/RiverObs/RiverObs.py

+27-21
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,8 @@ def flag_out_channel_and_label(
160160
that are unconnected to the dominant label but within the original
161161
max_dist threshold.
162162
163-
If no extreme distance value is given it will use a very wide threshold
164-
of 20x the input max_width for all nodes.
163+
If no extreme distance value is given it will use a threshold of 5x
164+
the input max_width for all nodes.
165165
166166
Parameters
167167
----------
@@ -201,17 +201,18 @@ def flag_out_channel_and_label(
201201
# Find the dominant label and include pixels up to the extreme dist
202202
self.dominant_label = None
203203
if seg_label is not None and self.in_channel.any():
204-
class_mask = np.logical_and(self.in_channel, seg_label > 0)
205-
if class_mask.any():
206-
# find the largest (i.e. dominant) label within the
207-
# max_distance bounds
204+
# remove unattached land-near-water pixels
205+
self.in_channel = np.logical_and(self.in_channel, seg_label > 0)
206+
# find the largest (i.e. dominant) label within the
207+
# max_distance bounds
208+
if self.in_channel.any():
208209
try:
209210
dominant_label = scipy.stats.mode(
210-
seg_label[class_mask], keepdims=False)[0]
211+
seg_label[self.in_channel], keepdims=False)[0]
211212
except TypeError:
212213
# Try previous syntax if TypeError raised (older scipy)
213214
dominant_label = scipy.stats.mode(
214-
seg_label[class_mask])[0][0]
215+
seg_label[self.in_channel])[0][0]
215216

216217
self.dominant_label = dominant_label
217218

@@ -229,7 +230,7 @@ def flag_out_channel_and_label(
229230

230231
# search for segment in this node which has smallest
231232
# absolute value of n coordinate
232-
min_n_seg_label, min_n = -1, 9999999999999
233+
min_n_seg_label, min_n = 0, 9999999999999
233234
for this_seg_label in np.unique(
234235
seg_label[this_node_mask]):
235236

@@ -244,7 +245,8 @@ def flag_out_channel_and_label(
244245

245246
# merge seg label of segment with min abs n coordinate
246247
# with dominant label
247-
if min_n_seg_label != -1:
248+
if min_n_seg_label != 0 \
249+
and min_n_seg_label != dominant_label:
248250
seg_label[seg_label == min_n_seg_label] = (
249251
dominant_label)
250252

@@ -258,9 +260,6 @@ def flag_out_channel_and_label(
258260
dst0 <= extreme_dist,
259261
abs(self.n) <= extreme_dist)))
260262

261-
else:
262-
self.in_channel = class_mask
263-
264263
self.index = self.index[self.in_channel]
265264
self.d = self.d[self.in_channel]
266265
self.x = self.x[self.in_channel]
@@ -291,16 +290,19 @@ def get_ext_dist_threshold(self, max_width, ext_dist_coef):
291290
lakes
292291
"""
293292
if np.iterable(max_width):
294-
max_distance = max_width[self.index] / 2.
293+
max_distance = max_width[self.index] / 3. # was 2
295294
else:
296-
max_distance = max_width / 2.
295+
max_distance = max_width / 3. # was 2
297296

298297
node_spacing = abs(self.ds[self.index])
299298
if ext_dist_coef is None:
300-
scale_factor = 20.0
299+
scale_factor = 5.0
301300
else:
302301
scale_factor = ext_dist_coef[self.index]
303-
extreme_dist = scale_factor * np.maximum(node_spacing, max_distance)
302+
303+
extreme_dist = scale_factor * np.maximum(
304+
node_spacing, max_distance * 3/2
305+
) # includes 3/2 to make ext_dist operate on river half-width
304306
return max_distance, extreme_dist
305307

306308
def flag_out_channel(self, max_width):
@@ -309,9 +311,9 @@ def flag_out_channel(self, max_width):
309311
and remove the points from the list of observations.
310312
"""
311313
if np.iterable(max_width):
312-
max_distance = max_width[self.index] / 2.
314+
max_distance = max_width[self.index] / 3. # was 2
313315
else:
314-
max_distance = max_width / 2.
316+
max_distance = max_width / 3. # was 2
315317

316318
self.in_channel = np.abs(self.n) <= max_distance
317319

@@ -485,8 +487,8 @@ def get_node_agg(
485487
"""
486488
outputs = {key: [] for key in [
487489
'h', 'h_std', 'h_u', 'lat_u', 'lon_u', 'area', 'area_u',
488-
'area_det', 'area_det_u', 'width_area', 'width_area_u', 'sig0',
489-
'sig0_u', 'sig0_std']}
490+
'area_det', 'area_det_u', 'area_of_ht', 'area_of_ht_u',
491+
'width_area', 'width_area_u', 'sig0', 'sig0_u', 'sig0_std']}
490492

491493
for node in self.all_nodes:
492494
if node in self.populated_nodes:
@@ -502,6 +504,10 @@ def get_node_agg(
502504
river_node.area_with_uncert(
503505
method=area_method, goodvar=goodvar_area)
504506

507+
area_of_ht, _, area_of_ht_u, _, _, _ = \
508+
river_node.area_with_uncert(
509+
method = area_method, goodvar = goodvar_wse)
510+
505511
local_vars = locals()
506512
for key in outputs:
507513
value = local_vars[key]

src/SWOTRiver/SWOTRiverEstimator.py

+88-32
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,19 @@ def __init__(self,
420420
mask, class_qual_area_bad & classification_qual > 0,
421421
geo_qual_wse_bad & geolocation_qual > 0])
422422

423+
# Remove degraded class_qual where there is no prior water
424+
PIXC_CLASS_QUAL_NO_PRIOR = 2**2
425+
degraded_class_mask = class_qual_area_degraded \
426+
& classification_qual > 0
427+
no_prior_water_mask = classification_qual \
428+
& PIXC_CLASS_QUAL_NO_PRIOR > 0
429+
degraded_and_no_prior_water = degraded_class_mask & no_prior_water_mask
430+
degraded_and_prior_water = degraded_class_mask & ~no_prior_water_mask
431+
# uncomment to make degraded class_qual good anywhere there is
432+
# prior water
433+
# degraded_class_mask[degraded_and_prior_water] = False
434+
mask = np.logical_or(mask, degraded_and_no_prior_water)
435+
423436
# HACK in avoidance of bad water_frac values
424437
water_frac = self.get(fractional_inundation_kwd)
425438
mask = np.logical_or(mask, water_frac.mask)
@@ -429,7 +442,6 @@ def __init__(self,
429442
mask = np.logical_or(mask, bright_land_flag > 0)
430443

431444
# skip NaNs in dheight_dphase
432-
433445
good = ~mask
434446
if good.sum() == 0:
435447
LOGGER.warning("No usable pixels found in input PIXC file")
@@ -491,15 +503,18 @@ def __init__(self,
491503
# later in node aggregation.
492504
PIXC_GEO_QUAL_XOVR_SUSPECT = 2**6
493505
PIXC_GEO_QUAL_XOVR_BAD = 2**23
506+
PIXC_CLASS_QUAL_RINGING = 2**19
494507

495508
if self.classification_qual is not None:
496-
self.is_area_degraded = (
497-
class_qual_area_degraded & self.classification_qual > 0)
509+
self.is_area_degraded = degraded_class_mask[good]
498510
self.is_area_suspect = (
499-
class_qual_area_suspect & self.classification_qual > 0)
511+
class_qual_area_suspect & self.classification_qual > 0)
512+
self.sring_flg = (
513+
self.classification_qual & PIXC_CLASS_QUAL_RINGING) > 0
500514
else:
501515
self.is_area_degraded = np.zeros(self.lat.shape, dtype='bool')
502516
self.is_area_suspect = np.zeros(self.lat.shape, dtype='bool')
517+
self.sring_flg = np.zeros(self.lat.shape, dtype='bool')
503518

504519
if self.geolocation_qual is not None:
505520
self.is_wse_degraded = (
@@ -1265,7 +1280,7 @@ def process_node(self,
12651280
# self.river_obs.
12661281
dsets = [
12671282
'h_noise', 'h_flg', 'wse_class_flg', 'area_flg', 'sig0_flg', 'lon',
1268-
'lat', 'inundated_area', 'klass', 'pixel_area',
1283+
'lat', 'inundated_area', 'klass', 'pixel_area', 'sring_flg',
12691284
'xtrack', 'sig0', 'sig0_uncert', 'water_frac', 'water_frac_uncert',
12701285
'ifgram', 'power1', 'power2', 'phase_noise_std', 'dh_dphi',
12711286
'dlat_dphi', 'dlon_dphi', 'num_rare_looks', 'num_med_looks',
@@ -1398,6 +1413,10 @@ def process_node(self,
13981413
self.river_obs.get_node_stat(
13991414
'sum', 'inundated_area', goodvar='good')
14001415
)[~mask_good_sus_area]
1416+
# compute the area of all pixels flagged specular ringing
1417+
area_sring = np.asarray(
1418+
self.river_obs.get_node_stat('sum', 'inundated_area',
1419+
goodvar='sring_flg'))
14011420

14021421
# area of pixels used to compute heights
14031422
with warnings.catch_warnings():
@@ -1461,7 +1480,7 @@ def process_node(self,
14611480
area_u = node_aggs['area_u']
14621481
area_det = node_aggs['area_det']
14631482
area_det_u = node_aggs['area_det_u']
1464-
area_of_ht = node_aggs['area']
1483+
area_of_ht = node_aggs['area_of_ht']
14651484

14661485
# Use degraded pix as well when not enough good/suspect pix
14671486
width_area[~mask_good_sus_area] = node_aggs_w_degraded[
@@ -1477,7 +1496,7 @@ def process_node(self,
14771496
area_det_u[~mask_good_sus_area] = node_aggs_w_degraded[
14781497
'area_det_u'][~mask_good_sus_area]
14791498
area_of_ht[~mask_good_sus_area] = node_aggs_w_degraded[
1480-
'area'][~mask_good_sus_area]
1499+
'area_of_ht'][~mask_good_sus_area]
14811500

14821501
if self.height_agg_method != 'orig':
14831502
wse = node_aggs['h']
@@ -1494,8 +1513,6 @@ def process_node(self,
14941513
wse_r_u[~mask_good_sus_wse] = node_aggs_w_degraded[
14951514
'h_u'][~mask_good_sus_wse]
14961515

1497-
area_of_ht = area
1498-
14991516
# geoid heights and tide corrections weighted by height uncertainty
15001517
try:
15011518
geoid_hght = np.asarray(
@@ -1624,6 +1641,9 @@ def process_node(self,
16241641
dark_frac = MISSING_VALUE_FLT * np.ones(area.shape)
16251642
dark_frac[area > 0] = 1 - area_det[area > 0] / area[area > 0]
16261643
dark_frac[np.logical_and(dark_frac > 1, area > 0)] = 1 # clip to <= 1
1644+
sring_frac = MISSING_VALUE_FLT * np.ones(area.shape)
1645+
sring_frac[area > 0] = area_sring[area > 0] / area[area > 0]
1646+
sring_frac[np.logical_and(sring_frac > 1, area > 0)] = 1
16271647

16281648
# Compute flow direction relative to along-track
16291649
tangent = self.river_obs.centerline.tangent[
@@ -1677,6 +1697,12 @@ def process_node(self,
16771697
n_pix_area_degraded = np.array(self.river_obs.get_node_stat(
16781698
'sum', 'is_area_degraded'))
16791699

1700+
# create empty arrays for the reconst WSE & uncertainty (for later)
1701+
w_opt = np.ones(wse.shape,
1702+
dtype=np.float64) * self.river_obs.missing_value
1703+
w_opt_r_u = np.ones(wse.shape,
1704+
dtype=np.float64) * self.river_obs.missing_value
1705+
16801706
# Create node_q_b quality bitwise flag
16811707
node_q_b = np.zeros(lat_median.shape, dtype='i4')
16821708

@@ -1822,9 +1848,12 @@ def process_node(self,
18221848
'area_det': area_det.astype('float64'),
18231849
'area_det_u': area_det_u.astype('float64'),
18241850
'area_of_ht': area_of_ht.astype('float64'),
1851+
'area_sring': area_sring.astype('float64'),
18251852
'wse': wse.astype('float64'),
18261853
'wse_std': wse_std.astype('float64'),
18271854
'wse_r_u': wse_r_u.astype('float64'),
1855+
'w_opt': w_opt.astype('float64'),
1856+
'w_opt_r_u': w_opt_r_u.astype('float64'),
18281857
'nobs': nobs.astype('int32'),
18291858
'nobs_h': nobs_h.astype('int32'),
18301859
'n_good_pix': n_pix_wse.astype('int32'),
@@ -1842,6 +1871,7 @@ def process_node(self,
18421871
'pole_tide': pole_tide.astype('float64'),
18431872
'node_blocked': is_blocked.astype('uint8'),
18441873
'dark_frac': dark_frac,
1874+
'sring_frac': sring_frac,
18451875
'x_prior': x_prior.astype('float64'),
18461876
'y_prior': y_prior.astype('float64'),
18471877
'lon_prior': lon_prior.astype('float64'),
@@ -1939,9 +1969,17 @@ def process_reach(self, river_reach_collection, river_reach, reach,
19391969

19401970
reach_stats['area_of_ht'] = np.sum(river_reach.area_of_ht)
19411971

1942-
reach_stats['width'] = np.sum(river_reach.area)/reach_stats['length']
1943-
reach_stats['width_u'] = np.sqrt(np.sum(
1944-
river_reach.area_u**2)) / reach_stats['length']
1972+
pct_unmasked = 100*np.sum(river_reach.mask) / len(river_reach.mask)
1973+
if pct_unmasked >= self.reach_pct_good_sus_thresh:
1974+
reach_stats['width'] = np.sum(river_reach.area[river_reach.mask])\
1975+
/np.sum(river_reach.p_length[river_reach.mask])
1976+
reach_stats['width_u'] = np.sqrt(
1977+
np.sum(river_reach.area_u[river_reach.mask]**2)) / np.sum(
1978+
river_reach.p_length[river_reach.mask])
1979+
else:
1980+
reach_stats['width'] = reach_stats['area'] / reach_stats['length']
1981+
reach_stats['width_u'] = np.sqrt(
1982+
np.sum(river_reach.area_u**2))/ reach_stats['length']
19451983
reach_stats['layovr_val'] = np.sqrt(np.sum(
19461984
river_reach.layovr_val[river_reach.mask]**2))
19471985

@@ -2020,20 +2058,28 @@ def process_reach(self, river_reach_collection, river_reach, reach,
20202058
hh_opt, wse_r_u_opt, mask_opt, ss_opt = \
20212059
hh, wse_r_u, mask, ss
20222060
# get the optimal reconstruction (Bayes estimate)
2023-
wse_opt, height_u, slope_u = self.optimal_reconstruct(
2024-
river_reach_collection,
2025-
river_reach, reach_id,
2026-
ss_opt, hh_opt,
2027-
wse_r_u_opt, mask_opt,
2028-
min_fit_points,
2029-
method='Bayes',
2030-
)
2061+
wse_opt, wse_opt_r_u, height_u, slope_u, = \
2062+
self.optimal_reconstruct(
2063+
river_reach_collection,
2064+
river_reach, reach_id,
2065+
ss_opt, hh_opt,
2066+
wse_r_u_opt, mask_opt,
2067+
min_fit_points,
2068+
method='Bayes',
2069+
)
2070+
# Store the reconstructed node heights in river reach object.
2071+
# Currently, only store populated nodes (node_indx clipped to
2072+
# nodes with minobs observations as specified by L2_HR_Param
2073+
# file).
2074+
river_reach.w_opt = wse_opt[self.river_obs.populated_nodes]
2075+
river_reach.w_opt_r_u = wse_opt_r_u[
2076+
self.river_obs.populated_nodes]
20312077
# Use reconstruction height and slope for reach outputs
20322078
dx = ss_opt[0] - ss_opt[-1] # along-reach dist
20332079
reach_stats['slope'] = (wse_opt[0] - wse_opt[-1]) / dx
20342080
reach_stats['height'] = np.mean(wse_opt)
2035-
reach_stats['slope_r_u'] = slope_u * 0.0001 # m/m
2036-
reach_stats['height_r_u'] = height_u * 0.01 # m
2081+
reach_stats['slope_r_u'] = slope_u
2082+
reach_stats['height_r_u'] = height_u
20372083
reach_stats['slope_u'] = np.sqrt(
20382084
SLOPE_SYS_UNCERT**2 + reach_stats['slope_r_u']**2)
20392085
reach_stats['height_u'] = np.sqrt(
@@ -2101,6 +2147,9 @@ def process_reach(self, river_reach_collection, river_reach, reach,
21012147
dark_frac = (
21022148
1-np.sum(river_reach.area_det)/np.sum(river_reach.area))
21032149
reach_stats['dark_frac'] = min(dark_frac, 1) # clip to <= 1
2150+
sring_frac = (
2151+
np.sum(river_reach.area_sring)/np.sum(river_reach.area))
2152+
reach_stats['sring_frac'] = min(sring_frac, 1) # clip to <= 1
21042153

21052154
reach_stats['n_reach_up'] = (reach_stats['rch_id_up'] > 0).sum()
21062155
reach_stats['n_reach_dn'] = (reach_stats['rch_id_dn'] > 0).sum()
@@ -2529,7 +2578,12 @@ def get_reach_mask(self, ss, hh, ww, node_q, min_fit_points,
25292578
mask = np.zeros(len(hh), dtype=bool)
25302579

25312580
wse_outlier_mask = mask.copy()
2532-
pct_good_sus = 100 * (node_q[mask] < 2).sum() / mask.sum()
2581+
2582+
if mask.sum() > 0:
2583+
pct_good_sus = 100 * (node_q[mask] < 2).sum() / mask.sum()
2584+
else:
2585+
pct_good_sus = 0
2586+
25332587
if pct_good_sus > self.reach_pct_good_sus_thresh:
25342588
mask[node_q >= 2] = False
25352589

@@ -2904,9 +2958,11 @@ def optimal_reconstruct(
29042958
# define vectors b and c for uncertainty estimates later
29052959
this_reach_mask_b = np.zeros_like(ss)
29062960
this_reach_mask_b[first_node:first_node+this_len] = 1
2961+
this_reach_mask_b = this_reach_mask_b / np.sum(this_reach_mask_b)
29072962
first_and_last_node_c = np.zeros_like(ss)
2908-
first_and_last_node_c[first_node] = -1
2909-
first_and_last_node_c[first_node+this_len-1] = 1
2963+
reach_length = ss[first_node + this_len - 1] - ss[first_node]
2964+
first_and_last_node_c[first_node] = -1 / reach_length
2965+
first_and_last_node_c[first_node+this_len-1] = 1 / reach_length
29102966

29112967
# create a wse prior if flagged
29122968
if self.prior_wse_method == 'fit':
@@ -2983,15 +3039,15 @@ def optimal_reconstruct(
29833039
wse_out0 = np.matmul(K, wse_reg)
29843040
# apply the prior term
29853041
wse_out = wse_out0 + np.matmul(K_bar, prior_wse)
2986-
height_u = this_reach_mask_b @ A_inv @ np.atleast_2d(
2987-
this_reach_mask_b).T
2988-
slope_u = first_and_last_node_c @ A_inv @ np.atleast_2d(
2989-
first_and_last_node_c).T
3042+
wse_out_std = np.sqrt(np.diag(A_inv)) # node-level height uncertainty
3043+
height_u = np.sqrt(this_reach_mask_b @ A_inv @ this_reach_mask_b.T)
3044+
slope_u = np.sqrt(first_and_last_node_c @ A_inv @ np.atleast_2d(
3045+
first_and_last_node_c).T)
29903046
if self.use_multiple_reaches:
29913047
wse_out = wse_out[first_node:first_node+this_len]
2992-
2993-
# Return height_u and slope_u as scalars
2994-
return wse_out, height_u.item(), slope_u.item()
3048+
wse_out_std = wse_out_std[first_node:first_node+this_len]
3049+
# Return node WSEs & stdevs, and reach height_u and slope_u as scalars
3050+
return wse_out, wse_out_std, height_u.item(), slope_u.item()
29953051

29963052
@staticmethod
29973053
def compute_bayes_estimator(Ry, Rv, H):

0 commit comments

Comments
 (0)