From 4ee1d86b6da1f23f5a55c8ba6f4e872fc6cb59fc Mon Sep 17 00:00:00 2001 From: Jungkyo Jung Date: Thu, 8 Aug 2024 02:55:58 -0700 Subject: [PATCH 1/8] add mgrs_tile_collection_id in input group --- src/dswx_sar/defaults/dswx_s1.yaml | 3 ++ src/dswx_sar/save_mgrs_tiles.py | 55 +++++++++++++++++++++++++----- src/dswx_sar/schemas/dswx_s1.yaml | 3 ++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/src/dswx_sar/defaults/dswx_s1.yaml b/src/dswx_sar/defaults/dswx_s1.yaml index 3c641a4..3825cb3 100644 --- a/src/dswx_sar/defaults/dswx_s1.yaml +++ b/src/dswx_sar/defaults/dswx_s1.yaml @@ -12,6 +12,9 @@ runconfig: # for open water input_file_path: + # Specify the MGRS tile collection ID + input_mgrs_collection_id: + dynamic_ancillary_file_group: # Digital elevation model (Required) dem_file: diff --git a/src/dswx_sar/save_mgrs_tiles.py b/src/dswx_sar/save_mgrs_tiles.py index bb9ed5d..680c00a 100644 --- a/src/dswx_sar/save_mgrs_tiles.py +++ b/src/dswx_sar/save_mgrs_tiles.py @@ -343,7 +343,7 @@ def get_intersecting_mgrs_tiles_list_from_db( MGRS tile collection Returns - ---------- + ------- mgrs_list: list List of intersecting MGRS tiles. most_overlapped : GeoSeries @@ -425,6 +425,32 @@ def get_intersecting_mgrs_tiles_list_from_db( return list(set(mgrs_list)), most_overlapped +def get_mgrs_tiles_list_from_db(mgrs_collection_file, + mgrs_tile_collection_id): + """Retrieve a list of MGRS tiles from a specified MGRS tile collection. + + Parameters + ---------- + mgrs_collection_file : str + Path to the file containing the MGRS tile collection. + This file should be readable by GeoPandas. + mgrs_tile_collection_id : str + The ID of the MGRS tile collection from which to retrieve the MGRS tiles. + + Returns + ------- + mgrs_list : list + List of MGRS tile identifiers from the specified collection. + """ + vector_gdf = gpd.read_file(mgrs_collection_file) + most_overlapped = vector_gdf[ + vector_gdf['mgrs_set_id'] == mgrs_tile_collection_id].iloc[0] + print(most_overlapped) + mgrs_list = ast.literal_eval(most_overlapped['mgrs_tiles']) + + return list(set(mgrs_list)), most_overlapped + + def get_intersecting_mgrs_tiles_list(image_tif: str): """Find and return a list of MGRS tiles that intersect a reference GeoTIFF file. @@ -435,7 +461,7 @@ def get_intersecting_mgrs_tiles_list(image_tif: str): Path to the input GeoTIFF file. Returns - ---------- + ------- mgrs_list: list List of intersecting MGRS tiles. """ @@ -555,6 +581,8 @@ def run(cfg): cross_pol = processing_cfg.crosspol input_list = cfg.groups.input_file_group.input_file_path + input_mgrs_collection_id = \ + cfg.groups.input_file_group.input_mgrs_collection_id dswx_workflow = processing_cfg.dswx_workflow hand_mask_value = processing_cfg.hand.mask_value @@ -955,12 +983,23 @@ def run(cfg): mgrs_meta_dict = {} if database_bool: - mgrs_tile_list, most_overlapped = \ - get_intersecting_mgrs_tiles_list_from_db( - mgrs_collection_file=mgrs_collection_db_path, - image_tif=paths['final_water'], - track_number=track_number - ) + # In the case that mgrs_tile_collection_id is given + # from input, then extract the MGRS list from database + if input_mgrs_collection_id is not None: + mgrs_tile_list, most_overlapped = \ + get_mgrs_tiles_list_from_db( + mgrs_collection_file=mgrs_collection_db_path, + mgrs_tile_collection_id=input_mgrs_collection_id) + # In the case that mgrs_tile_collection_id is not given + # from input, then extract the MGRS list from database + # using track number and area intersecting with image_tif + else: + mgrs_tile_list, most_overlapped = \ + get_intersecting_mgrs_tiles_list_from_db( + mgrs_collection_file=mgrs_collection_db_path, + image_tif=paths['final_water'], + track_number=track_number + ) maximum_burst = most_overlapped['number_of_bursts'] # convert string to list expected_burst_list = ast.literal_eval(most_overlapped['bursts']) diff --git a/src/dswx_sar/schemas/dswx_s1.yaml b/src/dswx_sar/schemas/dswx_s1.yaml index 4c6e05c..4f504d4 100644 --- a/src/dswx_sar/schemas/dswx_s1.yaml +++ b/src/dswx_sar/schemas/dswx_s1.yaml @@ -9,6 +9,9 @@ runconfig: # REQUIRED - list of RTC products (directory or files) input_file_path: list(str(), min=1) + # Specify the MGRS tile collection ID + input_mgrs_collection_id: str(required=False) + dynamic_ancillary_file_group: # Digital elevation model dem_file: str(required=True) From a211647a1be333baefe24e0d0388f40fc51f2e0b Mon Sep 17 00:00:00 2001 From: Jungkyo Jung Date: Thu, 15 Aug 2024 19:37:36 -0700 Subject: [PATCH 2/8] refine track number and add one step checking burst_list --- src/dswx_sar/save_mgrs_tiles.py | 81 +++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/src/dswx_sar/save_mgrs_tiles.py b/src/dswx_sar/save_mgrs_tiles.py index 680c00a..f7f50d3 100644 --- a/src/dswx_sar/save_mgrs_tiles.py +++ b/src/dswx_sar/save_mgrs_tiles.py @@ -7,6 +7,7 @@ import os import time +from collections import Counter import geopandas as gpd import mgrs import numpy as np @@ -327,7 +328,8 @@ def crop_and_save_mgrs_tile( def get_intersecting_mgrs_tiles_list_from_db( image_tif, mgrs_collection_file, - track_number=None): + track_number=None, + burst_list=None): """Find and return a list of MGRS tiles that intersect a reference GeoTIFF file By searching in database @@ -341,6 +343,8 @@ def get_intersecting_mgrs_tiles_list_from_db( track_number : int, optional Track number (or relative orbit number) to specify MGRS tile collection + burst_list : list, optional + List of burst IDs to filter the MGRS tiles. Returns ------- @@ -349,10 +353,40 @@ def get_intersecting_mgrs_tiles_list_from_db( most_overlapped : GeoSeries The record of the MGRS tile with the maximum overlap area. """ + vector_gdf = gpd.read_file(mgrs_collection_file) + # Step 1: Filter by burst_list if provided + if burst_list is not None: + def burst_overlap(row): + row_bursts = ast.literal_eval(row['bursts']) + return len(set(burst_list).intersection(set(row_bursts))) + + vector_gdf['burst_overlap_count'] = vector_gdf.apply(burst_overlap, axis=1) + max_burst_overlap = vector_gdf['burst_overlap_count'].max() + vector_gdf = vector_gdf[vector_gdf['burst_overlap_count'] == max_burst_overlap] + + # If only one record matches, return it immediately + if len(vector_gdf) == 1: + logger.info('MGRS collection ID found from burst_list', vector_gdf['burst_overlap_count'] ) + mgrs_list = ast.literal_eval(vector_gdf.iloc[0]['mgrs_tiles']) + return list(set(mgrs_list)), vector_gdf.iloc[0] + + # Step 2: Filter by track_number, track_number + 1, and track_number - 1 if provided + if track_number is not None: + valid_track_numbers = [track_number, track_number + 1, track_number - 1] + vector_gdf = vector_gdf[ + vector_gdf['relative_orbit_number'].isin(valid_track_numbers) + ].to_crs("EPSG:4326") + + # If only one record matches, return it immediately + if len(vector_gdf) == 1: + mgrs_list = ast.literal_eval(vector_gdf.iloc[0]['mgrs_tiles']) + return list(set(mgrs_list)), vector_gdf.iloc[0] + else: + vector_gdf = vector_gdf.to_crs("EPSG:4326") + # Load the raster data with rasterio.open(image_tif) as src: epsg_code = src.crs.to_epsg() or 4326 - # Get bounds of the raster data left, bottom, right, top = src.bounds # Reproject to EPSG 4326 if the current EPSG is not 4326 @@ -371,7 +405,6 @@ def get_intersecting_mgrs_tiles_list_from_db( logger.info('The mosaic image crosses the antimeridian.') # Create a GeoDataFrame from the raster polygon if antimeridian_crossing_flag: - # Create a Polygon from the bounds raster_polygon_left = Polygon( [(left, bottom), (left, top), @@ -387,7 +420,6 @@ def get_intersecting_mgrs_tiles_list_from_db( raster_polygon_right], crs=4326) else: - # Create a Polygon from the bounds raster_polygon = Polygon( [(left, bottom), (left, top), @@ -397,18 +429,6 @@ def get_intersecting_mgrs_tiles_list_from_db( geometry=[raster_polygon], crs=4326) - # Load the vector data - vector_gdf = gpd.read_file(mgrs_collection_file) - - # If track number is given, then search MGRS tile collection with - # track number - if track_number is not None: - vector_gdf = vector_gdf[ - vector_gdf['relative_orbit_number'] == - track_number].to_crs("EPSG:4326") - else: - vector_gdf = vector_gdf.to_crs("EPSG:4326") - # Calculate the intersection intersection = gpd.overlay(raster_gdf, vector_gdf, @@ -416,11 +436,10 @@ def get_intersecting_mgrs_tiles_list_from_db( # Add a new column with the intersection area intersection['Area'] = intersection.to_crs(epsg=epsg_code).geometry.area + sorted_intersection = intersection.sort_values(by='Area', ascending=False) - # Find the polygon with the maximum intersection area - most_overlapped = intersection.loc[intersection['Area'].idxmax()] - - mgrs_list = ast.literal_eval(most_overlapped['mgrs_tiles']) + most_overlapped = sorted_intersection.iloc[0] if len(sorted_intersection) > 0 else None + mgrs_list = ast.literal_eval(most_overlapped['mgrs_tiles']) if most_overlapped is not None else [] return list(set(mgrs_list)), most_overlapped @@ -432,7 +451,7 @@ def get_mgrs_tiles_list_from_db(mgrs_collection_file, Parameters ---------- mgrs_collection_file : str - Path to the file containing the MGRS tile collection. + Path to the file containing the MGRS tile collection. This file should be readable by GeoPandas. mgrs_tile_collection_id : str The ID of the MGRS tile collection from which to retrieve the MGRS tiles. @@ -445,8 +464,7 @@ def get_mgrs_tiles_list_from_db(mgrs_collection_file, vector_gdf = gpd.read_file(mgrs_collection_file) most_overlapped = vector_gdf[ vector_gdf['mgrs_set_id'] == mgrs_tile_collection_id].iloc[0] - print(most_overlapped) - mgrs_list = ast.literal_eval(most_overlapped['mgrs_tiles']) + mgrs_list = ast.literal_eval(most_overlapped['mgrs_tiles']) return list(set(mgrs_list)), most_overlapped @@ -634,6 +652,7 @@ def run(cfg): logger.info(f'Number of bursts to process: {num_input_path}') date_str_list = [] + track_number_list = [] for input_dir in input_list: # Find HDF5 metadata metadata_path_iter = glob.iglob(f'{input_dir}/*{co_pol}*.tif') @@ -645,6 +664,10 @@ def run(cfg): track_number = int(tags['TRACK_NUMBER']) resolution = src.transform[0] date_str_list.append(date_str) + track_number_list.append(track_number) + counter = Counter(np.array(track_number_list)) + most_common = counter.most_common() + track_number = most_common[0][0] input_date_format = "%Y-%m-%dT%H:%M:%S" output_date_format = "%Y%m%dT%H%M%SZ" @@ -983,14 +1006,16 @@ def run(cfg): mgrs_meta_dict = {} if database_bool: - # In the case that mgrs_tile_collection_id is given + actual_burst_id = collect_burst_id(input_list, + DSWX_S1_POL_DICT['CO_POL']) + # In the case that mgrs_tile_collection_id is given # from input, then extract the MGRS list from database if input_mgrs_collection_id is not None: mgrs_tile_list, most_overlapped = \ get_mgrs_tiles_list_from_db( mgrs_collection_file=mgrs_collection_db_path, mgrs_tile_collection_id=input_mgrs_collection_id) - # In the case that mgrs_tile_collection_id is not given + # In the case that mgrs_tile_collection_id is not given # from input, then extract the MGRS list from database # using track number and area intersecting with image_tif else: @@ -998,14 +1023,14 @@ def run(cfg): get_intersecting_mgrs_tiles_list_from_db( mgrs_collection_file=mgrs_collection_db_path, image_tif=paths['final_water'], - track_number=track_number + track_number=track_number, + burst_list=actual_burst_id ) + track_number = most_overlapped['relative_orbit_number'] maximum_burst = most_overlapped['number_of_bursts'] # convert string to list expected_burst_list = ast.literal_eval(most_overlapped['bursts']) logger.info(f"Input RTCs are within {most_overlapped['mgrs_set_id']}") - actual_burst_id = collect_burst_id(input_list, - DSWX_S1_POL_DICT['CO_POL']) number_burst = len(actual_burst_id) mgrs_meta_dict['MGRS_COLLECTION_EXPECTED_NUMBER_OF_BURSTS'] = \ maximum_burst From 8e33c3ae73df7a9437f5ddc4872d75b3bdf3d483 Mon Sep 17 00:00:00 2001 From: Jungkyo Jung Date: Fri, 16 Aug 2024 11:10:47 -0700 Subject: [PATCH 3/8] version update --- README.md | 2 +- build_docker.sh | 2 +- docker/Dockerfile | 4 ++-- src/dswx_sar/defaults/algorithm_parameter_s1.yaml | 2 +- src/dswx_sar/save_mgrs_tiles.py | 2 +- src/dswx_sar/version.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index eef2ac0..3a10702 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Then, from inside the cloned repository, build the Docker image: Load the Docker container image onto your computer: ```bash -docker load -i docker/dockerimg_dswx_s1_final_1.0.tar +docker load -i docker/dockerimg_dswx_s1_final_patch_1.1.tar ``` See DSWx-SAR Science Algorithm Software (SAS) User Guide for instructions on processing via Docker. diff --git a/build_docker.sh b/build_docker.sh index e88d3a7..e94acda 100644 --- a/build_docker.sh +++ b/build_docker.sh @@ -1,7 +1,7 @@ #!/bin/bash IMAGE=opera/dswx-s1 -tag=final_1.0 +tag=final_patch_1.1 echo "IMAGE is $IMAGE:$tag" # fail on any non-zero exit codes diff --git a/docker/Dockerfile b/docker/Dockerfile index 32e917d..25e1222 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,8 +1,8 @@ FROM oraclelinux:8 LABEL author="OPERA ADT" \ - description="DSWX-SAR final release R4" \ - version="1.0-final" + description="DSWX-SAR final patch release R4.1" \ + version="1.1-final-patch" RUN yum -y update &&\ yum -y install curl &&\ diff --git a/src/dswx_sar/defaults/algorithm_parameter_s1.yaml b/src/dswx_sar/defaults/algorithm_parameter_s1.yaml index ac46128..32ca3d8 100644 --- a/src/dswx_sar/defaults/algorithm_parameter_s1.yaml +++ b/src/dswx_sar/defaults/algorithm_parameter_s1.yaml @@ -198,7 +198,7 @@ runconfig: number_cpu: 1 refine_with_bimodality: - minimum_pixel: 4 + minimum_pixel: 40 lines_per_block: 500 number_cpu: 1 thresholds: diff --git a/src/dswx_sar/save_mgrs_tiles.py b/src/dswx_sar/save_mgrs_tiles.py index f7f50d3..7bf8183 100644 --- a/src/dswx_sar/save_mgrs_tiles.py +++ b/src/dswx_sar/save_mgrs_tiles.py @@ -366,7 +366,7 @@ def burst_overlap(row): # If only one record matches, return it immediately if len(vector_gdf) == 1: - logger.info('MGRS collection ID found from burst_list', vector_gdf['burst_overlap_count'] ) + logger.info(f"MGRS collection ID found from burst_list {vector_gdf}") mgrs_list = ast.literal_eval(vector_gdf.iloc[0]['mgrs_tiles']) return list(set(mgrs_list)), vector_gdf.iloc[0] diff --git a/src/dswx_sar/version.py b/src/dswx_sar/version.py index a9fe560..a57ff2b 100644 --- a/src/dswx_sar/version.py +++ b/src/dswx_sar/version.py @@ -1 +1 @@ -VERSION = '1.0' +VERSION = '1.1' From ef870756b8785313955d36321d32dc5ba12c2200 Mon Sep 17 00:00:00 2001 From: Bo Huang Date: Mon, 7 Oct 2024 13:22:34 -0700 Subject: [PATCH 4/8] Updated interface to determine NiSAR mode of operation. --- src/dswx_sar/dswx_ni_runconfig.py | 170 ++++++++++++++++++++++++++++-- 1 file changed, 161 insertions(+), 9 deletions(-) diff --git a/src/dswx_sar/dswx_ni_runconfig.py b/src/dswx_sar/dswx_ni_runconfig.py index 0207aa4..4b2500e 100644 --- a/src/dswx_sar/dswx_ni_runconfig.py +++ b/src/dswx_sar/dswx_ni_runconfig.py @@ -6,6 +6,7 @@ import sys import logging from types import SimpleNamespace +import numpy as np import yamale from ruamel.yaml import YAML @@ -33,6 +34,18 @@ 'SH_POL': ['HH'], } +# 2nd dictionary is for single frame only +DSWX_NI_SINGLE_FRAME_POL_DICT = { + 'SH_POL': ['HH'], + 'SV_POL': ['VV'], + 'DH_POL': ['HH', 'HV'], + 'DV_POL': ['VV', 'VH'], + 'CO_POL': ['HH', 'VV'], + 'CROSS_POL': ['HV', 'VH'], + 'QP_POL': ['HH', 'VV', 'HV', 'VH'], + 'DV_SH_POL': ['VV', 'VH', 'HH'], + 'DH_SV_POL': ['HH', 'HV', 'VV'], + } def _get_parser(): parser = argparse.ArgumentParser( @@ -190,19 +203,137 @@ def check_file_path(file_path: str) -> None: raise FileNotFoundError(err_str) -def get_pol_rtc_hdf5(path): +def get_pol_rtc_hdf5(input_rtc, freq_group): # convenience function to get polarization from RTC file path # basename separates file name from directory in path string # splitext removes the file extension from basename # split('_')[-1] gets polarization - path_pol = '/science/LSAR/GCOV/grids/frequencyA/listOfPolarizations' - with h5py.File(path) as src: + path_pol = f'/science/LSAR/GCOV/grids/frequency{freq_group}/listOfPolarizations' + + with h5py.File(input_rtc) as src: pols = src[path_pol][()] pols = [pol.decode('utf-8') for pol in pols] return pols +def get_freq_rtc_hdf5(input_rtc): + path_freq = f'/science/LSAR/identification/listOfFrequencies' + + with h5py.File(input_rtc) as src_h5: + freq_group_list = src_h5[path_freq][()] + freq = [freq_group.decode('utf-8') for freq_group in freq_group_list] + + return freq + + +def check_rtc_frequency(input_h5_list): + """Read Frequency group information from input RTC file(s) + + Parameters + ---------- + input_h5_list : list of strings + input RTC files in a list + + Returns + ------- + freq_list: list + List of frequency group(s) in each of the files + e.g. A, B, or A and B + flag_freq_equal: bool + Flag that indicates whether the frequency groups are equal + among input files + """ + + num_input_files = len(input_h5_list) + freq_list = np.empty(num_input_files , dtype=object) + flag_pol_equal = True + + for input_idx, input_h5 in enumerate(input_h5_list): + freq_list[input_idx] = get_freq_rtc_hdf5(input_h5) + + for idx in range(num_input_files - 1): + if freq_list[idx] == freq_list[idx + 1]: + flag_freq_equal = True + else: + # Frequency groups between frames are different. + flag_freq_equal = False + break + + return flag_freq_equal, freq_list + + +def read_rtc_polarization(input_h5_list, freq_list): + num_input_files = len(input_h5_list) + pol_list = np.empty((num_input_files, 2) , dtype=object) + + for input_idx, input_h5 in enumerate(input_h5_list): + # Check to see if frequency group of an input file is empty + if freq_list[input_idx]: + for freq_idx, freq_group in enumerate(freq_list[input_idx]): + pol_list[input_idx, freq_idx] = get_pol_rtc_hdf5(input_h5, freq_group) + + return pol_list + + +def compare_rtc_polarization(pol_list): + num_input_files = len(pol_list) + + pol_freq_a = pol_list[:, 0] + pol_freq_b = pol_list[:, 1] + + pol_freq_a_first = pol_freq_a[0] + pol_freq_b_first = pol_freq_b[0] + + flag_pol_freq_a_equal = True + flag_pol_freq_b_equal = True + + # Compare Frequency A of all inputs + for pol in pol_freq_a: + if isinstance(pol_freq_a_first, list) and isinstance(pol, list): + if sorted(pol_freq_a_first) != sorted(pol): + flag_pol_freq_a_equal = False + break + elif pol_freq_a_first is None and pol is not None: + flag_pol_freq_a_equal = False + break + elif pol_freq_a_first is None and pol is None: + continue + + # Compare Frequency B of all inputs + for pol in pol_freq_b: + if isinstance(pol_freq_b_first, list) and isinstance(pol, list): + if sorted(pol_freq_b_first) != sorted(pol): + flag_pol_freq_b_equal = False + break + elif pol_freq_b_first is None and pol is not None: + flag_pol_freq_b_equal = False + break + elif pol_freq_b_first is None and pol is None: + continue + + return flag_pol_freq_a_equal, flag_pol_freq_b_equal + + +def verify_nisar_mode(input_dir_list): + # Extract Frequency Groups of input files + flag_freq_equal, freq_list = check_rtc_frequency(input_dir_list) + + # Extract polarizations of each frequency group of the input files + pol_list = read_rtc_polarization(input_dir_list, freq_list) + + # Compare polariztions of frequency groups among input files + flag_pol_freq_a_equal, flag_pol_freq_b_equal = compare_rtc_polarization(pol_list) + + # Determine NiSAR input RTC mode of operation + if flag_freq_equal and flag_pol_freq_a_equal and flag_pol_freq_b_equal: + nisar_uni_mode = True + else: + nisar_uni_mode = False + + return flag_freq_equal, flag_pol_freq_a_equal, flag_pol_freq_b_equal, nisar_uni_mode + + def _find_polarization_from_data_dirs(input_h5_list): """ This function walks through each directory in the given list of input directories, @@ -224,9 +355,20 @@ def _find_polarization_from_data_dirs(input_h5_list): This list contains the polarization identifiers (like 'HH', 'VV', etc.) found in the filenames. """ - extracted_strings = [] - for input_h5 in input_h5_list: - extracted_strings += get_pol_rtc_hdf5(input_h5) + num_input_rtc = len(input_h5_list) + + for input_idx, input_h5 in enumerate(input_h5_list): + extracted_strings = [] + freq_strings = get_freq_rtc_hdf5(input_h5) + + for freq_idx, input_freq in enumerate(freq_strings): + extracted_strings += get_pol_rtc_hdf5(input_h5, input_freq) + + if input_idx == 0: + pol_list_all_freq = extracted_strings.copy() + else: + if extracted_strings != pol_list_all_freq: + break # if nothing found raise error if not extracted_strings: @@ -237,7 +379,7 @@ def _find_polarization_from_data_dirs(input_h5_list): return list(set(extracted_strings)) -def check_polarizations(pol_list, input_dir_list): +def check_polarizations(pol_list, input_dir_list, DSWX_NI_POL_DICT): """ Sort polarizations so that co-polarizations are preceded. This function identifies the common polarizations between the requested polarizations @@ -432,8 +574,10 @@ def load_from_yaml(cls, yaml_path: str, workflow_name: str, args): cfg['runconfig']['groups']['input_file_group']['input_file_path'] requested_pol = algorithm_cfg[ 'runconfig']['processing']['polarizations'] + + DSWX_NI_POL_DICT = DSWX_NI_SINGLE_FRAME_POL_DICT co_pol, cross_pol, pol_list, pol_mode = check_polarizations( - requested_pol, input_dir_list) + requested_pol, input_dir_list, DSWX_NI_POL_DICT) # update the polarizations algorithm_cfg['runconfig']['processing']['polarizations'] = pol_list @@ -460,6 +604,14 @@ def load_from_yaml(cls, yaml_path: str, workflow_name: str, args): f' has precedence over runconfig visualization "{debug_mode}"') sns.processing.debug_mode = args.debug_mode + # Determine NISAR input RTC mode of operation + ( + flag_freq_equal, + flag_pol_freq_a_equal, + flag_pol_freq_b_equal, + nisar_uni_mode + ) = verify_nisar_mode(input_dir_list) + log_file = sns.log_file if args.log_file is not None: logger.warning( @@ -512,4 +664,4 @@ def as_dict(self): def to_yaml(self): self_as_dict = self.as_dict() yaml = YAML(typ='safe') - yaml.dump(self_as_dict, sys.stdout, indent=4) \ No newline at end of file + yaml.dump(self_as_dict, sys.stdout, indent=4) From e16194c52dabd19e16a0a45fe50379927a8a646c Mon Sep 17 00:00:00 2001 From: Jungkyo Jung Date: Wed, 9 Oct 2024 19:45:35 -0700 Subject: [PATCH 5/8] fix bugs --- src/dswx_sar/dswx_ni_runconfig.py | 8 +++++++- src/dswx_sar/refine_with_bimodality.py | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/dswx_sar/dswx_ni_runconfig.py b/src/dswx_sar/dswx_ni_runconfig.py index 4b2500e..c140655 100644 --- a/src/dswx_sar/dswx_ni_runconfig.py +++ b/src/dswx_sar/dswx_ni_runconfig.py @@ -244,8 +244,14 @@ def check_rtc_frequency(input_h5_list): Flag that indicates whether the frequency groups are equal among input files """ - + print(input_h5_list) num_input_files = len(input_h5_list) + + # Handle the case when there is only one file + if num_input_files == 1: + freq_list = [get_freq_rtc_hdf5(input_h5_list[0])] + return True, freq_list # If only one file, frequencies are trivially equal + freq_list = np.empty(num_input_files , dtype=object) flag_pol_equal = True diff --git a/src/dswx_sar/refine_with_bimodality.py b/src/dswx_sar/refine_with_bimodality.py index dff2c99..d6b3a81 100644 --- a/src/dswx_sar/refine_with_bimodality.py +++ b/src/dswx_sar/refine_with_bimodality.py @@ -631,7 +631,7 @@ def process_dark_land_component(args): water_label = water_label_block[ y_off-startline:y_off+height-startline, x_off:x_off+width] - + print(np.shape(bands), 'band', pol_ind) if bands.ndim == 2: bands = bands[np.newaxis, :, :] # Identify out of boundary areas. @@ -664,7 +664,7 @@ def process_dark_land_component(args): mask_buffer = ndimage.binary_dilation(watermask == 1, iterations=margin, mask=out_boundary) - single_band = bands[pol_ind, ...] + single_band = bands[0, ...] # compute median value for polygons intensity_center = np.nanmedian(single_band[watermask == 1]) @@ -926,6 +926,8 @@ def remove_false_water_bimodality_parallel(water_mask_path, intensity_block = dswx_sar_util.get_raster_block( input_dict['intensity'], block_param) + print(input_dict['intensity']) + print(np.shape(intensity_block)) refland_block = dswx_sar_util.get_raster_block( input_dict['ref_land'], block_param) From 84dbfb25e40e2d18c6c1fcada3474cc4f963714c Mon Sep 17 00:00:00 2001 From: Jungkyo Jung Date: Wed, 9 Oct 2024 19:48:05 -0700 Subject: [PATCH 6/8] remove print --- src/dswx_sar/dswx_ni_runconfig.py | 1 - src/dswx_sar/refine_with_bimodality.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/dswx_sar/dswx_ni_runconfig.py b/src/dswx_sar/dswx_ni_runconfig.py index c140655..ba6549c 100644 --- a/src/dswx_sar/dswx_ni_runconfig.py +++ b/src/dswx_sar/dswx_ni_runconfig.py @@ -244,7 +244,6 @@ def check_rtc_frequency(input_h5_list): Flag that indicates whether the frequency groups are equal among input files """ - print(input_h5_list) num_input_files = len(input_h5_list) # Handle the case when there is only one file diff --git a/src/dswx_sar/refine_with_bimodality.py b/src/dswx_sar/refine_with_bimodality.py index d6b3a81..5e28022 100644 --- a/src/dswx_sar/refine_with_bimodality.py +++ b/src/dswx_sar/refine_with_bimodality.py @@ -631,7 +631,6 @@ def process_dark_land_component(args): water_label = water_label_block[ y_off-startline:y_off+height-startline, x_off:x_off+width] - print(np.shape(bands), 'band', pol_ind) if bands.ndim == 2: bands = bands[np.newaxis, :, :] # Identify out of boundary areas. @@ -926,8 +925,7 @@ def remove_false_water_bimodality_parallel(water_mask_path, intensity_block = dswx_sar_util.get_raster_block( input_dict['intensity'], block_param) - print(input_dict['intensity']) - print(np.shape(intensity_block)) + refland_block = dswx_sar_util.get_raster_block( input_dict['ref_land'], block_param) From 568694deaac39ab8dbb49eb1e6a3bfe052dfe0bb Mon Sep 17 00:00:00 2001 From: Bo Huang Date: Thu, 24 Oct 2024 11:17:52 -0700 Subject: [PATCH 7/8] Added bandwidth flag, reconciled new and existing functions. --- src/dswx_sar/dswx_ni_runconfig.py | 93 ++++++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/src/dswx_sar/dswx_ni_runconfig.py b/src/dswx_sar/dswx_ni_runconfig.py index ba6549c..b1d68ae 100644 --- a/src/dswx_sar/dswx_ni_runconfig.py +++ b/src/dswx_sar/dswx_ni_runconfig.py @@ -273,11 +273,12 @@ def read_rtc_polarization(input_h5_list, freq_list): pol_list = np.empty((num_input_files, 2) , dtype=object) for input_idx, input_h5 in enumerate(input_h5_list): + extracted_strings = [] # Check to see if frequency group of an input file is empty if freq_list[input_idx]: for freq_idx, freq_group in enumerate(freq_list[input_idx]): pol_list[input_idx, freq_idx] = get_pol_rtc_hdf5(input_h5, freq_group) - + return pol_list @@ -319,6 +320,31 @@ def compare_rtc_polarization(pol_list): return flag_pol_freq_a_equal, flag_pol_freq_b_equal +def compare_rtc_bandwidth(bandwidth_list): + num_input_files = len(bandwidth_list) + bw_freq_a = bandwidth_list[:, 0] + bw_freq_b = bandwidth_list[:, 1] + + if np.all(bw_freq_a == bw_freq_a[0]): + flag_bw_freq_a_equal = True + max_bw_freq_a = bw_freq_a[0] + else: + flag_bw_freq_a_equal = False + + # Need to identify highest and lowest bandwidth (resolution) + max_bw_freq_a = np.max(bw_freq_a) + + if np.all(bw_freq_b == bw_freq_b[0]): + flag_bw_freq_b_equal = True + max_bw_freq_b = bw_freq_b[0] + else: + flag_bw_freq_b_equal = False + + # Need to identify highest and lowest bandwidth (resolution) + max_bw_freq_b = np.max(bw_freq_b) + + + return flag_bw_freq_a_equal, flag_bw_freq_b_equal, max_bw_freq_a, max_bw_freq_b def verify_nisar_mode(input_dir_list): # Extract Frequency Groups of input files @@ -338,6 +364,13 @@ def verify_nisar_mode(input_dir_list): return flag_freq_equal, flag_pol_freq_a_equal, flag_pol_freq_b_equal, nisar_uni_mode +def find_unique_polarizations(pol_list): + pol_list_flatten = pol_list.copy() + pol_list_flatten = np.concatenate(pol_list_flatten.ravel()).ravel() + pol_list_unique = set(pol_list_flatten) + + return pol_list_unique + def _find_polarization_from_data_dirs(input_h5_list): """ @@ -361,20 +394,14 @@ def _find_polarization_from_data_dirs(input_h5_list): (like 'HH', 'VV', etc.) found in the filenames. """ num_input_rtc = len(input_h5_list) + extracted_strings = [] for input_idx, input_h5 in enumerate(input_h5_list): - extracted_strings = [] freq_strings = get_freq_rtc_hdf5(input_h5) for freq_idx, input_freq in enumerate(freq_strings): extracted_strings += get_pol_rtc_hdf5(input_h5, input_freq) - if input_idx == 0: - pol_list_all_freq = extracted_strings.copy() - else: - if extracted_strings != pol_list_all_freq: - break - # if nothing found raise error if not extracted_strings: err_str = 'Failed to find polarizations from RTC files.' @@ -397,6 +424,8 @@ def check_polarizations(pol_list, input_dir_list, DSWX_NI_POL_DICT): List of polarizations. input_dir_list : list List of the input directories with RTC GeoTIFF files. + DSWX_NI_POL_DICT: dictionary + Dictionary which captures possible polarization scenarios for NISAR RTC inputs Returns ------- @@ -459,6 +488,44 @@ def custom_sort(pol): return co_pol_list, cross_pol_list, sorted_pol_list, pol_mode +def extract_bandwidth(input_dir_list): + """ + This function extracts bandwidth of each frequency group of each input RTC. + + Parameters + ---------- + input_dir_list : list + List of the input RTC files. + + Returns + ------- + bandwidth_list + List of bandwidth from both frequency A and B of all input RTC files. + """ + + num_input_files = len(input_dir_list) + freq_path_list = '/science/LSAR/identification/listOfFrequencies' + bandwidth_list = np.zeros((num_input_files, 2), dtype=float) + + for input_idx, input_rtc in enumerate(input_dir_list): + # Check if the file exists + if not os.path.exists(input_rtc): + raise FileNotFoundError( + f"The file '{input_rtc}' does not exist.") + + with h5py.File(input_rtc, 'r') as src_h5: + freq_group_list = src_h5[freq_path_list][()] + freq_group_list = [freq_group.decode('utf-8') for freq_group in freq_group_list] + + for freq_idx, freq_group in enumerate(freq_group_list): + bw_path = f'/science/LSAR/GCOV/metadata/sourceData/swaths/frequency{freq_group}/rangeBandwidth' + bandwidth = float(src_h5[bw_path][()]) + + bandwidth_list[input_idx, freq_idx] = bandwidth + + return bandwidth_list + + def validate_group_dict(group_cfg: dict) -> None: """Check and validate runconfig entries. Parameters @@ -584,6 +651,16 @@ def load_from_yaml(cls, yaml_path: str, workflow_name: str, args): co_pol, cross_pol, pol_list, pol_mode = check_polarizations( requested_pol, input_dir_list, DSWX_NI_POL_DICT) + # Extract bandwidth from input RTC + bandwidth_list = extract_bandwidth(input_dir_list) + + ( + flag_bw_freq_a_equal, + flag_bw_freq_b_equal, + max_bw_freq_a, + max_bw_freq_b + ) = compare_rtc_bandwidth(bandwidth_list) + # update the polarizations algorithm_cfg['runconfig']['processing']['polarizations'] = pol_list algorithm_cfg[ From 6014cddc7ed1d089ab861aab1110d3b001b818b7 Mon Sep 17 00:00:00 2001 From: Bo Huang Date: Thu, 24 Oct 2024 15:10:03 -0700 Subject: [PATCH 8/8] Updated extract_bandwidth function. --- src/dswx_sar/dswx_ni_runconfig.py | 44 ++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/src/dswx_sar/dswx_ni_runconfig.py b/src/dswx_sar/dswx_ni_runconfig.py index b1d68ae..4a04589 100644 --- a/src/dswx_sar/dswx_ni_runconfig.py +++ b/src/dswx_sar/dswx_ni_runconfig.py @@ -327,24 +327,34 @@ def compare_rtc_bandwidth(bandwidth_list): if np.all(bw_freq_a == bw_freq_a[0]): flag_bw_freq_a_equal = True - max_bw_freq_a = bw_freq_a[0] + min_bw_freq_a = bw_freq_a[0] else: flag_bw_freq_a_equal = False # Need to identify highest and lowest bandwidth (resolution) - max_bw_freq_a = np.max(bw_freq_a) + bw_freq_a_valid = bw_freq_a > 5 + + if bw_freq_a_valid.size > 0: + min_bw_freq_a = np.min(bw_freq_a_valid) + else: + min_bw_freq_a = bw_freq_a[0] if np.all(bw_freq_b == bw_freq_b[0]): flag_bw_freq_b_equal = True - max_bw_freq_b = bw_freq_b[0] + min_bw_freq_b = bw_freq_b[0] else: flag_bw_freq_b_equal = False # Need to identify highest and lowest bandwidth (resolution) - max_bw_freq_b = np.max(bw_freq_b) + bw_freq_b_valid = bw_freq_b > 5 + + if bw_freq_b_valid.size > 0: + min_bw_freq_b = np.min(bw_freq_b_valid) + else: + min_bw_freq_b = bw_freq_b[0] - return flag_bw_freq_a_equal, flag_bw_freq_b_equal, max_bw_freq_a, max_bw_freq_b + return flag_bw_freq_a_equal, flag_bw_freq_b_equal, min_bw_freq_a, min_bw_freq_b def verify_nisar_mode(input_dir_list): # Extract Frequency Groups of input files @@ -651,16 +661,6 @@ def load_from_yaml(cls, yaml_path: str, workflow_name: str, args): co_pol, cross_pol, pol_list, pol_mode = check_polarizations( requested_pol, input_dir_list, DSWX_NI_POL_DICT) - # Extract bandwidth from input RTC - bandwidth_list = extract_bandwidth(input_dir_list) - - ( - flag_bw_freq_a_equal, - flag_bw_freq_b_equal, - max_bw_freq_a, - max_bw_freq_b - ) = compare_rtc_bandwidth(bandwidth_list) - # update the polarizations algorithm_cfg['runconfig']['processing']['polarizations'] = pol_list algorithm_cfg[ @@ -694,6 +694,20 @@ def load_from_yaml(cls, yaml_path: str, workflow_name: str, args): nisar_uni_mode ) = verify_nisar_mode(input_dir_list) + # Update NiSAR processing mode + algorithm_cfg[ + 'runconfig']['processing']['nisar_uni_mode'] = nisar_uni_mode + + # Extract bandwidth from input RTC + bandwidth_list = extract_bandwidth(input_dir_list) + + ( + flag_bw_freq_a_equal, + flag_bw_freq_b_equal, + min_bw_freq_a, + min_bw_freq_b + ) = compare_rtc_bandwidth(bandwidth_list) + log_file = sns.log_file if args.log_file is not None: logger.warning(