From bd443a48c20ca01d1457ae0fbe6d8f90b23e3139 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 27 Aug 2024 20:58:23 +0000 Subject: [PATCH 01/11] init commit --- echopype/convert/set_groups_ek80.py | 3 +++ echopype/tests/convert/test_convert_ek80.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/echopype/convert/set_groups_ek80.py b/echopype/convert/set_groups_ek80.py index fdf2b86f3..57fe682e6 100644 --- a/echopype/convert/set_groups_ek80.py +++ b/echopype/convert/set_groups_ek80.py @@ -1145,6 +1145,9 @@ def set_beam(self) -> List[xr.Dataset]: ds_data = self._attach_vars_to_ds_data(ds_data, ch, rs_size=ds_data.range_sample.size) + # Drop any duplicate ping times + ds_data = ds_data.drop_duplicates(dim="ping_time") + if ch in self.sorted_channel["complex"]: ds_complex.append(ds_data) else: diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index b9bc7bdd3..2d37485fe 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -482,3 +482,24 @@ def test_parse_mru0_mru1(ek80_path): ] for mru_var_name in mru_var_names: assert not np.any(np.isnan(echodata["Platform"][mru_var_name])) + + +@pytest.mark.unit +@pytest.mark.parametrize( + "raw_path", + [ + ("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw"), + ("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T225435.raw"), + ] +) +def test_duplicate_ping_times(raw_path): + """ + Tests that RAW files that contain duplicate ping times can be parsed. + """ + # Open RAW + ed = open_raw(raw_path,sonar_model="EK80") + + # Check that there are no ping time duplicates in Beam group + assert ed["Sonar/Beam_group1"].equals( + ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") + ) From 0f21dbcbbbd142d0fd91fbd1a6c24343f556e392 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 31 Dec 2024 00:23:45 +0000 Subject: [PATCH 02/11] revert change to fix merge conflict --- echopype/tests/convert/test_convert_ek80.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index 2d37485fe..b9bc7bdd3 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -482,24 +482,3 @@ def test_parse_mru0_mru1(ek80_path): ] for mru_var_name in mru_var_names: assert not np.any(np.isnan(echodata["Platform"][mru_var_name])) - - -@pytest.mark.unit -@pytest.mark.parametrize( - "raw_path", - [ - ("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw"), - ("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T225435.raw"), - ] -) -def test_duplicate_ping_times(raw_path): - """ - Tests that RAW files that contain duplicate ping times can be parsed. - """ - # Open RAW - ed = open_raw(raw_path,sonar_model="EK80") - - # Check that there are no ping time duplicates in Beam group - assert ed["Sonar/Beam_group1"].equals( - ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") - ) From f283eea89411e2aa214fdb6856887c587df3f2db Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 31 Dec 2024 01:01:23 +0000 Subject: [PATCH 03/11] test only one file --- echopype/tests/convert/test_convert_ek80.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index c84116c68..e92bfcd29 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -531,3 +531,15 @@ def test_parse_ek80_with_invalid_env_datagrams(): env_var = ed["Environment"][var] assert env_var.notnull().all() and env_var.dtype == np.float64 + +@pytest.mark.unit +def test_duplicate_ping_times(): + """ + Tests that RAW file with duplicate ping times can be parsed. + """ + # Open RAW + ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T225435.raw", sonar_model="EK80") + # Check that there are no ping time duplicates in Beam group + assert ed["Sonar/Beam_group1"].equals( + ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") + ) From 4db27e9d2693042d53679384e91657636e0fa0b3 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 31 Dec 2024 01:13:57 +0000 Subject: [PATCH 04/11] use other file --- echopype/tests/convert/test_convert_ek80.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index e92bfcd29..5090be774 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -538,7 +538,7 @@ def test_duplicate_ping_times(): Tests that RAW file with duplicate ping times can be parsed. """ # Open RAW - ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T225435.raw", sonar_model="EK80") + ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") # Check that there are no ping time duplicates in Beam group assert ed["Sonar/Beam_group1"].equals( ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") From aa8c74133a3d2b8b1da1a39ec617fc1d5395d601 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 21 Jan 2025 18:20:22 +0000 Subject: [PATCH 05/11] move test duplicate to test convert ek --- echopype/tests/convert/test_convert_ek.py | 12 ++++++++++++ echopype/tests/convert/test_convert_ek80.py | 13 ------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/echopype/tests/convert/test_convert_ek.py b/echopype/tests/convert/test_convert_ek.py index b73163021..b609a49c1 100644 --- a/echopype/tests/convert/test_convert_ek.py +++ b/echopype/tests/convert/test_convert_ek.py @@ -208,3 +208,15 @@ def test_pad_short_complex_pings(): expected_output, equal_nan=True ) + +@pytest.mark.unit +def test_duplicate_ping_times(): + """ + Tests that RAW file with duplicate ping times can be parsed. + """ + # Open RAW + ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") + # Check that there are no ping time duplicates in Beam group + assert ed["Sonar/Beam_group1"].equals( + ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") + ) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index 5090be774..e89494daa 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -530,16 +530,3 @@ def test_parse_ek80_with_invalid_env_datagrams(): for var in ["acidity", "depth", "salinity", "temperature", "sound_speed_indicative"]: env_var = ed["Environment"][var] assert env_var.notnull().all() and env_var.dtype == np.float64 - - -@pytest.mark.unit -def test_duplicate_ping_times(): - """ - Tests that RAW file with duplicate ping times can be parsed. - """ - # Open RAW - ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") - # Check that there are no ping time duplicates in Beam group - assert ed["Sonar/Beam_group1"].equals( - ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") - ) From 527d49ca54b9fa1f43a5c1b08ed17b9b851f6901 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 21 Jan 2025 18:22:30 +0000 Subject: [PATCH 06/11] add extra line --- echopype/tests/convert/test_convert_ek80.py | 1 + 1 file changed, 1 insertion(+) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index e89494daa..c84116c68 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -530,3 +530,4 @@ def test_parse_ek80_with_invalid_env_datagrams(): for var in ["acidity", "depth", "salinity", "temperature", "sound_speed_indicative"]: env_var = ed["Environment"][var] assert env_var.notnull().all() and env_var.dtype == np.float64 + From 12286016ef394991c0048745c40945d916fc5abd Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 21 Jan 2025 18:24:45 +0000 Subject: [PATCH 07/11] move test back to ek80 convert --- echopype/tests/convert/test_convert_ek.py | 12 ------------ echopype/tests/convert/test_convert_ek80.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/echopype/tests/convert/test_convert_ek.py b/echopype/tests/convert/test_convert_ek.py index b609a49c1..b73163021 100644 --- a/echopype/tests/convert/test_convert_ek.py +++ b/echopype/tests/convert/test_convert_ek.py @@ -208,15 +208,3 @@ def test_pad_short_complex_pings(): expected_output, equal_nan=True ) - -@pytest.mark.unit -def test_duplicate_ping_times(): - """ - Tests that RAW file with duplicate ping times can be parsed. - """ - # Open RAW - ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") - # Check that there are no ping time duplicates in Beam group - assert ed["Sonar/Beam_group1"].equals( - ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") - ) diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index c84116c68..25291619d 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -513,6 +513,20 @@ def test_parse_missing_sound_velocity_profile(): shutil.rmtree(save_path) +@pytest.mark.unit +def test_duplicate_ping_times(): + """ + Tests that RAW file with duplicate ping times can be parsed. + """ + # Open RAW + ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") + # Check that there are no ping time duplicates in Beam group + assert ed["Sonar/Beam_group1"].equals( + ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") + ) + + + @pytest.mark.unit def test_parse_ek80_with_invalid_env_datagrams(): """ From 2ac44c201f91d240854cc91e5d68ef0edf3622bc Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 28 Jan 2025 02:22:29 +0000 Subject: [PATCH 08/11] pin zarr and add check unique ping time duplicates and tests --- echopype/convert/set_groups_ek80.py | 13 +++++- echopype/convert/utils/ek_duplicates.py | 44 ++++++++++++++++++ echopype/tests/convert/test_convert_ek80.py | 49 ++++++++++++++++++++- requirements.txt | 2 +- 4 files changed, 103 insertions(+), 5 deletions(-) create mode 100644 echopype/convert/utils/ek_duplicates.py diff --git a/echopype/convert/set_groups_ek80.py b/echopype/convert/set_groups_ek80.py index 66799e91a..b8dead05c 100644 --- a/echopype/convert/set_groups_ek80.py +++ b/echopype/convert/set_groups_ek80.py @@ -8,6 +8,7 @@ from ..utils.coding import set_time_encodings from ..utils.log import _init_logger from .set_groups_base import SetGroupsBase +from .utils.ek_duplicates import check_unique_ping_time_duplicates logger = _init_logger(__name__) @@ -1145,8 +1146,16 @@ def set_beam(self) -> List[xr.Dataset]: ds_data = self._attach_vars_to_ds_data(ds_data, ch, rs_size=ds_data.range_sample.size) - # Drop any duplicate ping times - ds_data = ds_data.drop_duplicates(dim="ping_time") + # Access the 'ping_time' coordinate as a NumPy array + ping_times = ds_data["ping_time"].values + + # Check if ping time duplicates exist + if len(ping_times) > len(np.unique(ping_times)): + # Check for unique ping time duplicates and if they are not unique, raise warning. + check_unique_ping_time_duplicates(ds_data, logger) + + # Drop duplicates + ds_data = ds_data.drop_duplicates(dim="ping_time") if ch in self.sorted_channel["complex"]: ds_complex.append(ds_data) diff --git a/echopype/convert/utils/ek_duplicates.py b/echopype/convert/utils/ek_duplicates.py new file mode 100644 index 000000000..a2c6aafa4 --- /dev/null +++ b/echopype/convert/utils/ek_duplicates.py @@ -0,0 +1,44 @@ +import logging + +import xarray as xr + + +def check_unique_ping_time_duplicates(ds_data: xr.Dataset, logger: logging.Logger) -> None: + """ + Raises a warning if the data stored in duplicate pings is not unique. + + Parameters + ---------- + ds_data : xr.Dataset + Single freq beam dataset being processed in the `SetGroupsEK80.set_beams` class function. + logger : logging.Logger + Warning logger initialized in `SetGroupsEK80` file. + """ + # Group the dataset by the "ping_time" coordinate + groups = ds_data.groupby("ping_time") + + # Loop through each ping_time group + for ping_time_val, group in groups: + # Extract all data variable names to check + data_vars = list(group.data_vars) + + # Use the first duplicate ping time index as a reference + ref_duplicate_ping_time_index = 0 + + # Iterate over each data variable in the group + for var in data_vars: + # Extract data array corresponding to the iterated variable + data_array = group[var] + + # Use the slice corresponding to the reference index as the reference slice + ref_slice = data_array.isel({"ping_time": ref_duplicate_ping_time_index}) + + # Iterate over the remaining entries + for i in range(1, data_array.sizes["ping_time"]): + if not ref_slice.equals(data_array.isel({"ping_time": i})): + logger.warning( + f"Duplicate slices in variable '{var}' corresponding to " + f"ping_time {ping_time_val} differ in data. Data will be lost since we " + "will be dropping all duplicate ping times." + ) + break diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index 25291619d..c53b84bdd 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -4,11 +4,14 @@ import numpy as np import pandas as pd from scipy.io import loadmat +import xarray as xr from echopype import open_raw, open_converted from echopype.testing import TEST_DATA_FOLDER from echopype.convert.parse_ek80 import ParseEK80 from echopype.convert.set_groups_ek80 import WIDE_BAND_TRANS, PULSE_COMPRESS, FILTER_IMAG, FILTER_REAL, DECIMATION +from echopype.utils import log +from echopype.convert.utils.ek_duplicates import check_unique_ping_time_duplicates @pytest.fixture @@ -514,17 +517,59 @@ def test_parse_missing_sound_velocity_profile(): @pytest.mark.unit -def test_duplicate_ping_times(): +def test_duplicate_ping_times(caplog): """ - Tests that RAW file with duplicate ping times can be parsed. + Tests that RAW file with duplicate ping times can be parsed and that the correct warning has been raised. """ + # Turn on logger verbosity + log.verbose(override=False) + # Open RAW ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80") + # Check that there are no ping time duplicates in Beam group assert ed["Sonar/Beam_group1"].equals( ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time") ) + # Check that no warning is logged since the data for all duplicate pings is unique + not_expected_warning = ("Data will be lost since we will be dropping all duplicate ping times.") + assert not any(not_expected_warning in record.message for record in caplog.records) + + # Turn off logger verbosity + log.verbose(override=True) + + +@pytest.mark.unit +def test_check_unique_ping_time_duplicates(caplog): + """ + Checks that `check_unique_ping_time_duplicates` raises a warning when the data for duplicate ping times is not unique. + """ + # Initialize logger + logger = log._init_logger(__name__) + + # Turn on logger verbosity + log.verbose(override=False) + + # Open duplicate ping time beam dataset + ds_data = xr.open_zarr("echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr") + + # Modify a single entry to ensure that there exists duplicate ping times that do not share the same backscatter data + ds_data["backscatter_r"][0,0,0] = 0 + + # Check for ping time duplicates + check_unique_ping_time_duplicates(ds_data, logger) + + # Turn off logger verbosity + log.verbose(override=True) + + # Check if the expected warning is logged + expected_warning = ( + "Duplicate slices in variable 'backscatter_r' corresponding to ping_time " + f"{str(ds_data['ping_time'].values[0])} differ in data. Data will be lost since " + "we will be dropping all duplicate ping times." + ) + assert any(expected_warning in record.message for record in caplog.records) @pytest.mark.unit diff --git a/requirements.txt b/requirements.txt index baef477cd..a89974058 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ pytz scipy xarray pandas -zarr +zarr>=2,<3 fsspec s3fs requests From dd46e46a9fd3cc58bc39dc82b4f9a7dbc21101db Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 28 Jan 2025 02:36:50 +0000 Subject: [PATCH 09/11] fix test message --- echopype/convert/utils/ek_duplicates.py | 6 +++--- echopype/tests/convert/test_convert_ek80.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/echopype/convert/utils/ek_duplicates.py b/echopype/convert/utils/ek_duplicates.py index a2c6aafa4..8a44988f6 100644 --- a/echopype/convert/utils/ek_duplicates.py +++ b/echopype/convert/utils/ek_duplicates.py @@ -37,8 +37,8 @@ def check_unique_ping_time_duplicates(ds_data: xr.Dataset, logger: logging.Logge for i in range(1, data_array.sizes["ping_time"]): if not ref_slice.equals(data_array.isel({"ping_time": i})): logger.warning( - f"Duplicate slices in variable '{var}' corresponding to " - f"ping_time {ping_time_val} differ in data. Data will be lost since we " - "will be dropping all duplicate ping times." + f"Duplicate slices in variable '{var}' corresponding to 'ping_time' " + f"{ping_time_val} differ in data. All duplicate 'ping_time' entries " + "will be removed, which will result in data loss." ) break diff --git a/echopype/tests/convert/test_convert_ek80.py b/echopype/tests/convert/test_convert_ek80.py index c53b84bdd..761f786f6 100644 --- a/echopype/tests/convert/test_convert_ek80.py +++ b/echopype/tests/convert/test_convert_ek80.py @@ -533,7 +533,7 @@ def test_duplicate_ping_times(caplog): ) # Check that no warning is logged since the data for all duplicate pings is unique - not_expected_warning = ("Data will be lost since we will be dropping all duplicate ping times.") + not_expected_warning = ("All duplicate ping_time entries' will be removed, resulting in potential data loss.") assert not any(not_expected_warning in record.message for record in caplog.records) # Turn off logger verbosity @@ -565,9 +565,9 @@ def test_check_unique_ping_time_duplicates(caplog): # Check if the expected warning is logged expected_warning = ( - "Duplicate slices in variable 'backscatter_r' corresponding to ping_time " - f"{str(ds_data['ping_time'].values[0])} differ in data. Data will be lost since " - "we will be dropping all duplicate ping times." + "Duplicate slices in variable 'backscatter_r' corresponding to 'ping_time' " + f"{str(ds_data['ping_time'].values[0])} differ in data. All duplicate " + "'ping_time' entries will be removed, which will result in data loss." ) assert any(expected_warning in record.message for record in caplog.records) From 340ef93bffb5efe9027d45105a72d1d682c39ae1 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 28 Jan 2025 03:07:57 +0000 Subject: [PATCH 10/11] test remove zarr pin --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a89974058..baef477cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ pytz scipy xarray pandas -zarr>=2,<3 +zarr fsspec s3fs requests From a466b26d17b1f08b9ef9ff9e072c64e98cae7131 Mon Sep 17 00:00:00 2001 From: ctuguinay Date: Tue, 28 Jan 2025 03:25:39 +0000 Subject: [PATCH 11/11] add back zarr pin --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index baef477cd..a89974058 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ pytz scipy xarray pandas -zarr +zarr>=2,<3 fsspec s3fs requests