Drop Ping Time Duplicates (OSOceanAcoustics#1382)
* init commit

* revert change to fix merge conflict

* test only one file

* use other file

* move test duplicate to test convert ek

* add extra line

* move test back to ek80 convert

* pin zarr and add check unique ping time duplicates and tests

* fix test message

* test remove zarr pin

* add back zarr pin
ctuguinay authored and oftfrfbf committed Feb 5, 2025
1 parent 9f8a406 commit 15a1bcd
Showing 3 changed files with 115 additions and 0 deletions.
12 changes: 12 additions & 0 deletions echopype/convert/set_groups_ek80.py
@@ -8,6 +8,7 @@
from ..utils.coding import set_time_encodings
from ..utils.log import _init_logger
from .set_groups_base import SetGroupsBase
from .utils.ek_duplicates import check_unique_ping_time_duplicates

logger = _init_logger(__name__)

@@ -1145,6 +1146,17 @@ def set_beam(self) -> List[xr.Dataset]:

            ds_data = self._attach_vars_to_ds_data(ds_data, ch, rs_size=ds_data.range_sample.size)

            # Access the 'ping_time' coordinate as a NumPy array
            ping_times = ds_data["ping_time"].values

            # Check if ping time duplicates exist
            if len(ping_times) > len(np.unique(ping_times)):
                # Raise a warning if the data across duplicated ping times is not identical
                check_unique_ping_time_duplicates(ds_data, logger)

                # Drop duplicates
                ds_data = ds_data.drop_duplicates(dim="ping_time")

            if ch in self.sorted_channel["complex"]:
                ds_complex.append(ds_data)
            else:
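For context, the duplicate-handling pattern added above can be reproduced on a small synthetic dataset. The sketch below is illustrative only and not part of this commit; the toy ping times and backscatter values are assumptions, and Dataset.drop_duplicates requires a reasonably recent xarray.

import numpy as np
import xarray as xr

# Toy beam-like dataset with a repeated ping_time value (illustrative only)
ping_time = np.array(
    ["2021-09-13T13:06:12", "2021-09-13T13:06:12", "2021-09-13T13:06:13"],
    dtype="datetime64[ns]",
)
ds_data = xr.Dataset(
    {"backscatter_r": ("ping_time", [1.0, 1.0, 2.0])},
    coords={"ping_time": ping_time},
)

# Same pattern as the addition to set_beam: detect duplicate ping times, then drop them
ping_times = ds_data["ping_time"].values
if len(ping_times) > len(np.unique(ping_times)):
    # In set_beam, check_unique_ping_time_duplicates(ds_data, logger) would warn here
    # if the duplicated pings carried different data
    ds_data = ds_data.drop_duplicates(dim="ping_time")

print(ds_data.sizes["ping_time"])  # 2: the duplicate ping was dropped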
44 changes: 44 additions & 0 deletions echopype/convert/utils/ek_duplicates.py
@@ -0,0 +1,44 @@
import logging

import xarray as xr


def check_unique_ping_time_duplicates(ds_data: xr.Dataset, logger: logging.Logger) -> None:
"""
Raises a warning if the data stored in duplicate pings is not unique.
Parameters
----------
ds_data : xr.Dataset
Single freq beam dataset being processed in the `SetGroupsEK80.set_beams` class function.
logger : logging.Logger
Warning logger initialized in `SetGroupsEK80` file.
"""
    # Group the dataset by the "ping_time" coordinate
    groups = ds_data.groupby("ping_time")

    # Loop through each ping_time group
    for ping_time_val, group in groups:
        # Extract all data variable names to check
        data_vars = list(group.data_vars)

        # Use the first duplicate ping time index as a reference
        ref_duplicate_ping_time_index = 0

        # Iterate over each data variable in the group
        for var in data_vars:
            # Extract data array corresponding to the iterated variable
            data_array = group[var]

            # Use the slice corresponding to the reference index as the reference slice
            ref_slice = data_array.isel({"ping_time": ref_duplicate_ping_time_index})

            # Iterate over the remaining entries
            for i in range(1, data_array.sizes["ping_time"]):
                if not ref_slice.equals(data_array.isel({"ping_time": i})):
                    logger.warning(
                        f"Duplicate slices in variable '{var}' corresponding to 'ping_time' "
                        f"{ping_time_val} differ in data. All duplicate 'ping_time' entries "
                        "will be removed, which will result in data loss."
                    )
                    break
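A minimal usage sketch for the helper above (not part of this commit): two pings share a timestamp but disagree in backscatter_r, so the data-loss warning is expected to be logged. The toy dataset and logger name are assumptions for demonstration.

import logging

import numpy as np
import xarray as xr

from echopype.convert.utils.ek_duplicates import check_unique_ping_time_duplicates

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("ek_duplicates_demo")  # hypothetical logger name

# Two pings share a timestamp but carry different backscatter_r values
ping_time = np.array(
    ["2021-09-13T13:06:12", "2021-09-13T13:06:12"], dtype="datetime64[ns]"
)
ds_data = xr.Dataset(
    {"backscatter_r": ("ping_time", [1.0, 2.0])},
    coords={"ping_time": ping_time},
)

# Logs: "Duplicate slices in variable 'backscatter_r' corresponding to 'ping_time' ... differ in data. ..."
check_unique_ping_time_duplicates(ds_data, logger)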
59 changes: 59 additions & 0 deletions echopype/tests/convert/test_convert_ek80.py
@@ -4,11 +4,14 @@
import numpy as np
import pandas as pd
from scipy.io import loadmat
import xarray as xr

from echopype import open_raw, open_converted
from echopype.testing import TEST_DATA_FOLDER
from echopype.convert.parse_ek80 import ParseEK80
from echopype.convert.set_groups_ek80 import WIDE_BAND_TRANS, PULSE_COMPRESS, FILTER_IMAG, FILTER_REAL, DECIMATION
from echopype.utils import log
from echopype.convert.utils.ek_duplicates import check_unique_ping_time_duplicates


@pytest.fixture
@@ -512,6 +515,62 @@ def test_parse_missing_sound_velocity_profile():
    shutil.rmtree(save_path)


@pytest.mark.unit
def test_duplicate_ping_times(caplog):
"""
Tests that RAW file with duplicate ping times can be parsed and that the correct warning has been raised.
"""
# Turn on logger verbosity
log.verbose(override=False)

# Open RAW
ed = open_raw("echopype/test_data/ek80_duplicate_ping_times/Hake-D20210913-T130612.raw", sonar_model="EK80")

# Check that there are no ping time duplicates in Beam group
assert ed["Sonar/Beam_group1"].equals(
ed["Sonar/Beam_group1"].drop_duplicates(dim="ping_time")
)

# Check that no warning is logged since the data for all duplicate pings is unique
not_expected_warning = ("All duplicate ping_time entries' will be removed, resulting in potential data loss.")
assert not any(not_expected_warning in record.message for record in caplog.records)

# Turn off logger verbosity
log.verbose(override=True)


@pytest.mark.unit
def test_check_unique_ping_time_duplicates(caplog):
"""
Checks that `check_unique_ping_time_duplicates` raises a warning when the data for duplicate ping times is not unique.
"""
# Initialize logger
logger = log._init_logger(__name__)

# Turn on logger verbosity
log.verbose(override=False)

# Open duplicate ping time beam dataset
ds_data = xr.open_zarr("echopype/test_data/ek80_duplicate_ping_times/duplicate_beam_ds.zarr")

# Modify a single entry to ensure that there exists duplicate ping times that do not share the same backscatter data
ds_data["backscatter_r"][0,0,0] = 0

# Check for ping time duplicates
check_unique_ping_time_duplicates(ds_data, logger)

# Turn off logger verbosity
log.verbose(override=True)

# Check if the expected warning is logged
expected_warning = (
"Duplicate slices in variable 'backscatter_r' corresponding to 'ping_time' "
f"{str(ds_data['ping_time'].values[0])} differ in data. All duplicate "
"'ping_time' entries will be removed, which will result in data loss."
)
assert any(expected_warning in record.message for record in caplog.records)


@pytest.mark.unit
def test_parse_ek80_with_invalid_env_datagrams():
"""
