From 2dc9f0a54deebdca03e939a0a5013d5f5ba4a19e Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 12 Mar 2024 16:14:21 -0400 Subject: [PATCH 01/13] initial commit --- clouddrift/adapters/gdp1h.py | 13 ++--- clouddrift/ragged.py | 104 +++++++++++++++++++++++++---------- tests/example_tests.py | 8 +++ 3 files changed, 88 insertions(+), 37 deletions(-) create mode 100644 tests/example_tests.py diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py index 23a3936b..4555124b 100644 --- a/clouddrift/adapters/gdp1h.py +++ b/clouddrift/adapters/gdp1h.py @@ -144,10 +144,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: # parse the date with custom function ds["deploy_date"].data = gdp.decode_date(np.array([ds.deploy_date.data[0]])) ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]])) - ds["drogue_lost_date"].data = gdp.decode_date( - np.array([ds.drogue_lost_date.data[0]]) - ) - ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])) + ds["drogue_lost_date"].data = gdp.decode_date(np.array([ds.drogue_lost_date.data[0]])).astype("datetime64[s]") + ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype("datetime64[s]") # convert fill values to nan for var in [ @@ -199,7 +197,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: for var in target_dtype.keys(): if var in ds.keys(): - ds[var].data = ds[var].data.astype(target_dtype[var]) + dtype = target_dtype[var] + ds[var].data = ds[var].data.astype(dtype) else: warnings.warn(f"Variable {var} not found in upstream data; skipping.") @@ -611,10 +610,10 @@ def to_raggedarray( # set dynamic global attributes if ra.attrs_global: ra.attrs_global["time_coverage_start"] = ( - f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}" + f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" ) ra.attrs_global["time_coverage_end"] = ( - f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}" + f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" ) return ra diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py index 4ef49b3f..4967e67c 100644 --- a/clouddrift/ragged.py +++ b/clouddrift/ragged.py @@ -77,22 +77,29 @@ def apply_ragged( multiple particles, the coordinates of which are found in the ragged arrays x, y, and t that share row sizes 2, 3, and 4: + >>> from clouddrift.kinematics import velocity_from_position >>> rowsize = [2, 3, 4] >>> x = np.array([1, 2, 10, 12, 14, 30, 33, 36, 39]) >>> y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8]) >>> t = np.array([1, 2, 1, 2, 3, 1, 2, 3, 4]) >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, coord_system="cartesian") - array([1., 1., 2., 2., 2., 3., 3., 3., 3.]), - array([1., 1., 1., 1., 1., 1., 1., 1., 1.])) + >>> u1 + array([1., 1., 2., 2., 2., 3., 3., 3., 3.]) + >>> v1 + array([1., 1., 1., 1., 1., 1., 1., 1., 1.]) To apply ``func`` to only a subset of rows, use the ``rows`` argument: >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, rows=0, coord_system="cartesian") - array([1., 1.]), - array([1., 1.])) + >>> u1 + array([1., 1.]) + >>> v1 + array([1., 1.]) >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, rows=[0, 1], coord_system="cartesian") - array([1., 1., 2., 2., 2.]), - array([1., 1., 1., 1., 1.])) + >>> u1 + array([1., 1., 2., 2., 2.]) + >>> v1 + array([1., 1., 1., 1., 1.]) Raises ------ @@ -286,7 +293,7 @@ def prune( Examples -------- >>> prune(np.array([1, 2, 3, 0, -1, -2]), np.array([3, 1, 2]),2) - (array([1, 2, 3, -1, -2]), array([3, 2])) + (array([ 1, 2, 3, -1, -2]), array([3, 2])) Raises ------ @@ -357,9 +364,9 @@ def ragged_to_regular( You can specify an alternative fill value: >>> ragged_to_regular(np.array([1, 2, 3, 4, 5]), np.array([2, 1, 2]), fill_value=999) - array([[ 1., 2.], - [ 3., -999.], - [ 4., 5.]]) + array([[ 1, 2], + [ 3, 999], + [ 4, 5]]) See Also -------- @@ -401,7 +408,7 @@ def regular_to_ragged( Alternatively, a different fill value can be specified: >>> regular_to_ragged(np.array([[1, 2], [3, -999], [4, 5]]), fill_value=-999) - (array([1., 2., 3., 4., 5.]), array([2, 1, 2])) + (array([1, 2, 3, 4, 5]), array([2, 1, 2])) See Also -------- @@ -435,7 +442,7 @@ def rowsize_to_index(rowsize: list | np.ndarray | xr.DataArray) -> np.ndarray: To obtain the indices within a ragged array of three consecutive rows of sizes 100, 202, and 53: >>> rowsize_to_index([100, 202, 53]) - array([0, 100, 302, 355]) + array([ 0, 100, 302, 355]) """ return np.cumsum(np.insert(np.array(rowsize), 0, 0)) @@ -502,10 +509,10 @@ def segment( If the input array contains time objects, the tolerance must be a time interval: >>> x = np.array([np.datetime64("2023-01-01"), np.datetime64("2023-01-02"), - np.datetime64("2023-01-03"), np.datetime64("2023-02-01"), - np.datetime64("2023-02-02")]) + ... np.datetime64("2023-01-03"), np.datetime64("2023-02-01"), + ... np.datetime64("2023-02-02")]) >>> segment(x, np.timedelta64(1, "D")) - np.array([3, 2]) + array([3, 2]) """ # for compatibility with datetime list or np.timedelta64 arrays @@ -590,30 +597,49 @@ def subset( a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs". Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude: + >>> from clouddrift.adapters import gdp1h + >>> ds = gdp1h.to_raggedarray(n_random_id=5).to_xarray() + ... >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj") + + ... The parameter `full_rows` can be used to retrieve trajectories passing through a region, for example all trajectories passing through the Gulf of Mexico: >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, full_rows=True, row_dim_name="traj") + + ... Retrieve drogued trajectory segments: >>> subset(ds, {"drogue_status": True}, row_dim_name="traj") + + Dimensions: (traj: ..., obs: ...) + Coordinates: + id (traj) int64 ... + time (obs) datetime64[ns] ... + ... Retrieve trajectory segments with temperature higher than 25°C (303.15K): >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj") + + ... You can use the same approach to return only the trajectories that are shorter than some number of observations (similar to :func:`prune` but for the entire dataset): >>> subset(ds, {"rowsize": (0, 1000)}, row_dim_name="traj") + + ... Retrieve specific drifters using their IDs: >>> subset(ds, {"id": [2578, 2582, 2583]}, row_dim_name="traj") + + ... Sometimes, you may want to retrieve specific rows of a ragged array. You can do that by filtering along the trajectory dimension directly, since @@ -621,6 +647,8 @@ def subset( >>> rows = [5, 6, 7] >>> subset(ds, {"traj": rows}, row_dim_name="traj") + + ... Retrieve a specific time period: @@ -640,19 +668,37 @@ def subset( >>> func = (lambda arr: ((arr - arr[0]) % 2) == 0) >>> subset(ds, {"time": func}, row_dim_name="traj") + + Dimensions: (traj: ..., obs: ...) + Coordinates: + id (traj) int64 ... + time (obs) float64 ... + ... The filtering function can accept several input variables passed as a tuple. For example, retrieve drifters released in the Mediterranean Sea, but exclude those released in the Bay of Biscay and the Black Sea: - >>> def mediterranean_mask(lon: xr.DataArray, lat: xr.DataArray) -> xr.DataArray: - >>> # Mediterranean Sea bounding box - >>> in_med = np.logical_and(-6.0327 <= lon, np.logical_and(lon <= 36.2173, - >>> np.logical_and(30.2639 <= lat, lat <= 45.7833))) - >>> # Bay of Biscay - >>> in_biscay = np.logical_and(lon <= -0.1462, lat >= 43.2744) - >>> # Black Sea - >>> in_blacksea = np.logical_and(lon >= 27.4437, lat >= 40.9088) - >>> return np.logical_and(in_med, np.logical_not(np.logical_or(in_biscay, in_blacksea))) + >>> in_med = lambda lat, lon: np.all( + ... -6.0327 <= lon <= 36.2173, + ... 30.2639 <= lat <= 45.7833 + ... ) + >>> in_biscay = lambda lon, lat: np.all( + ... lon <= -0.1462, + ... lat >= 43.2744, + ... ) + >>> in_blacksea = lambda lon, lat: np.all( + ... lon >= 27.4437, + ... lat >= 40.9088 + ... ) + >>> mediterranean_mask = lambda lon, lat: np.logical_and( + ... in_med, + ... np.logical_not( + ... np.logical_and( + ... in_biscay, + ... in_blacksea + ... ) + ... ) + ... ) >>> subset(ds, {("start_lon", "start_lat"): mediterranean_mask}, row_dim_name="traj") Raises @@ -837,26 +883,24 @@ def _mask_var( Dimensions without coordinates: dim_0 >>> _mask_var(x, [0, 2, 4]) - - array([ True, False, True, False, True]) - Dimensions without coordinates: dim_0 + array([ True, False, True, False, True]) >>> _mask_var(x, 4) - array([False, False, False, True, False]) + array([False, False, False, False, True]) Dimensions without coordinates: dim_0 >>> rowsize = xr.DataArray(data=[2, 3]) >>> _mask_var(x, lambda arr: arr==arr[0]+1, rowsize, "dim_0") - array([False, True, False, True, False]) + array([False, True, False, True, False]) Dimensions without coordinates: dim_0 >>> y = xr.DataArray(data=np.arange(0, 5)+2) >>> rowsize = xr.DataArray(data=[2, 3]) >>> _mask_var([x, y], lambda var1, var2: ((var1 * var2) % 2) == 0, rowsize, "dim_0") - array([True, False, True, False, True]) + array([ True, False, True, False, True]) Dimensions without coordinates: dim_0 Returns diff --git a/tests/example_tests.py b/tests/example_tests.py new file mode 100644 index 00000000..4d1c9b6d --- /dev/null +++ b/tests/example_tests.py @@ -0,0 +1,8 @@ +import doctest + +import clouddrift.ragged as ragged + + +def load_tests(loader, tests, ignore): + tests.addTests(doctest.DocTestSuite(ragged, {"optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, "globs": ragged.__dict__})) + return tests \ No newline at end of file From f0d9d3085ab76355800131c265fecbf1f6752087 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Sun, 17 Mar 2024 00:57:36 -0400 Subject: [PATCH 02/13] formatting --- clouddrift/adapters/gdp1h.py | 20 ++++++++++++-------- clouddrift/ragged.py | 6 +++--- tests/example_tests.py | 12 ++++++++++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py index 4555124b..06c3d3de 100644 --- a/clouddrift/adapters/gdp1h.py +++ b/clouddrift/adapters/gdp1h.py @@ -144,8 +144,12 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: # parse the date with custom function ds["deploy_date"].data = gdp.decode_date(np.array([ds.deploy_date.data[0]])) ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]])) - ds["drogue_lost_date"].data = gdp.decode_date(np.array([ds.drogue_lost_date.data[0]])).astype("datetime64[s]") - ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype("datetime64[s]") + ds["drogue_lost_date"].data = gdp.decode_date( + np.array([ds.drogue_lost_date.data[0]]) + ).astype("datetime64[s]") + ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype( + "datetime64[s]" + ) # convert fill values to nan for var in [ @@ -609,11 +613,11 @@ def to_raggedarray( # set dynamic global attributes if ra.attrs_global: - ra.attrs_global["time_coverage_start"] = ( - f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" - ) - ra.attrs_global["time_coverage_end"] = ( - f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" - ) + ra.attrs_global[ + "time_coverage_start" + ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" + ra.attrs_global[ + "time_coverage_end" + ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" return ra diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py index 4967e67c..fe4df61e 100644 --- a/clouddrift/ragged.py +++ b/clouddrift/ragged.py @@ -683,7 +683,7 @@ def subset( ... 30.2639 <= lat <= 45.7833 ... ) >>> in_biscay = lambda lon, lat: np.all( - ... lon <= -0.1462, + ... lon <= -0.1462, ... lat >= 43.2744, ... ) >>> in_blacksea = lambda lon, lat: np.all( @@ -691,10 +691,10 @@ def subset( ... lat >= 40.9088 ... ) >>> mediterranean_mask = lambda lon, lat: np.logical_and( - ... in_med, + ... in_med, ... np.logical_not( ... np.logical_and( - ... in_biscay, + ... in_biscay, ... in_blacksea ... ) ... ) diff --git a/tests/example_tests.py b/tests/example_tests.py index 4d1c9b6d..e7254973 100644 --- a/tests/example_tests.py +++ b/tests/example_tests.py @@ -4,5 +4,13 @@ def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(ragged, {"optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, "globs": ragged.__dict__})) - return tests \ No newline at end of file + tests.addTests( + doctest.DocTestSuite( + ragged, + { + "optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, + "globs": ragged.__dict__, + }, + ) + ) + return tests From 930747bba308b59295139056ccad6e0373f8813a Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Sun, 17 Mar 2024 00:58:26 -0400 Subject: [PATCH 03/13] rename --- tests/{example_tests.py => docexamples_tests.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{example_tests.py => docexamples_tests.py} (100%) diff --git a/tests/example_tests.py b/tests/docexamples_tests.py similarity index 100% rename from tests/example_tests.py rename to tests/docexamples_tests.py From 470510dda33736c3a80d5f9c2ff8d4ab8a5f1ed3 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 00:39:42 -0400 Subject: [PATCH 04/13] Update examples --- clouddrift/ragged.py | 110 +++++++++++++++++++------------------ tests/docexamples_tests.py | 5 +- 2 files changed, 59 insertions(+), 56 deletions(-) diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py index fe4df61e..887f488f 100644 --- a/clouddrift/ragged.py +++ b/clouddrift/ragged.py @@ -292,6 +292,8 @@ def prune( Examples -------- + >>> from clouddrift.ragged import prune + >>> import numpy as np >>> prune(np.array([1, 2, 3, 0, -1, -2]), np.array([3, 1, 2]),2) (array([ 1, 2, 3, -1, -2]), array([3, 2])) @@ -475,6 +477,8 @@ def segment( -------- The simplest use of ``segment`` is to provide a tolerance value that is used to divide an array into segments: + >>> from clouddrift.ragged import segment, subset + >>> import numpy as np >>> x = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4] >>> segment(x, 0.5) @@ -597,24 +601,27 @@ def subset( a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs". Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude: - >>> from clouddrift.adapters import gdp1h - >>> ds = gdp1h.to_raggedarray(n_random_id=5).to_xarray() + >>> from clouddrift.datasets import gdp1h + >>> from clouddrift.ragged import subset + >>> import numpy as np + + >>> ds = gdp1h() ... >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj") - + ... ... The parameter `full_rows` can be used to retrieve trajectories passing through a region, for example all trajectories passing through the Gulf of Mexico: >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, full_rows=True, row_dim_name="traj") - + ... ... Retrieve drogued trajectory segments: >>> subset(ds, {"drogue_status": True}, row_dim_name="traj") - + ... Dimensions: (traj: ..., obs: ...) Coordinates: id (traj) int64 ... @@ -624,7 +631,7 @@ def subset( Retrieve trajectory segments with temperature higher than 25°C (303.15K): >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj") - + ... ... You can use the same approach to return only the trajectories that are @@ -632,13 +639,13 @@ def subset( the entire dataset): >>> subset(ds, {"rowsize": (0, 1000)}, row_dim_name="traj") - + ... ... Retrieve specific drifters using their IDs: >>> subset(ds, {"id": [2578, 2582, 2583]}, row_dim_name="traj") - + ... ... Sometimes, you may want to retrieve specific rows of a ragged array. @@ -647,12 +654,14 @@ def subset( >>> rows = [5, 6, 7] >>> subset(ds, {"traj": rows}, row_dim_name="traj") - + ... ... Retrieve a specific time period: >>> subset(ds, {"time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj") + ... + ... Note that to subset time variable, the range has to be defined as a function type of the variable. By default, ``xarray`` uses ``np.datetime64`` to @@ -662,44 +671,36 @@ def subset( Those criteria can also be combined: >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "sst": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj") + ... + ... You can also use a function to filter the data. For example, retrieve every other observation of each trajectory: >>> func = (lambda arr: ((arr - arr[0]) % 2) == 0) - >>> subset(ds, {"time": func}, row_dim_name="traj") - - Dimensions: (traj: ..., obs: ...) - Coordinates: - id (traj) int64 ... - time (obs) float64 ... + >>> subset(ds, {"id": func}, row_dim_name="traj") + ... ... The filtering function can accept several input variables passed as a tuple. For example, retrieve drifters released in the Mediterranean Sea, but exclude those released in the Bay of Biscay and the Black Sea: - >>> in_med = lambda lat, lon: np.all( - ... -6.0327 <= lon <= 36.2173, - ... 30.2639 <= lat <= 45.7833 - ... ) - >>> in_biscay = lambda lon, lat: np.all( - ... lon <= -0.1462, - ... lat >= 43.2744, - ... ) - >>> in_blacksea = lambda lon, lat: np.all( - ... lon >= 27.4437, - ... lat >= 40.9088 - ... ) - >>> mediterranean_mask = lambda lon, lat: np.logical_and( - ... in_med, - ... np.logical_not( - ... np.logical_and( - ... in_biscay, - ... in_blacksea - ... ) - ... ) - ... ) + >>> def mediterranean_mask(lon: xr.DataArray, lat: xr.DataArray) -> xr.DataArray: + ... # Mediterranean Sea bounding box + ... in_med = np.logical_and(-6.0327 <= lon, np.logical_and(lon <= 36.2173, + ... np.logical_and(30.2639 <= lat, lat <= 45.7833))) + ... # Bay of Biscay + ... in_biscay = np.logical_and(lon <= -0.1462, lat >= 43.2744) + ... # Black Sea + ... in_blacksea = np.logical_and(lon >= 27.4437, lat >= 40.9088) + ... return np.logical_and(in_med, np.logical_not(np.logical_or(in_biscay, in_blacksea))) >>> subset(ds, {("start_lon", "start_lat"): mediterranean_mask}, row_dim_name="traj") + Size: ... + Dimensions: (traj: ..., obs: ...) + Coordinates: + id (traj) int64 ... + time (obs) datetime64[ns] ... + ... Raises ------ @@ -819,25 +820,27 @@ def unpack( -------- Unpacking longitude arrays from a ragged Xarray Dataset: + >>> from clouddrift.ragged import unpack + >>> from clouddrift.datasets import gdp1h - .. code-block:: python + >>> ds = gdp1h() - lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower) - lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster) - first_lon = unpack(ds.lon.values, ds["rowsize"], rows=0) # return only the first row - first_two_lons = unpack(ds.lon.values, ds["rowsize"], rows=[0, 1]) # return first two rows + >>> lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower) + >>> lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster) + >>> first_lon = unpack(ds.lon.values, ds["rowsize"], rows=0) # return only the first row + >>> first_two_lons = unpack(ds.lon.values, ds["rowsize"], rows=[0, 1]) # return first two rows Looping over trajectories in a ragged Xarray Dataset to compute velocities for each: - .. code-block:: python + >>> from clouddrift.kinematics import velocity_from_position - for lon, lat, time in list(zip( - unpack(ds.lon.values, ds["rowsize"]), - unpack(ds.lat.values, ds["rowsize"]), - unpack(ds.time.values, ds["rowsize"]) - )): - u, v = velocity_from_position(lon, lat, time) + >>> for lon, lat, time in list(zip( + ... unpack(ds.lon.values, ds["rowsize"]), + ... unpack(ds.lat.values, ds["rowsize"]), + ... unpack(ds.time.values, ds["rowsize"]) + ... )): + ... u, v = velocity_from_position(lon, lat, time) """ indices = rowsize_to_index(rowsize) @@ -876,9 +879,12 @@ def _mask_var( Examples -------- + >>> import xarray as xr + >>> from clouddrift.ragged import _mask_var + >>> x = xr.DataArray(data=np.arange(0, 5)) >>> _mask_var(x, (2, 4)) - + ... array([False, False, True, True, True]) Dimensions without coordinates: dim_0 @@ -886,20 +892,20 @@ def _mask_var( array([ True, False, True, False, True]) >>> _mask_var(x, 4) - + ... array([False, False, False, False, True]) Dimensions without coordinates: dim_0 >>> rowsize = xr.DataArray(data=[2, 3]) >>> _mask_var(x, lambda arr: arr==arr[0]+1, rowsize, "dim_0") - + ... array([False, True, False, True, False]) Dimensions without coordinates: dim_0 >>> y = xr.DataArray(data=np.arange(0, 5)+2) >>> rowsize = xr.DataArray(data=[2, 3]) >>> _mask_var([x, y], lambda var1, var2: ((var1 * var2) % 2) == 0, rowsize, "dim_0") - + ... array([ True, False, True, False, True]) Dimensions without coordinates: dim_0 diff --git a/tests/docexamples_tests.py b/tests/docexamples_tests.py index e7254973..096c2ec3 100644 --- a/tests/docexamples_tests.py +++ b/tests/docexamples_tests.py @@ -7,10 +7,7 @@ def load_tests(loader, tests, ignore): tests.addTests( doctest.DocTestSuite( ragged, - { - "optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, - "globs": ragged.__dict__, - }, + optionflags=doctest.ELLIPSIS | doctest.FAIL_FAST, ) ) return tests From 400a4d6369ce12a675429de4fa5427b683520703 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 01:03:59 -0400 Subject: [PATCH 05/13] Run doc examples separate from unittest suite --- tests/{docexamples_tests.py => docexamples.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{docexamples_tests.py => docexamples.py} (100%) diff --git a/tests/docexamples_tests.py b/tests/docexamples.py similarity index 100% rename from tests/docexamples_tests.py rename to tests/docexamples.py From c6ecc02abff35462c8bb6cddfa356466d0675626 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 01:13:28 -0400 Subject: [PATCH 06/13] revert change --- clouddrift/adapters/gdp1h.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py index 06c3d3de..6a175b82 100644 --- a/clouddrift/adapters/gdp1h.py +++ b/clouddrift/adapters/gdp1h.py @@ -615,9 +615,9 @@ def to_raggedarray( if ra.attrs_global: ra.attrs_global[ "time_coverage_start" - ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" + ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}" ra.attrs_global[ "time_coverage_end" - ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}" + ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}" return ra From 42473113cf181e700717ae9033c795e519f5893f Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 01:16:45 -0400 Subject: [PATCH 07/13] revert --- clouddrift/adapters/gdp1h.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py index 6a175b82..a51f1422 100644 --- a/clouddrift/adapters/gdp1h.py +++ b/clouddrift/adapters/gdp1h.py @@ -146,10 +146,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]])) ds["drogue_lost_date"].data = gdp.decode_date( np.array([ds.drogue_lost_date.data[0]]) - ).astype("datetime64[s]") - ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype( - "datetime64[s]" ) + ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])) # convert fill values to nan for var in [ @@ -201,8 +199,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset: for var in target_dtype.keys(): if var in ds.keys(): - dtype = target_dtype[var] - ds[var].data = ds[var].data.astype(dtype) + ds[var].data = ds[var].data.astype(target_dtype[var]) else: warnings.warn(f"Variable {var} not found in upstream data; skipping.") From 98a83e08e5c8f1c006451a347dbc07ed0d009c93 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 01:27:50 -0400 Subject: [PATCH 08/13] separate unit tests and doc tests as jobs to run parallel --- .github/workflows/ci.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ede3bd6d..70412259 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - test: + unittest: runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -43,3 +43,19 @@ jobs: uses: codecov/codecov-action@v3 with: file: ./coverage.xml + doctest: + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] + python-version: ["3.9"] + steps: + - uses: actions/checkout@v4 + - uses: mamba-org/setup-micromamba@v1 + with: + environment-file: environment.yml + environment-name: clouddrift + - name: Run doc tests + shell: bash -l {0} + run: | + python -m unittest tests/docexamples.py From e3d8284ba442ee6d3a6b0e9caae2d94901a89125 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 01:29:51 -0400 Subject: [PATCH 09/13] update with runs-on --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 70412259..37e3310c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,6 +44,7 @@ jobs: with: file: ./coverage.xml doctest: + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: From cf9c1008e1db43b5506c94d517ad9eca5e426ef8 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 02:05:25 -0400 Subject: [PATCH 10/13] set swap --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37e3310c..693ed4bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,6 +51,10 @@ jobs: os: ["ubuntu-latest"] python-version: ["3.9"] steps: + - name: Set Swap Space + uses: pierotofy/set-swap-space@master + with: + swap-size-gb: 10 - uses: actions/checkout@v4 - uses: mamba-org/setup-micromamba@v1 with: From d06bf4780faece7688a1e4eed3b51873eadb246c Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 22:41:33 -0400 Subject: [PATCH 11/13] use 6h over 1h as the latter was causing OOM issue with the github runners --- clouddrift/ragged.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py index 887f488f..c06b597f 100644 --- a/clouddrift/ragged.py +++ b/clouddrift/ragged.py @@ -601,11 +601,11 @@ def subset( a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs". Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude: - >>> from clouddrift.datasets import gdp1h + >>> from clouddrift.datasets import gdp6h >>> from clouddrift.ragged import subset >>> import numpy as np - >>> ds = gdp1h() + >>> ds = gdp6h() ... >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj") @@ -630,7 +630,7 @@ def subset( Retrieve trajectory segments with temperature higher than 25°C (303.15K): - >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj") + >>> subset(ds, {"temp": (303.15, np.inf)}, row_dim_name="traj") ... ... @@ -670,7 +670,7 @@ def subset( Those criteria can also be combined: - >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "sst": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj") + >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "temp": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj") ... ... @@ -821,9 +821,9 @@ def unpack( Unpacking longitude arrays from a ragged Xarray Dataset: >>> from clouddrift.ragged import unpack - >>> from clouddrift.datasets import gdp1h + >>> from clouddrift.datasets import gdp6h - >>> ds = gdp1h() + >>> ds = gdp6h() >>> lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower) >>> lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster) From 0d1d0e703996ff99df0e9eaa0d8229ad67f7ed00 Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Tue, 9 Apr 2024 22:45:50 -0400 Subject: [PATCH 12/13] dont use matrix for doc tests and remove swap action --- .github/workflows/ci.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 693ed4bd..1586d1dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,17 +44,13 @@ jobs: with: file: ./coverage.xml doctest: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: os: ["ubuntu-latest"] python-version: ["3.9"] steps: - - name: Set Swap Space - uses: pierotofy/set-swap-space@master - with: - swap-size-gb: 10 - uses: actions/checkout@v4 - uses: mamba-org/setup-micromamba@v1 with: From 89c339e88c2a654c7d03f53521dd56e04d7694ce Mon Sep 17 00:00:00 2001 From: Kevin Santana Date: Thu, 11 Apr 2024 11:44:31 -0400 Subject: [PATCH 13/13] update --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1586d1dd..aca65809 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,7 @@ jobs: cartopy - name: Run unit tests shell: bash -l {0} - run: coverage run -m unittest discover -s tests -p "*.py" + run: coverage run -m unittest discover -s tests -p "*_tests.py" - name: Create coverage report shell: bash -l {0} run: | @@ -49,7 +49,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - python-version: ["3.9"] + python-version: ["3.10"] steps: - uses: actions/checkout@v4 - uses: mamba-org/setup-micromamba@v1