From 2dc9f0a54deebdca03e939a0a5013d5f5ba4a19e Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 12 Mar 2024 16:14:21 -0400
Subject: [PATCH 01/13] add doctest runner for clouddrift.ragged and fix docstring examples

---
 clouddrift/adapters/gdp1h.py |  13 ++---
 clouddrift/ragged.py         | 104 +++++++++++++++++++++++++----------
 tests/example_tests.py       |   8 +++
 3 files changed, 88 insertions(+), 37 deletions(-)
 create mode 100644 tests/example_tests.py

diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py
index 23a3936b..4555124b 100644
--- a/clouddrift/adapters/gdp1h.py
+++ b/clouddrift/adapters/gdp1h.py
@@ -144,10 +144,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
     # parse the date with custom function
     ds["deploy_date"].data = gdp.decode_date(np.array([ds.deploy_date.data[0]]))
     ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]]))
-    ds["drogue_lost_date"].data = gdp.decode_date(
-        np.array([ds.drogue_lost_date.data[0]])
-    )
-    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]]))
+    ds["drogue_lost_date"].data = gdp.decode_date(np.array([ds.drogue_lost_date.data[0]])).astype("datetime64[s]")
+    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype("datetime64[s]")
 
     # convert fill values to nan
     for var in [
@@ -199,7 +197,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
 
     for var in target_dtype.keys():
         if var in ds.keys():
-            ds[var].data = ds[var].data.astype(target_dtype[var])
+            dtype = target_dtype[var]
+            ds[var].data = ds[var].data.astype(dtype)
         else:
             warnings.warn(f"Variable {var} not found in upstream data; skipping.")
 
@@ -611,10 +610,10 @@ def to_raggedarray(
     # set dynamic global attributes
     if ra.attrs_global:
         ra.attrs_global["time_coverage_start"] = (
-            f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}"
+            f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
         )
         ra.attrs_global["time_coverage_end"] = (
-            f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}"
+            f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
         )
 
     return ra
diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
index 4ef49b3f..4967e67c 100644
--- a/clouddrift/ragged.py
+++ b/clouddrift/ragged.py
@@ -77,22 +77,29 @@ def apply_ragged(
     multiple particles, the coordinates of which are found in the ragged arrays x, y, and t
     that share row sizes 2, 3, and 4:
 
+    >>> from clouddrift.kinematics import velocity_from_position
     >>> rowsize = [2, 3, 4]
     >>> x = np.array([1, 2, 10, 12, 14, 30, 33, 36, 39])
     >>> y = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])
     >>> t = np.array([1, 2, 1, 2, 3, 1, 2, 3, 4])
     >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, coord_system="cartesian")
-    array([1., 1., 2., 2., 2., 3., 3., 3., 3.]),
-    array([1., 1., 1., 1., 1., 1., 1., 1., 1.]))
+    >>> u1
+    array([1., 1., 2., 2., 2., 3., 3., 3., 3.])
+    >>> v1
+    array([1., 1., 1., 1., 1., 1., 1., 1., 1.])
 
     To apply ``func`` to only a subset of rows, use the ``rows`` argument:
 
     >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, rows=0, coord_system="cartesian")
-    array([1., 1.]),
-    array([1., 1.]))
+    >>> u1
+    array([1., 1.])
+    >>> v1
+    array([1., 1.])
     >>> u1, v1 = apply_ragged(velocity_from_position, [x, y, t], rowsize, rows=[0, 1], coord_system="cartesian")
-    array([1., 1., 2., 2., 2.]),
-    array([1., 1., 1., 1., 1.]))
+    >>> u1
+    array([1., 1., 2., 2., 2.])
+    >>> v1
+    array([1., 1., 1., 1., 1.])
 
     Raises
     ------
@@ -286,7 +293,7 @@ def prune(
     Examples
     --------
     >>> prune(np.array([1, 2, 3, 0, -1, -2]), np.array([3, 1, 2]),2)
-    (array([1, 2, 3, -1, -2]), array([3, 2]))
+    (array([ 1,  2,  3, -1, -2]), array([3, 2]))
 
     Raises
     ------
@@ -357,9 +364,9 @@ def ragged_to_regular(
     You can specify an alternative fill value:
 
     >>> ragged_to_regular(np.array([1, 2, 3, 4, 5]), np.array([2, 1, 2]), fill_value=999)
-    array([[ 1.,    2.],
-           [ 3., -999.],
-           [ 4.,    5.]])
+    array([[  1,   2],
+           [  3, 999],
+           [  4,   5]])
 
     See Also
     --------
@@ -401,7 +408,7 @@ def regular_to_ragged(
     Alternatively, a different fill value can be specified:
 
     >>> regular_to_ragged(np.array([[1, 2], [3, -999], [4, 5]]), fill_value=-999)
-    (array([1., 2., 3., 4., 5.]), array([2, 1, 2]))
+    (array([1, 2, 3, 4, 5]), array([2, 1, 2]))
 
     See Also
     --------
@@ -435,7 +442,7 @@ def rowsize_to_index(rowsize: list | np.ndarray | xr.DataArray) -> np.ndarray:
     To obtain the indices within a ragged array of three consecutive rows of sizes 100, 202, and 53:
 
     >>> rowsize_to_index([100, 202, 53])
-    array([0, 100, 302, 355])
+    array([  0, 100, 302, 355])
     """
     return np.cumsum(np.insert(np.array(rowsize), 0, 0))
 
@@ -502,10 +509,10 @@ def segment(
     If the input array contains time objects, the tolerance must be a time interval:
 
     >>> x = np.array([np.datetime64("2023-01-01"), np.datetime64("2023-01-02"),
-                      np.datetime64("2023-01-03"), np.datetime64("2023-02-01"),
-                      np.datetime64("2023-02-02")])
+    ...               np.datetime64("2023-01-03"), np.datetime64("2023-02-01"),
+    ...               np.datetime64("2023-02-02")])
     >>> segment(x, np.timedelta64(1, "D"))
-    np.array([3, 2])
+    array([3, 2])
     """
 
     # for compatibility with datetime list or np.timedelta64 arrays
@@ -590,30 +597,49 @@ def subset(
     a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs".
 
     Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude:
+    >>> from clouddrift.adapters import gdp1h
+    >>> ds = gdp1h.to_raggedarray(n_random_id=5).to_xarray()
+    ...
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     The parameter `full_rows` can be used to retrieve trajectories passing through a region, for example all trajectories passing through the Gulf of Mexico:
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, full_rows=True, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     Retrieve drogued trajectory segments:
 
     >>> subset(ds, {"drogue_status": True}, row_dim_name="traj")
+    <xarray.Dataset>
+    Dimensions:                (traj: ..., obs: ...)
+    Coordinates:
+        id                     (traj) int64 ...
+        time                   (obs) datetime64[ns] ...
+    ...
 
     Retrieve trajectory segments with temperature higher than 25°C (303.15K):
 
     >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     You can use the same approach to return only the trajectories that are
     shorter than some number of observations (similar to :func:`prune` but for
     the entire dataset):
 
     >>> subset(ds, {"rowsize": (0, 1000)}, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     Retrieve specific drifters using their IDs:
 
     >>> subset(ds, {"id": [2578, 2582, 2583]}, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     Sometimes, you may want to retrieve specific rows of a ragged array.
     You can do that by filtering along the trajectory dimension directly, since
@@ -621,6 +647,8 @@ def subset(
 
     >>> rows = [5, 6, 7]
     >>> subset(ds, {"traj": rows}, row_dim_name="traj")
+    <xarray.Dataset>
+    ...
 
     Retrieve a specific time period:
 
@@ -640,19 +668,37 @@ def subset(
 
     >>> func = (lambda arr: ((arr - arr[0]) % 2) == 0)
     >>> subset(ds, {"time": func}, row_dim_name="traj")
+    <xarray.Dataset>
+    Dimensions:                (traj: ..., obs: ...)
+    Coordinates:
+        id                     (traj) int64 ...
+        time                   (obs) float64 ...
+    ...
 
     The filtering function can accept several input variables passed as a tuple. For example, retrieve
     drifters released in the Mediterranean Sea, but exclude those released in the Bay of Biscay and the Black Sea:
 
-    >>> def mediterranean_mask(lon: xr.DataArray, lat: xr.DataArray) -> xr.DataArray:
-    >>>     # Mediterranean Sea bounding box
-    >>>     in_med = np.logical_and(-6.0327 <= lon, np.logical_and(lon <= 36.2173,
-    >>>                                                            np.logical_and(30.2639 <= lat, lat <= 45.7833)))
-    >>>     # Bay of Biscay
-    >>>     in_biscay = np.logical_and(lon <= -0.1462, lat >= 43.2744)
-    >>>     # Black Sea
-    >>>     in_blacksea = np.logical_and(lon >= 27.4437, lat >= 40.9088)
-    >>>     return np.logical_and(in_med, np.logical_not(np.logical_or(in_biscay, in_blacksea)))
+    >>> in_med = lambda lat, lon: np.all(
+    ...     -6.0327 <= lon <= 36.2173,
+    ...     30.2639 <= lat <= 45.7833
+    ... )
+    >>> in_biscay = lambda lon, lat: np.all(
+    ...     lon <= -0.1462, 
+    ...     lat >= 43.2744,
+    ... )
+    >>> in_blacksea = lambda lon, lat: np.all(
+    ...     lon >= 27.4437,
+    ...     lat >= 40.9088
+    ... )
+    >>> mediterranean_mask = lambda lon, lat: np.logical_and(
+    ...     in_med, 
+    ...     np.logical_not(
+    ...         np.logical_and(
+    ...             in_biscay, 
+    ...             in_blacksea
+    ...         )
+    ...     )
+    ... )
     >>> subset(ds, {("start_lon", "start_lat"): mediterranean_mask}, row_dim_name="traj")
 
     Raises
@@ -837,26 +883,24 @@ def _mask_var(
     Dimensions without coordinates: dim_0
 
     >>> _mask_var(x, [0, 2, 4])
-    <xarray.DataArray (dim_0: 5)>
-    array([ True, False, True,  False, True])
-    Dimensions without coordinates: dim_0
+    array([ True, False,  True, False,  True])
 
     >>> _mask_var(x, 4)
     <xarray.DataArray (dim_0: 5)>
-    array([False, False, False,  True, False])
+    array([False, False, False, False,  True])
     Dimensions without coordinates: dim_0
 
     >>> rowsize = xr.DataArray(data=[2, 3])
     >>> _mask_var(x, lambda arr: arr==arr[0]+1, rowsize, "dim_0")
     <xarray.DataArray (dim_0: 5)>
-    array([False, True, False,  True, False])
+    array([False,  True, False,  True, False])
     Dimensions without coordinates: dim_0
 
     >>> y = xr.DataArray(data=np.arange(0, 5)+2)
     >>> rowsize = xr.DataArray(data=[2, 3])
     >>> _mask_var([x, y], lambda var1, var2: ((var1 * var2) % 2) == 0, rowsize, "dim_0")
     <xarray.DataArray (dim_0: 5)>
-    array([True, False, True,  False, True])
+    array([ True, False,  True, False,  True])
     Dimensions without coordinates: dim_0
 
     Returns
diff --git a/tests/example_tests.py b/tests/example_tests.py
new file mode 100644
index 00000000..4d1c9b6d
--- /dev/null
+++ b/tests/example_tests.py
@@ -0,0 +1,8 @@
+import doctest
+
+import clouddrift.ragged as ragged
+
+
+def load_tests(loader, tests, ignore):
+    tests.addTests(doctest.DocTestSuite(ragged, {"optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, "globs": ragged.__dict__}))
+    return tests
\ No newline at end of file

From f0d9d3085ab76355800131c265fecbf1f6752087 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Sun, 17 Mar 2024 00:57:36 -0400
Subject: [PATCH 02/13] formatting

---
 clouddrift/adapters/gdp1h.py | 20 ++++++++++++--------
 clouddrift/ragged.py         |  6 +++---
 tests/example_tests.py       | 12 ++++++++++--
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py
index 4555124b..06c3d3de 100644
--- a/clouddrift/adapters/gdp1h.py
+++ b/clouddrift/adapters/gdp1h.py
@@ -144,8 +144,12 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
     # parse the date with custom function
     ds["deploy_date"].data = gdp.decode_date(np.array([ds.deploy_date.data[0]]))
     ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]]))
-    ds["drogue_lost_date"].data = gdp.decode_date(np.array([ds.drogue_lost_date.data[0]])).astype("datetime64[s]")
-    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype("datetime64[s]")
+    ds["drogue_lost_date"].data = gdp.decode_date(
+        np.array([ds.drogue_lost_date.data[0]])
+    ).astype("datetime64[s]")
+    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype(
+        "datetime64[s]"
+    )
 
     # convert fill values to nan
     for var in [
@@ -609,11 +613,11 @@ def to_raggedarray(
 
     # set dynamic global attributes
     if ra.attrs_global:
-        ra.attrs_global["time_coverage_start"] = (
-            f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
-        )
-        ra.attrs_global["time_coverage_end"] = (
-            f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
-        )
+        ra.attrs_global[
+            "time_coverage_start"
+        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
+        ra.attrs_global[
+            "time_coverage_end"
+        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
 
     return ra
diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
index 4967e67c..fe4df61e 100644
--- a/clouddrift/ragged.py
+++ b/clouddrift/ragged.py
@@ -683,7 +683,7 @@ def subset(
     ...     30.2639 <= lat <= 45.7833
     ... )
     >>> in_biscay = lambda lon, lat: np.all(
-    ...     lon <= -0.1462, 
+    ...     lon <= -0.1462,
     ...     lat >= 43.2744,
     ... )
     >>> in_blacksea = lambda lon, lat: np.all(
@@ -691,10 +691,10 @@ def subset(
     ...     lat >= 40.9088
     ... )
     >>> mediterranean_mask = lambda lon, lat: np.logical_and(
-    ...     in_med, 
+    ...     in_med,
     ...     np.logical_not(
     ...         np.logical_and(
-    ...             in_biscay, 
+    ...             in_biscay,
     ...             in_blacksea
     ...         )
     ...     )
diff --git a/tests/example_tests.py b/tests/example_tests.py
index 4d1c9b6d..e7254973 100644
--- a/tests/example_tests.py
+++ b/tests/example_tests.py
@@ -4,5 +4,13 @@
 
 
 def load_tests(loader, tests, ignore):
-    tests.addTests(doctest.DocTestSuite(ragged, {"optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST, "globs": ragged.__dict__}))
-    return tests
\ No newline at end of file
+    tests.addTests(
+        doctest.DocTestSuite(
+            ragged,
+            {
+                "optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST,
+                "globs": ragged.__dict__,
+            },
+        )
+    )
+    return tests

From 930747bba308b59295139056ccad6e0373f8813a Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Sun, 17 Mar 2024 00:58:26 -0400
Subject: [PATCH 03/13] rename tests/example_tests.py to tests/docexamples_tests.py

---
 tests/{example_tests.py => docexamples_tests.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{example_tests.py => docexamples_tests.py} (100%)

diff --git a/tests/example_tests.py b/tests/docexamples_tests.py
similarity index 100%
rename from tests/example_tests.py
rename to tests/docexamples_tests.py

From 470510dda33736c3a80d5f9c2ff8d4ab8a5f1ed3 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 00:39:42 -0400
Subject: [PATCH 04/13] Update examples

---
 clouddrift/ragged.py       | 110 +++++++++++++++++++------------------
 tests/docexamples_tests.py |   5 +-
 2 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
index fe4df61e..887f488f 100644
--- a/clouddrift/ragged.py
+++ b/clouddrift/ragged.py
@@ -292,6 +292,8 @@ def prune(
 
     Examples
     --------
+    >>> from clouddrift.ragged import prune
+    >>> import numpy as np
     >>> prune(np.array([1, 2, 3, 0, -1, -2]), np.array([3, 1, 2]),2)
     (array([ 1,  2,  3, -1, -2]), array([3, 2]))
 
@@ -475,6 +477,8 @@ def segment(
     --------
     The simplest use of ``segment`` is to provide a tolerance value that is
     used to divide an array into segments:
+    >>> from clouddrift.ragged import segment, subset
+    >>> import numpy as np
 
     >>> x = [0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4]
     >>> segment(x, 0.5)
@@ -597,24 +601,27 @@ def subset(
     a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs".
 
     Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude:
-    >>> from clouddrift.adapters import gdp1h
-    >>> ds = gdp1h.to_raggedarray(n_random_id=5).to_xarray()
+    >>> from clouddrift.datasets import gdp1h
+    >>> from clouddrift.ragged import subset
+    >>> import numpy as np
+
+    >>> ds = gdp1h()
     ...
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     The parameter `full_rows` can be used to retrieve trajectories passing through a region, for example all trajectories passing through the Gulf of Mexico:
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, full_rows=True, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     Retrieve drogued trajectory segments:
 
     >>> subset(ds, {"drogue_status": True}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     Dimensions:                (traj: ..., obs: ...)
     Coordinates:
         id                     (traj) int64 ...
@@ -624,7 +631,7 @@ def subset(
     Retrieve trajectory segments with temperature higher than 25°C (303.15K):
 
     >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     You can use the same approach to return only the trajectories that are
@@ -632,13 +639,13 @@ def subset(
     the entire dataset):
 
     >>> subset(ds, {"rowsize": (0, 1000)}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     Retrieve specific drifters using their IDs:
 
     >>> subset(ds, {"id": [2578, 2582, 2583]}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     Sometimes, you may want to retrieve specific rows of a ragged array.
@@ -647,12 +654,14 @@ def subset(
 
     >>> rows = [5, 6, 7]
     >>> subset(ds, {"traj": rows}, row_dim_name="traj")
-    <xarray.Dataset>
+    <xarray.Dataset> ...
     ...
 
     Retrieve a specific time period:
 
     >>> subset(ds, {"time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj")
+    <xarray.Dataset> ...
+    ...
 
     Note that to subset time variable, the range has to be defined as a function
     type of the variable. By default, ``xarray`` uses ``np.datetime64`` to
@@ -662,44 +671,36 @@ def subset(
     Those criteria can also be combined:
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "sst": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj")
+    <xarray.Dataset> ...
+    ...
 
     You can also use a function to filter the data. For example, retrieve every other observation
     of each trajectory:
 
     >>> func = (lambda arr: ((arr - arr[0]) % 2) == 0)
-    >>> subset(ds, {"time": func}, row_dim_name="traj")
-    <xarray.Dataset>
-    Dimensions:                (traj: ..., obs: ...)
-    Coordinates:
-        id                     (traj) int64 ...
-        time                   (obs) float64 ...
+    >>> subset(ds, {"id": func}, row_dim_name="traj")
+    <xarray.Dataset> ...
     ...
 
     The filtering function can accept several input variables passed as a tuple. For example, retrieve
     drifters released in the Mediterranean Sea, but exclude those released in the Bay of Biscay and the Black Sea:
 
-    >>> in_med = lambda lat, lon: np.all(
-    ...     -6.0327 <= lon <= 36.2173,
-    ...     30.2639 <= lat <= 45.7833
-    ... )
-    >>> in_biscay = lambda lon, lat: np.all(
-    ...     lon <= -0.1462,
-    ...     lat >= 43.2744,
-    ... )
-    >>> in_blacksea = lambda lon, lat: np.all(
-    ...     lon >= 27.4437,
-    ...     lat >= 40.9088
-    ... )
-    >>> mediterranean_mask = lambda lon, lat: np.logical_and(
-    ...     in_med,
-    ...     np.logical_not(
-    ...         np.logical_and(
-    ...             in_biscay,
-    ...             in_blacksea
-    ...         )
-    ...     )
-    ... )
+    >>> def mediterranean_mask(lon: xr.DataArray, lat: xr.DataArray) -> xr.DataArray:
+    ...    # Mediterranean Sea bounding box
+    ...    in_med = np.logical_and(-6.0327 <= lon, np.logical_and(lon <= 36.2173,
+    ...                                                           np.logical_and(30.2639 <= lat, lat <= 45.7833)))
+    ...    # Bay of Biscay
+    ...    in_biscay = np.logical_and(lon <= -0.1462, lat >= 43.2744)
+    ...    # Black Sea
+    ...    in_blacksea = np.logical_and(lon >= 27.4437, lat >= 40.9088)
+    ...    return np.logical_and(in_med, np.logical_not(np.logical_or(in_biscay, in_blacksea)))
     >>> subset(ds, {("start_lon", "start_lat"): mediterranean_mask}, row_dim_name="traj")
+    <xarray.Dataset> Size: ...
+    Dimensions:                (traj: ..., obs: ...)
+    Coordinates:
+        id                     (traj) int64 ...
+        time                   (obs) datetime64[ns] ...
+    ...
 
     Raises
     ------
@@ -819,25 +820,27 @@ def unpack(
     --------
 
     Unpacking longitude arrays from a ragged Xarray Dataset:
+    >>> from clouddrift.ragged import unpack
+    >>> from clouddrift.datasets import gdp1h
 
-    .. code-block:: python
+    >>> ds = gdp1h()
 
-        lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower)
-        lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster)
-        first_lon = unpack(ds.lon.values, ds["rowsize"], rows=0) # return only the first row
-        first_two_lons = unpack(ds.lon.values, ds["rowsize"], rows=[0, 1]) # return first two rows
+    >>> lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower)
+    >>> lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster)
+    >>> first_lon = unpack(ds.lon.values, ds["rowsize"], rows=0) # return only the first row
+    >>> first_two_lons = unpack(ds.lon.values, ds["rowsize"], rows=[0, 1]) # return first two rows
 
     Looping over trajectories in a ragged Xarray Dataset to compute velocities
     for each:
 
-    .. code-block:: python
+    >>> from clouddrift.kinematics import velocity_from_position
 
-        for lon, lat, time in list(zip(
-            unpack(ds.lon.values, ds["rowsize"]),
-            unpack(ds.lat.values, ds["rowsize"]),
-            unpack(ds.time.values, ds["rowsize"])
-        )):
-            u, v = velocity_from_position(lon, lat, time)
+    >>> for lon, lat, time in list(zip(
+    ...     unpack(ds.lon.values, ds["rowsize"]),
+    ...     unpack(ds.lat.values, ds["rowsize"]),
+    ...     unpack(ds.time.values, ds["rowsize"])
+    ... )):
+    ...     u, v = velocity_from_position(lon, lat, time)
     """
     indices = rowsize_to_index(rowsize)
 
@@ -876,9 +879,12 @@ def _mask_var(
 
     Examples
     --------
+    >>> import xarray as xr
+    >>> from clouddrift.ragged import _mask_var
+
     >>> x = xr.DataArray(data=np.arange(0, 5))
     >>> _mask_var(x, (2, 4))
-    <xarray.DataArray (dim_0: 5)>
+    <xarray.DataArray (dim_0: 5)> ...
     array([False, False,  True,  True,  True])
     Dimensions without coordinates: dim_0
 
@@ -886,20 +892,20 @@ def _mask_var(
     array([ True, False,  True, False,  True])
 
     >>> _mask_var(x, 4)
-    <xarray.DataArray (dim_0: 5)>
+    <xarray.DataArray (dim_0: 5)> ...
     array([False, False, False, False,  True])
     Dimensions without coordinates: dim_0
 
     >>> rowsize = xr.DataArray(data=[2, 3])
     >>> _mask_var(x, lambda arr: arr==arr[0]+1, rowsize, "dim_0")
-    <xarray.DataArray (dim_0: 5)>
+    <xarray.DataArray (dim_0: 5)> ...
     array([False,  True, False,  True, False])
     Dimensions without coordinates: dim_0
 
     >>> y = xr.DataArray(data=np.arange(0, 5)+2)
     >>> rowsize = xr.DataArray(data=[2, 3])
     >>> _mask_var([x, y], lambda var1, var2: ((var1 * var2) % 2) == 0, rowsize, "dim_0")
-    <xarray.DataArray (dim_0: 5)>
+    <xarray.DataArray (dim_0: 5)> ...
     array([ True, False,  True, False,  True])
     Dimensions without coordinates: dim_0
 
diff --git a/tests/docexamples_tests.py b/tests/docexamples_tests.py
index e7254973..096c2ec3 100644
--- a/tests/docexamples_tests.py
+++ b/tests/docexamples_tests.py
@@ -7,10 +7,7 @@ def load_tests(loader, tests, ignore):
     tests.addTests(
         doctest.DocTestSuite(
             ragged,
-            {
-                "optionflag": doctest.ELLIPSIS | doctest.FAIL_FAST,
-                "globs": ragged.__dict__,
-            },
+            optionflags=doctest.ELLIPSIS | doctest.FAIL_FAST,
         )
     )
     return tests

From 400a4d6369ce12a675429de4fa5427b683520703 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 01:03:59 -0400
Subject: [PATCH 05/13] Run doc examples separate from unittest suite

---
 tests/{docexamples_tests.py => docexamples.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{docexamples_tests.py => docexamples.py} (100%)

diff --git a/tests/docexamples_tests.py b/tests/docexamples.py
similarity index 100%
rename from tests/docexamples_tests.py
rename to tests/docexamples.py

From c6ecc02abff35462c8bb6cddfa356466d0675626 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 01:13:28 -0400
Subject: [PATCH 06/13] revert datetime64 cast in gdp1h time_coverage attributes

---
 clouddrift/adapters/gdp1h.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py
index 06c3d3de..6a175b82 100644
--- a/clouddrift/adapters/gdp1h.py
+++ b/clouddrift/adapters/gdp1h.py
@@ -615,9 +615,9 @@ def to_raggedarray(
     if ra.attrs_global:
         ra.attrs_global[
             "time_coverage_start"
-        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
+        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.min(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}"
         ra.attrs_global[
             "time_coverage_end"
-        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time'].astype("datetime64[s]").astype("int64")))):%Y-%m-%d:%H:%M:%SZ}"
+        ] = f"{datetime(1970,1,1) + timedelta(seconds=int(np.max(ra.coords['time']))):%Y-%m-%d:%H:%M:%SZ}"
 
     return ra

From 42473113cf181e700717ae9033c795e519f5893f Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 01:16:45 -0400
Subject: [PATCH 07/13] revert datetime64[s] casts and dtype temp variable in gdp1h preprocess

---
 clouddrift/adapters/gdp1h.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/clouddrift/adapters/gdp1h.py b/clouddrift/adapters/gdp1h.py
index 6a175b82..a51f1422 100644
--- a/clouddrift/adapters/gdp1h.py
+++ b/clouddrift/adapters/gdp1h.py
@@ -146,10 +146,8 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
     ds["end_date"].data = gdp.decode_date(np.array([ds.end_date.data[0]]))
     ds["drogue_lost_date"].data = gdp.decode_date(
         np.array([ds.drogue_lost_date.data[0]])
-    ).astype("datetime64[s]")
-    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]])).astype(
-        "datetime64[s]"
     )
+    ds["time"].data = gdp.decode_date(np.array([ds.time.data[0]]))
 
     # convert fill values to nan
     for var in [
@@ -201,8 +199,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
 
     for var in target_dtype.keys():
         if var in ds.keys():
-            dtype = target_dtype[var]
-            ds[var].data = ds[var].data.astype(dtype)
+            ds[var].data = ds[var].data.astype(target_dtype[var])
         else:
             warnings.warn(f"Variable {var} not found in upstream data; skipping.")
 

From 98a83e08e5c8f1c006451a347dbc07ed0d009c93 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 01:27:50 -0400
Subject: [PATCH 08/13] separate unit tests and doc tests into jobs that run in
 parallel

---
 .github/workflows/ci.yml | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ede3bd6d..70412259 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,7 +13,7 @@ on:
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  test:
+  unittest:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -43,3 +43,19 @@ jobs:
         uses: codecov/codecov-action@v3
         with:
           file: ./coverage.xml
+  doctest:
+    strategy:
+      fail-fast: false
+      matrix:
+        os: ["ubuntu-latest"]
+        python-version: ["3.9"]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: environment.yml
+          environment-name: clouddrift
+      - name: Run doc tests
+        shell: bash -l {0}
+        run: |
+              python -m unittest tests/docexamples.py

From e3d8284ba442ee6d3a6b0e9caae2d94901a89125 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 01:29:51 -0400
Subject: [PATCH 09/13] add missing runs-on to doctest job

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 70412259..37e3310c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,6 +44,7 @@ jobs:
         with:
           file: ./coverage.xml
   doctest:
+    runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:

From cf9c1008e1db43b5506c94d517ad9eca5e426ef8 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 02:05:25 -0400
Subject: [PATCH 10/13] add 10 GB swap space step to doctest job

---
 .github/workflows/ci.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 37e3310c..693ed4bd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -51,6 +51,10 @@ jobs:
         os: ["ubuntu-latest"]
         python-version: ["3.9"]
     steps:
+      - name: Set Swap Space
+        uses: pierotofy/set-swap-space@master
+        with:
+          swap-size-gb: 10
       - uses: actions/checkout@v4
       - uses: mamba-org/setup-micromamba@v1
         with:

From d06bf4780faece7688a1e4eed3b51873eadb246c Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 22:41:33 -0400
Subject: [PATCH 11/13] use gdp6h over gdp1h in doc examples as the latter was
 causing OOM issues on the GitHub runners

---
 clouddrift/ragged.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clouddrift/ragged.py b/clouddrift/ragged.py
index 887f488f..c06b597f 100644
--- a/clouddrift/ragged.py
+++ b/clouddrift/ragged.py
@@ -601,11 +601,11 @@ def subset(
     a single drifter trajectory and the `row_dim_name` is "traj" and the `obs_dim_name` is "obs".
 
     Retrieve a region, like the Gulf of Mexico, using ranges of latitude and longitude:
-    >>> from clouddrift.datasets import gdp1h
+    >>> from clouddrift.datasets import gdp6h
     >>> from clouddrift.ragged import subset
     >>> import numpy as np
 
-    >>> ds = gdp1h()
+    >>> ds = gdp6h()
     ...
 
     >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78)}, row_dim_name="traj")
@@ -630,7 +630,7 @@ def subset(
 
     Retrieve trajectory segments with temperature higher than 25°C (303.15K):
 
-    >>> subset(ds, {"sst": (303.15, np.inf)}, row_dim_name="traj")
+    >>> subset(ds, {"temp": (303.15, np.inf)}, row_dim_name="traj")
     <xarray.Dataset> ...
     ...
 
@@ -670,7 +670,7 @@ def subset(
 
     Those criteria can also be combined:
 
-    >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "sst": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj")
+    >>> subset(ds, {"lat": (21, 31), "lon": (-98, -78), "drogue_status": True, "temp": (303.15, np.inf), "time": (np.datetime64("2000-01-01"), np.datetime64("2020-01-31"))}, row_dim_name="traj")
     <xarray.Dataset> ...
     ...
 
@@ -821,9 +821,9 @@ def unpack(
 
     Unpacking longitude arrays from a ragged Xarray Dataset:
     >>> from clouddrift.ragged import unpack
-    >>> from clouddrift.datasets import gdp1h
+    >>> from clouddrift.datasets import gdp6h
 
-    >>> ds = gdp1h()
+    >>> ds = gdp6h()
 
     >>> lon = unpack(ds.lon, ds["rowsize"]) # return a list[xr.DataArray] (slower)
     >>> lon = unpack(ds.lon.values, ds["rowsize"]) # return a list[np.ndarray] (faster)

From 0d1d0e703996ff99df0e9eaa0d8229ad67f7ed00 Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Tue, 9 Apr 2024 22:45:50 -0400
Subject: [PATCH 12/13] run doc tests on ubuntu-latest instead of matrix.os and remove swap action

---
 .github/workflows/ci.yml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 693ed4bd..1586d1dd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,17 +44,13 @@ jobs:
         with:
           file: ./coverage.xml
   doctest:
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest"]
         python-version: ["3.9"]
     steps:
-      - name: Set Swap Space
-        uses: pierotofy/set-swap-space@master
-        with:
-          swap-size-gb: 10
       - uses: actions/checkout@v4
       - uses: mamba-org/setup-micromamba@v1
         with:

From 89c339e88c2a654c7d03f53521dd56e04d7694ce Mon Sep 17 00:00:00 2001
From: Kevin Santana <kevinsantana11@gmail.com>
Date: Thu, 11 Apr 2024 11:44:31 -0400
Subject: [PATCH 13/13] limit unit test discovery to *_tests.py and set doctest matrix python to 3.10

---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1586d1dd..aca65809 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -33,7 +33,7 @@ jobs:
             cartopy
       - name: Run unit tests
         shell: bash -l {0}
-        run: coverage run -m unittest discover -s tests -p "*.py"
+        run: coverage run -m unittest discover -s tests -p "*_tests.py"
       - name: Create coverage report
         shell: bash -l {0}
         run: |
@@ -49,7 +49,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-latest"]
-        python-version: ["3.9"]
+        python-version: ["3.10"]
     steps:
       - uses: actions/checkout@v4
       - uses: mamba-org/setup-micromamba@v1