Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework generated ragged array to utilize new conventions for coordinates #374

Merged
11 changes: 6 additions & 5 deletions clouddrift/adapters/gdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
import xarray as xr

  from clouddrift.adapters.utils import download_with_progress
+ from clouddrift.raggedarray import DimNames

- GDP_COORDS = [
-     "ids",
-     "time",
+ GDP_COORDS: list[tuple[str, DimNames]] = [
+     ("id", "traj"),
+     ("time", "obs"),
  ]

GDP_METADATA = [
Expand Down Expand Up @@ -179,7 +180,7 @@
idx : list
Unique set of drifter IDs sorted by their start date.
"""
-     return df.ID[np.where(np.in1d(df.ID, idx))[0]].values
+     return df.ID[np.where(np.in1d(df.ID, idx))[0]].values  # type: ignore

Check warning on line 183 in clouddrift/adapters/gdp.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp.py#L183

Added line #L183 was not covered by tests


def fetch_netcdf(url: str, file: str):
Expand Down Expand Up @@ -281,7 +282,7 @@
return charar


- def drogue_presence(lost_time, time) -> bool:
+ def drogue_presence(lost_time, time) -> np.ndarray:
"""Create drogue status from the drogue lost time and the trajectory time.

Parameters
Expand Down
11 changes: 3 additions & 8 deletions clouddrift/adapters/gdp1h.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
filelist: Sequence[str] = re.compile(pattern).findall(string) # noqa: F821
else:
filelist = [filename_pattern.format(id=did) for did in drifter_ids]
-         filelist = np.unique(filelist)
+         filelist = list(np.unique(filelist))

Check warning on line 97 in clouddrift/adapters/gdp1h.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp1h.py#L97

Added line #L97 was not covered by tests

# retrieve only a subset of n_random_id trajectories
if n_random_id:
Expand Down Expand Up @@ -204,7 +204,6 @@
warnings.warn(f"Variable {var} not found in upstream data; skipping.")

# new variables
-     ds["ids"] = (["traj", "obs"], [np.repeat(ds.ID.values, ds.sizes["obs"])])
ds["drogue_status"] = (
["traj", "obs"],
[gdp.drogue_presence(ds.drogue_lost_date.data, ds.time.data[0])],
Expand Down Expand Up @@ -284,10 +283,6 @@
"longitude": {"long_name": "Longitude", "units": "degrees_east"},
"latitude": {"long_name": "Latitude", "units": "degrees_north"},
"time": {"long_name": "Time", "units": "seconds since 1970-01-01 00:00:00"},
-         "ids": {
-             "long_name": "Global Drifter Program Buoy ID repeated along observations",
-             "units": "-",
-         },
"rowsize": {
"long_name": "Number of observations per trajectory",
"sample_dimension": "obs",
Expand Down Expand Up @@ -501,7 +496,7 @@
ds.attrs = attrs

# rename variables
-     ds = ds.rename_vars({"longitude": "lon", "latitude": "lat"})
+     ds = ds.rename_vars({"longitude": "lon", "latitude": "lat", "ID": "id"})

Check warning on line 499 in clouddrift/adapters/gdp1h.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp1h.py#L499

Added line #L499 was not covered by tests

# Cast float64 variables to float32 to reduce memory footprint.
ds = gdp.cast_float64_variables_to_float32(ds)
Expand Down Expand Up @@ -586,7 +581,7 @@
ra = RaggedArray.from_files(
indices=ids,
preprocess_func=preprocess,
-         name_coords=gdp.GDP_COORDS,
+         coord_dim_map=gdp.GDP_COORDS,
name_meta=gdp.GDP_METADATA,
name_data=GDP_DATA,
rowsize_func=gdp.rowsize,
Expand Down
18 changes: 9 additions & 9 deletions clouddrift/adapters/gdp6h.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import numpy as np
import xarray as xr
- from numpy.typing import ArrayLike

import clouddrift.adapters.gdp as gdp
from clouddrift.adapters.utils import download_with_progress
Expand Down Expand Up @@ -80,12 +79,13 @@
if drifter_ids is None:
urlpath = urllib.request.urlopen(url)
string = urlpath.read().decode("utf-8")
-         drifter_urls: ArrayLike = []
+         drifter_urls: list[str] = []

Check warning on line 82 in clouddrift/adapters/gdp6h.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp6h.py#L82

Added line #L82 was not covered by tests
for dir in directory_list:
urlpath = urllib.request.urlopen(os.path.join(url, dir))
string = urlpath.read().decode("utf-8")
filelist = list(set(re.compile(pattern).findall(string)))
-             drifter_urls += [os.path.join(url, dir, f) for f in filelist]
+             for f in filelist:
+                 drifter_urls.append(os.path.join(url, dir, f))

Check warning on line 88 in clouddrift/adapters/gdp6h.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp6h.py#L87-L88

Added lines #L87 - L88 were not covered by tests

# retrieve only a subset of n_random_id trajectories
if n_random_id:
Expand All @@ -95,7 +95,7 @@
)
else:
rng = np.random.RandomState(42)
-             drifter_urls = rng.choice(drifter_urls, n_random_id, replace=False)
+             drifter_urls = list(rng.choice(drifter_urls, n_random_id, replace=False))

Check warning on line 98 in clouddrift/adapters/gdp6h.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/gdp6h.py#L98

Added line #L98 was not covered by tests

download_with_progress(
[
Expand Down Expand Up @@ -204,7 +204,7 @@
ds["BuoyTypeSensorArray"] = (("traj"), gdp.cut_str(ds.BuoyTypeSensorArray, 20))
ds["CurrentProgram"] = (
("traj"),
-         np.int32([gdp.str_to_float(ds.CurrentProgram, -1)]),
+         np.int32(gdp.str_to_float(ds.CurrentProgram, -1)),
)
ds["PurchaserFunding"] = (("traj"), gdp.cut_str(ds.PurchaserFunding, 20))
ds["SensorUpgrade"] = (("traj"), gdp.cut_str(ds.SensorUpgrade, 20))
Expand All @@ -218,16 +218,16 @@
) # remove non ascii char
ds["ManufactureYear"] = (
("traj"),
-         np.int16([gdp.str_to_float(ds.ManufactureYear, -1)]),
+         np.int16(gdp.str_to_float(ds.ManufactureYear, -1)),
)
ds["ManufactureMonth"] = (
("traj"),
-         np.int16([gdp.str_to_float(ds.ManufactureMonth, -1)]),
+         np.int16(gdp.str_to_float(ds.ManufactureMonth, -1)),
)
ds["ManufactureSensorType"] = (("traj"), gdp.cut_str(ds.ManufactureSensorType, 20))
ds["ManufactureVoltage"] = (
("traj"),
-         np.int16([gdp.str_to_float(ds.ManufactureVoltage[:-6], -1)]),
+         np.int16(gdp.str_to_float(ds.ManufactureVoltage[:-6], -1)),
) # e.g. 56 V
ds["FloatDiameter"] = (
("traj"),
Expand Down Expand Up @@ -485,7 +485,7 @@
ra = RaggedArray.from_files(
indices=ids,
preprocess_func=preprocess,
-         name_coords=gdp.GDP_COORDS,
+         coord_dim_map=gdp.GDP_COORDS,
name_meta=gdp.GDP_METADATA,
name_data=GDP_DATA,
rowsize_func=gdp.rowsize,
Expand Down
4 changes: 2 additions & 2 deletions clouddrift/adapters/subsurface_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import tempfile
import warnings
from datetime import datetime
- from typing import Union
+ from typing import Hashable, List, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -49,7 +49,7 @@ def to_xarray(
source_data = scipy.io.loadmat(local_file)

# metadata
-     meta_variables = [
+     meta_variables: List[Hashable] = [
"expList",
"expName",
"expOrg",
Expand Down
6 changes: 3 additions & 3 deletions clouddrift/adapters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@

_CHUNK_SIZE = 1024
_logger = logging.getLogger(__name__)
- _standard_retry_protocol = retry(
+ _standard_retry_protocol: Callable[[WrappedFn], WrappedFn] = retry(
retry=retry_if_exception(
lambda ex: isinstance(ex, (requests.Timeout, requests.HTTPError))
),
wait=wait_exponential_jitter(initial=0.25),
stop=stop_after_attempt(10),
before=lambda rcs: _logger.debug(
-         f"calling {rcs.fn.__module__}.{rcs.fn.__name__}, attempt: {rcs.attempt_number}"
+         f"calling {str(rcs.fn)}, attempt: {rcs.attempt_number}"
),
)

Expand All @@ -42,7 +42,7 @@
if custom_retry_protocol is None:
retry_protocol = _standard_retry_protocol
else:
-         retry_protocol = custom_retry_protocol
+         retry_protocol = custom_retry_protocol  # type: ignore

Check warning on line 45 in clouddrift/adapters/utils.py

View check run for this annotation

Codecov / codecov/patch

clouddrift/adapters/utils.py#L45

Added line #L45 was not covered by tests

executor = concurrent.futures.ThreadPoolExecutor()
futures: dict[
Expand Down
Loading
Loading