Skip to content

Commit

Permalink
Cast GDP float64 data to float32 as an option (default) (Cloud-Drift#269
Browse files Browse the repository at this point in the history
)

* Cast GDP float64 data to float32 as an option (default)

* Update raggedarray.py

to_xarray() docstring extension.

* Move casting to adapters.gdp

---------

Co-authored-by: Shane Elipot <selipot@miami.edu>
  • Loading branch information
2 people authored and Philippe Miron committed Nov 16, 2023
1 parent 896b07d commit 465f26c
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 1 deletion.
27 changes: 27 additions & 0 deletions clouddrift/adapters/gdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,33 @@
]


def cast_float64_variables_to_float32(
    ds: xr.Dataset,
    # The annotation is quoted so that the ``|`` union is not evaluated when
    # the function is defined.
    variables_to_skip: "list[str] | None" = None,
) -> xr.Dataset:
    """Cast all float64 variables except ``variables_to_skip`` to float32.

    Extra precision from float64 is not needed and takes up memory and disk
    space.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to modify. The variables are reassigned on this object, and
        the same object is returned.
    variables_to_skip : list[str], optional
        List of variables to skip; default is ["time", "lat", "lon"].

    Returns
    -------
    ds : xr.Dataset
        Modified dataset
    """
    # A ``None`` sentinel replaces the former list default so that no mutable
    # object is shared between calls.
    if variables_to_skip is None:
        variables_to_skip = ["time", "lat", "lon"]

    # Iterate over a snapshot of the variable names: the loop body reassigns
    # variables on ``ds`` while the names are being walked.
    for var in list(ds.variables):
        if var in variables_to_skip:
            continue
        if ds[var].dtype == "float64":
            ds[var] = ds[var].astype("float32")
    return ds


def parse_directory_file(filename: str) -> pd.DataFrame:
"""Read a GDP directory file that contains metadata of drifter releases.
Expand Down
3 changes: 3 additions & 0 deletions clouddrift/adapters/gdp1h.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,9 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
# rename variables
ds = ds.rename_vars({"longitude": "lon", "latitude": "lat"})

# Cast float64 variables to float32 to reduce memory footprint.
ds = gdp.cast_float64_variables_to_float32(ds)

return ds


Expand Down
3 changes: 3 additions & 0 deletions clouddrift/adapters/gdp6h.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,9 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
# rename variables
ds = ds.rename_vars({"longitude": "lon", "latitude": "lat"})

# Cast float64 variables to float32 to reduce memory footprint.
ds = gdp.cast_float64_variables_to_float32(ds)

return ds


Expand Down
8 changes: 7 additions & 1 deletion clouddrift/raggedarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,9 +381,15 @@ def validate_attributes(self):
if key not in self.attrs_variables:
self.attrs_variables[key] = {}

def to_xarray(self):
def to_xarray(self, cast_to_float32: bool = True):
"""Convert ragged array object to a xarray Dataset.
Parameters
----------
cast_to_float32 : bool, optional
Cast all float64 variables to float32 (default is True). This option aims at
minimizing the size of the xarray dataset.
Returns
-------
xr.Dataset
Expand Down

0 comments on commit 465f26c

Please sign in to comment.