Skip to content

Commit

Permalink
yomaha and andro datasets (#322)
Browse files Browse the repository at this point in the history
* first implementation

* andro

* add citation

* add citation

* cleanup

* lint

* update

* fix download temp path

* typo

* update

* update datasets view

* fix license spelling, fix andro license

* docstring edits

* version bump

* added to doc

---------

Co-authored-by: Philippe Miron <philippe.miron@dtn.com>
Co-authored-by: Shane Elipot <selipot@miami.edu>
  • Loading branch information
3 people authored Dec 14, 2023
1 parent 1d77056 commit cc074b0
Show file tree
Hide file tree
Showing 10 changed files with 909 additions and 13 deletions.
2 changes: 2 additions & 0 deletions clouddrift/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
in the future.
"""

import clouddrift.adapters.andro
import clouddrift.adapters.gdp1h
import clouddrift.adapters.gdp6h
import clouddrift.adapters.glad
import clouddrift.adapters.mosaic
import clouddrift.adapters.subsurface_floats
import clouddrift.adapters.yomaha
338 changes: 338 additions & 0 deletions clouddrift/adapters/andro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
"""
This module defines functions used to adapt the ANDRO: An Argo-based
deep displacement dataset as a ragged-arrays dataset.
The dataset is hosted at https://www.seanoe.org/data/00360/47077/ and the user manual
is available at https://archimer.ifremer.fr/doc/00360/47126/.
Example
-------
>>> from clouddrift.adapters import andro
>>> ds = andro.to_xarray()
Reference
---------
Ollitrault Michel, Rannou Philippe, Brion Emilie, Cabanes Cecile, Piron Anne, Reverdin Gilles,
Kolodziejczyk Nicolas (2022). ANDRO: An Argo-based deep displacement dataset.
SEANOE. https://doi.org/10.17882/47077
"""

from clouddrift.adapters.yomaha import download_with_progress
from datetime import datetime
import numpy as np
import os
import pandas as pd
import tempfile
import xarray as xr
import warnings

# order of the URLs is important
ANDRO_URL = "https://www.seanoe.org/data/00360/47077/data/91950.dat"
ANDRO_TMP_PATH = os.path.join(tempfile.gettempdir(), "clouddrift", "andro")
ANDRO_VERSION = "2022-03-04"


def to_xarray(tmp_path: str = None):
if tmp_path is None:
tmp_path = ANDRO_TMP_PATH
os.makedirs(tmp_path, exist_ok=True)

# get or update dataset
local_file = f"{tmp_path}/{ANDRO_URL.split('/')[-1]}"
download_with_progress(ANDRO_URL, local_file)

# parse with panda
col_names = [
# depth
"lon_d",
"lat_d",
"pres_d",
"temp_d",
"sal_d",
"time_d",
"ve_d",
"vn_d",
"err_ve_d",
"err_vn_d",
# first surface velocity
"lon_s",
"lat_s",
"time_s",
"ve_s",
"vn_s",
"err_ve_s",
"err_vn_s",
# last surface velocity
"lon_ls",
"lat_ls",
"time_ls",
"ve_ls",
"vn_ls",
"err_ve_ls",
"err_vn_ls",
# last fix previous cycle
"lon_lp",
"lat_lp",
"time_lp",
# first fix current cycle
"lon_fc",
"lat_fc",
"time_fc",
# last fix current cycle
"lon_lc",
"lat_lc",
"time_lc",
"surf_fix",
"id",
"cycle",
"profile_id",
]

na_col = [
-999.9999,
-99.9999,
-999.9,
-99.999,
-99.999,
-9999.999,
-999.9,
-999.9,
-999.9,
-999.9,
-999.9999,
-99.999,
-9999.999,
-999.99,
-999.99,
-999.99,
-999.99,
-999.9999,
-99.9999,
-9999.999,
-999.99,
-999.99,
-999.99,
-999.99,
-999.9999,
-99.9999,
-9999.999,
-999.9999,
-99.9999,
-9999.999,
-999.9999,
-99.9999,
-9999.999,
np.nan,
np.nan,
np.nan,
-99,
]

# open with pandas
df = pd.read_csv(
local_file, names=col_names, sep="\s+", header=None, na_values=na_col
)

# convert to an Xarray Dataset
ds = xr.Dataset.from_dataframe(df)

unique_id, rowsize = np.unique(ds["id"], return_counts=True)

ds = (
ds.rename_dims({"index": "obs"})
.assign({"id": ("traj", unique_id)})
.assign({"rowsize": ("traj", rowsize)})
.set_coords(["id", "time_d", "time_s", "time_lp", "time_lc", "time_lp"])
.drop_vars(["index"])
)

# Cast double floats to singles
double_vars = [
"lat_d",
"lon_d",
"lat_s",
"lon_s",
"lat_ls",
"lon_ls",
"lat_lp",
"lon_lp",
"lat_fc",
"lon_fc",
"lat_lc",
"lon_lc",
]
for var in [v for v in ds.variables if v not in double_vars]:
if ds[var].dtype == "float64":
ds[var] = ds[var].astype("float32")

# define attributes
vars_attrs = {
"lon_d": {
"long_name": "Longitude of the location where the deep velocity is calculated",
"units": "degrees_east",
},
"lat_d": {
"long_name": "Latitude of the location where the deep velocity is calculated",
"units": "degrees_north",
},
"pres_d": {
"long_name": "Reference parking pressure for this cycle",
"units": "dbar",
},
"temp_d": {
"long_name": "Parking temperature (°C) for this cycle",
"units": "degree_C",
},
"sal_d": {
"long_name": "Parking salinity for this cycle",
"units": "psu",
},
"time_d": {
"long_name": "Julian time (days) when deep velocity is estimated",
"units": "days since 2000-01-01 00:00",
},
"ve_d": {
"long_name": "Eastward component of the deep velocity",
"units": "cm s-1",
},
"vn_d": {
"long_name": "Northward component of the deep velocity",
"units": "cm s-1",
},
"err_ve_d": {
"long_name": "Error on the eastward component of the deep velocity",
"units": "cm s-1",
},
"err_vn_d": {
"long_name": "Error on the northward component of the deep velocity",
"units": "cm s-1",
},
"lon_s": {
"long_name": "Longitude of the location where the first surface velocity is calculated (over the first 6 h at surface)",
"units": "degrees_east",
},
"lat_s": {
"long_name": "Latitude of the location where the first surface velocity is calculated",
"units": "degrees_north",
},
"time_s": {
"long_name": "Julian time (days) when the first surface velocity is calculated",
"units": "days since 2000-01-01 00:00",
},
"ve_s": {
"long_name": "Eastward component of first surface velocity",
"units": "cm s-1",
},
"vn_s": {
"long_name": "Northward component of first surface velocity",
"units": "cm s-1",
},
"err_ve_s": {
"long_name": "Error on the eastward component of the first surface velocity",
"units": "cm s-1",
},
"err_vn_s": {
"long_name": "Error on the northward component of the first surface velocity",
"units": "cm s-1",
},
"lon_ls": {
"long_name": "Longitude of the location where the last surface velocity is calculated (over the last 6 h at surface)",
"units": "degrees_east",
},
"lat_ls": {
"long_name": "Latitude of the location where the last surface velocity is calculated",
"units": "degrees_north",
},
"time_ls": {
"long_name": "Julian time (days) when the last surface velocity is calculated",
"units": "days since 2000-01-01 00:00",
},
"ve_ls": {
"long_name": "Eastward component of last surface velocity (cm s-1)",
"units": "cm s-1",
},
"vn_ls": {
"long_name": "Northward component of last surface velocity (cm s-1)",
"units": "cm s-1",
},
"err_ve_ls": {
"long_name": "Error on the eastward component of the last surface velocity",
"units": "cm s-1",
},
"err_vn_ls": {
"long_name": "Error on the northward component of the last surface velocity",
"units": "cm s-1",
},
"lon_lp": {
"long_name": "Longitude of the last fix at the sea surface during the previous cycle",
"units": "degrees_east",
},
"lat_lp": {
"long_name": "Latitude of the last fix at the sea surface during the previous cycle",
"units": "degrees_north",
},
"time_lp": {
"long_name": "Julian time of the last fix at the sea surface during the previous cycle",
"units": "days since 2000-01-01 00:00",
},
"lon_fc": {
"long_name": "Longitude of the first fix at the sea surface during the current cycle",
"units": "degrees_east",
},
"lat_fc": {
"long_name": "Latitude of the first fix at the sea surface during the current cycle",
"units": "degrees_north",
},
"time_fc": {
"long_name": "Julian time of the first fix at the sea surface during the current cycle",
"units": "days since 2000-01-01 00:00",
},
"lon_lc": {
"long_name": "Longitude of the last fix at the sea surface during the current cycle",
"units": "degrees_east",
},
"lat_lc": {
"long_name": "Latitude of the last fix at the sea surface during the current cycle",
"units": "degrees_north",
},
"time_lc": {
"long_name": "Julian time of the last fix at the sea surface during the current cycle",
"units": "days since 2000-01-01 00:00",
},
"surf_fix": {
"long_name": "Number of surface fixes during the current cycle",
"units": "-",
},
"id": {
"long_name": "Float WMO number",
"units": "-",
},
"cycle": {
"long_name": "Cycle number",
"units": "-",
},
"profile_id": {
"long_name": "Profile number as given in the NetCDF prof file",
"units": "-",
},
}

# global attributes
attrs = {
"title": "ANDRO: An Argo-based deep displacement dataset",
"history": f"Dataset updated on {ANDRO_VERSION}",
"date_created": datetime.now().isoformat(),
"publisher_name": "SEANOE (SEA scieNtific Open data Edition)",
"publisher_url": "https://www.seanoe.org/data/00360/47077/",
"license": "Creative Commons Attribution 4.0 International License (http://creativecommons.org/licenses/by/4.0/)",
}

# set attributes
for var in vars_attrs.keys():
if var in ds.keys():
ds[var].attrs = vars_attrs[var]
else:
warnings.warn(f"Variable {var} not found in upstream data; skipping.")
ds.attrs = attrs

return ds
2 changes: 1 addition & 1 deletion clouddrift/adapters/gdp1h.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
"publisher_name": "GDP Drifter DAC",
"publisher_email": "aoml.dftr@noaa.gov",
"publisher_url": "https://www.aoml.noaa.gov/phod/gdp",
"licence": "freely available",
"license": "freely available",
"processing_level": "Level 2 QC by GDP drifter DAC",
"metadata_link": "https://www.aoml.noaa.gov/phod/dac/dirall.html",
"contributor_name": "NOAA Global Drifter Program",
Expand Down
2 changes: 1 addition & 1 deletion clouddrift/adapters/gdp6h.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
"publisher_name": "GDP Drifter DAC",
"publisher_email": "aoml.dftr@noaa.gov",
"publisher_url": "https://www.aoml.noaa.gov/phod/gdp",
"licence": "freely available",
"license": "freely available",
"processing_level": "Level 2 QC by GDP drifter DAC",
"metadata_link": "https://www.aoml.noaa.gov/phod/dac/dirall.html",
"contributor_name": "NOAA Global Drifter Program",
Expand Down
2 changes: 1 addition & 1 deletion clouddrift/adapters/subsurface_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def to_xarray(
"date_created": datetime.now().isoformat(),
"publisher_name": "WOCE Subsurface Float Data Assembly Center and NOAA AOML",
"publisher_url": "https://www.aoml.noaa.gov/phod/float_traj/data.php",
"licence": "freely available",
"license": "freely available",
"acknowledgement": f"Maintained by Andree Ramsey and Heather Furey from the Woods Hole Oceanographic Institution",
}

Expand Down
Loading

0 comments on commit cc074b0

Please sign in to comment.