Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

yomaha and andro datasets #322

Merged
merged 17 commits into from
Dec 14, 2023
2 changes: 2 additions & 0 deletions clouddrift/adapters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
in the future.
"""

import clouddrift.adapters.andro
import clouddrift.adapters.gdp1h
import clouddrift.adapters.gdp6h
import clouddrift.adapters.glad
import clouddrift.adapters.mosaic
import clouddrift.adapters.subsurface_floats
import clouddrift.adapters.yomaha
338 changes: 338 additions & 0 deletions clouddrift/adapters/andro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
"""
This module defines functions used to adapt the dataset "ANDRO: An Argo-based
deep displacement dataset" as a ragged-array dataset.

The dataset is hosted at https://www.seanoe.org/data/00360/47077/ and the user manual
is available at https://archimer.ifremer.fr/doc/00360/47126/.

Example
-------
>>> from clouddrift.adapters import andro
>>> ds = andro.to_xarray()

Reference
---------
Ollitrault Michel, Rannou Philippe, Brion Emilie, Cabanes Cecile, Piron Anne, Reverdin Gilles,
Kolodziejczyk Nicolas (2022). ANDRO: An Argo-based deep displacement dataset.
SEANOE. https://doi.org/10.17882/47077
"""

from clouddrift.adapters.yomaha import download_with_progress
from datetime import datetime
import numpy as np
import os
import pandas as pd
import tempfile
import xarray as xr
import warnings

# URL of the single ANDRO data file downloaded by to_xarray().
ANDRO_URL = "https://www.seanoe.org/data/00360/47077/data/91950.dat"
# Default download directory, located under the system temporary directory.
ANDRO_TMP_PATH = os.path.join(tempfile.gettempdir(), "clouddrift", "andro")
# Date reported in the "history" global attribute of the returned dataset.
ANDRO_VERSION = "2022-03-04"


def to_xarray(tmp_path: str = None) -> "xr.Dataset":
    """Download the ANDRO data file and return it as an Xarray Dataset.

    The whitespace-delimited data file is parsed with pandas, converted to an
    Xarray Dataset, and annotated with variable and global attributes.

    Parameters
    ----------
    tmp_path : str, optional
        Directory in which the data file is stored. Defaults to
        ``ANDRO_TMP_PATH``. The directory is created if it does not exist.

    Returns
    -------
    xarray.Dataset
        Dataset whose per-cycle variables are defined along the ``obs``
        dimension, with ``id`` (unique float identifiers) and ``rowsize``
        (number of rows per identifier) defined along the ``traj`` dimension.
    """
    if tmp_path is None:
        tmp_path = ANDRO_TMP_PATH
    # exist_ok=True makes this a no-op for an existing directory, so it is
    # safe to call for user-supplied paths as well as for the default one.
    os.makedirs(tmp_path, exist_ok=True)

    # Fetch the data file (or refresh the local copy); the exact caching
    # behaviour is delegated to download_with_progress.
    local_file = os.path.join(tmp_path, ANDRO_URL.split("/")[-1])
    download_with_progress(ANDRO_URL, local_file)

    # Column names, in the order in which they appear in the data file.
    col_names = [
        # deep (parking depth) velocity
        "lon_d",
        "lat_d",
        "pres_d",
        "temp_d",
        "sal_d",
        "time_d",
        "ve_d",
        "vn_d",
        "err_ve_d",
        "err_vn_d",
        # first surface velocity
        "lon_s",
        "lat_s",
        "time_s",
        "ve_s",
        "vn_s",
        "err_ve_s",
        "err_vn_s",
        # last surface velocity
        "lon_ls",
        "lat_ls",
        "time_ls",
        "ve_ls",
        "vn_ls",
        "err_ve_ls",
        "err_vn_ls",
        # last fix previous cycle
        "lon_lp",
        "lat_lp",
        "time_lp",
        # first fix current cycle
        "lon_fc",
        "lat_fc",
        "time_fc",
        # last fix current cycle
        "lon_lc",
        "lat_lc",
        "time_lc",
        # bookkeeping
        "surf_fix",
        "id",
        "cycle",
        "profile_id",
    ]

    # Missing-value markers, listed in the same order as col_names.
    # NOTE(review): this is passed to pandas as a flat list, so every marker
    # is treated as missing in *every* column, not only in the column it is
    # listed for. Confirm whether a per-column mapping was intended.
    na_col = [
        # deep (parking depth) velocity
        -999.9999,
        -99.9999,
        -999.9,
        -99.999,
        -99.999,
        -9999.999,
        -999.9,
        -999.9,
        -999.9,
        -999.9,
        # first surface velocity
        -999.9999,
        -99.999,
        -9999.999,
        -999.99,
        -999.99,
        -999.99,
        -999.99,
        # last surface velocity
        -999.9999,
        -99.9999,
        -9999.999,
        -999.99,
        -999.99,
        -999.99,
        -999.99,
        # last fix previous cycle
        -999.9999,
        -99.9999,
        -9999.999,
        # first fix current cycle
        -999.9999,
        -99.9999,
        -9999.999,
        # last fix current cycle
        -999.9999,
        -99.9999,
        -9999.999,
        # bookkeeping
        np.nan,
        np.nan,
        np.nan,
        -99,
    ]

    # Parse the whitespace-delimited file with pandas. The separator is a raw
    # string so that "\s" is a regex token and not an invalid string escape.
    df = pd.read_csv(
        local_file, names=col_names, sep=r"\s+", header=None, na_values=na_col
    )

    # Convert to an Xarray Dataset
    ds = xr.Dataset.from_dataframe(df)

    # One entry per float: its identifier and its number of rows.
    # NOTE(review): np.unique returns sorted identifiers; rowsize only indexes
    # the obs dimension correctly if the file rows are grouped by id in that
    # same order -- confirm against the upstream file.
    unique_id, rowsize = np.unique(ds["id"], return_counts=True)

    # NOTE(review): the original coordinate list named "time_lp" twice, while
    # "time_ls" and "time_fc" are not promoted to coordinates. The duplicate
    # is dropped here (no behaviour change); confirm whether the two other
    # time variables were meant to be coordinates as well.
    ds = (
        ds.rename_dims({"index": "obs"})
        .assign({"id": ("traj", unique_id)})
        .assign({"rowsize": ("traj", rowsize)})
        .set_coords(["id", "time_d", "time_s", "time_lp", "time_lc"])
        .drop_vars(["index"])
    )

    # Cast double floats to singles, except for positions which stay float64.
    double_vars = [
        "lat_d",
        "lon_d",
        "lat_s",
        "lon_s",
        "lat_ls",
        "lon_ls",
        "lat_lp",
        "lon_lp",
        "lat_fc",
        "lon_fc",
        "lat_lc",
        "lon_lc",
    ]
    # Materialise the names first: variables are reassigned inside the loop.
    for var in [v for v in ds.variables if v not in double_vars]:
        if ds[var].dtype == "float64":
            ds[var] = ds[var].astype("float32")

    # define attributes
    vars_attrs = {
        "lon_d": {
            "long_name": "Longitude of the location where the deep velocity is calculated",
            "units": "degrees_east",
        },
        "lat_d": {
            "long_name": "Latitude of the location where the deep velocity is calculated",
            "units": "degrees_north",
        },
        "pres_d": {
            "long_name": "Reference parking pressure for this cycle",
            "units": "dbar",
        },
        "temp_d": {
            "long_name": "Parking temperature (°C) for this cycle",
            "units": "degree_C",
        },
        "sal_d": {
            "long_name": "Parking salinity for this cycle",
            "units": "psu",
        },
        "time_d": {
            "long_name": "Julian time (days) when deep velocity is estimated",
            "units": "days since 2000-01-01 00:00",
        },
        "ve_d": {
            "long_name": "Eastward component of the deep velocity",
            "units": "cm s-1",
        },
        "vn_d": {
            "long_name": "Northward component of the deep velocity",
            "units": "cm s-1",
        },
        "err_ve_d": {
            "long_name": "Error on the eastward component of the deep velocity",
            "units": "cm s-1",
        },
        "err_vn_d": {
            "long_name": "Error on the northward component of the deep velocity",
            "units": "cm s-1",
        },
        "lon_s": {
            "long_name": "Longitude of the location where the first surface velocity is calculated (over the first 6 h at surface)",
            "units": "degrees_east",
        },
        "lat_s": {
            "long_name": "Latitude of the location where the first surface velocity is calculated",
            "units": "degrees_north",
        },
        "time_s": {
            "long_name": "Julian time (days) when the first surface velocity is calculated",
            "units": "days since 2000-01-01 00:00",
        },
        "ve_s": {
            "long_name": "Eastward component of first surface velocity",
            "units": "cm s-1",
        },
        "vn_s": {
            "long_name": "Northward component of first surface velocity",
            "units": "cm s-1",
        },
        "err_ve_s": {
            "long_name": "Error on the eastward component of the first surface velocity",
            "units": "cm s-1",
        },
        "err_vn_s": {
            "long_name": "Error on the northward component of the first surface velocity",
            "units": "cm s-1",
        },
        "lon_ls": {
            "long_name": "Longitude of the location where the last surface velocity is calculated (over the last 6 h at surface)",
            "units": "degrees_east",
        },
        "lat_ls": {
            "long_name": "Latitude of the location where the last surface velocity is calculated",
            "units": "degrees_north",
        },
        "time_ls": {
            "long_name": "Julian time (days) when the last surface velocity is calculated",
            "units": "days since 2000-01-01 00:00",
        },
        "ve_ls": {
            "long_name": "Eastward component of last surface velocity (cm s-1)",
            "units": "cm s-1",
        },
        "vn_ls": {
            "long_name": "Northward component of last surface velocity (cm s-1)",
            "units": "cm s-1",
        },
        "err_ve_ls": {
            "long_name": "Error on the eastward component of the last surface velocity",
            "units": "cm s-1",
        },
        "err_vn_ls": {
            "long_name": "Error on the northward component of the last surface velocity",
            "units": "cm s-1",
        },
        "lon_lp": {
            "long_name": "Longitude of the last fix at the sea surface during the previous cycle",
            "units": "degrees_east",
        },
        "lat_lp": {
            "long_name": "Latitude of the last fix at the sea surface during the previous cycle",
            "units": "degrees_north",
        },
        "time_lp": {
            "long_name": "Julian time of the last fix at the sea surface during the previous cycle",
            "units": "days since 2000-01-01 00:00",
        },
        "lon_fc": {
            "long_name": "Longitude of the first fix at the sea surface during the current cycle",
            "units": "degrees_east",
        },
        "lat_fc": {
            "long_name": "Latitude of the first fix at the sea surface during the current cycle",
            "units": "degrees_north",
        },
        "time_fc": {
            "long_name": "Julian time of the first fix at the sea surface during the current cycle",
            "units": "days since 2000-01-01 00:00",
        },
        "lon_lc": {
            "long_name": "Longitude of the last fix at the sea surface during the current cycle",
            "units": "degrees_east",
        },
        "lat_lc": {
            "long_name": "Latitude of the last fix at the sea surface during the current cycle",
            "units": "degrees_north",
        },
        "time_lc": {
            "long_name": "Julian time of the last fix at the sea surface during the current cycle",
            "units": "days since 2000-01-01 00:00",
        },
        "surf_fix": {
            "long_name": "Number of surface fixes during the current cycle",
            "units": "-",
        },
        "id": {
            "long_name": "Float WMO number",
            "units": "-",
        },
        "cycle": {
            "long_name": "Cycle number",
            "units": "-",
        },
        "profile_id": {
            "long_name": "Profile number as given in the NetCDF prof file",
            "units": "-",
        },
    }

    # global attributes
    attrs = {
        "title": "ANDRO: An Argo-based deep displacement dataset",
        "history": f"Dataset updated on {ANDRO_VERSION}",
        "date_created": datetime.now().isoformat(),
        "publisher_name": "SEANOE (SEA scieNtific Open data Edition)",
        "publisher_url": "https://www.seanoe.org/data/00360/47077/",
        "license": "Creative Commons Attribution 4.0 International License (http://creativecommons.org/licenses/by/4.0/)",
    }

    # Attach the per-variable attributes; warn (rather than fail) when a
    # documented variable is absent from the parsed data.
    for var, var_attrs in vars_attrs.items():
        if var in ds:
            ds[var].attrs = var_attrs
        else:
            warnings.warn(f"Variable {var} not found in upstream data; skipping.")

    ds.attrs = attrs

    return ds
2 changes: 1 addition & 1 deletion clouddrift/adapters/gdp1h.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
"publisher_name": "GDP Drifter DAC",
"publisher_email": "aoml.dftr@noaa.gov",
"publisher_url": "https://www.aoml.noaa.gov/phod/gdp",
"licence": "freely available",
"license": "freely available",
"processing_level": "Level 2 QC by GDP drifter DAC",
"metadata_link": "https://www.aoml.noaa.gov/phod/dac/dirall.html",
"contributor_name": "NOAA Global Drifter Program",
Expand Down
2 changes: 1 addition & 1 deletion clouddrift/adapters/gdp6h.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ def preprocess(index: int, **kwargs) -> xr.Dataset:
"publisher_name": "GDP Drifter DAC",
"publisher_email": "aoml.dftr@noaa.gov",
"publisher_url": "https://www.aoml.noaa.gov/phod/gdp",
"licence": "freely available",
"license": "freely available",
"processing_level": "Level 2 QC by GDP drifter DAC",
"metadata_link": "https://www.aoml.noaa.gov/phod/dac/dirall.html",
"contributor_name": "NOAA Global Drifter Program",
Expand Down
2 changes: 1 addition & 1 deletion clouddrift/adapters/subsurface_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def to_xarray(
"date_created": datetime.now().isoformat(),
"publisher_name": "WOCE Subsurface Float Data Assembly Center and NOAA AOML",
"publisher_url": "https://www.aoml.noaa.gov/phod/float_traj/data.php",
"licence": "freely available",
"license": "freely available",
"acknowledgement": f"Maintained by Andree Ramsey and Heather Furey from the Woods Hole Oceanographic Institution",
}

Expand Down
Loading