Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

better support for relative constraints #213

Merged
merged 11 commits into from
Sep 26, 2021
Merged
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ repos:
- --quiet-level=2

- repo: https://github.com/asottile/pyupgrade
rev: v2.26.0
rev: v2.28.0
hooks:
- id: pyupgrade
args:
Expand Down
49 changes: 28 additions & 21 deletions erddapy/erddapy.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@
def _quote_string_constraints(kwargs: Dict) -> Dict:
"""
For constraints of String variables,
the right-hand-side value must be surrounded by double quotes.
the right-hand-side value must be surrounded by double quotes if they are not relative constraints.

"""
return {k: f'"{v}"' if isinstance(v, str) else v for k, v in kwargs.items()}
return {
k: f'"{v}"' if isinstance(v, str) and not _check_substrings(v) else v
for k, v in kwargs.items()
}


def _format_constraints_url(kwargs: Dict) -> str:
Expand All @@ -42,6 +45,16 @@ def _format_constraints_url(kwargs: Dict) -> str:
return "".join([f"&{k}{v}" for k, v in kwargs.items()])


def _check_substrings(constraint):
    """
    Return True when `constraint` is a tabledap relative constraint.

    The tabledap protocol extends OPeNDAP with relative values such as
    `now-7days`, `min(longitude)+180`, or `max(depth)-23`, and we need to
    pass them intact (unquoted, unparsed) to the URL builder.

    Only values that *start* with `now`, `min(`, or `max(` are treated as
    relative. A plain substring test would also flag ordinary strings such
    as "administration" or "Snowmax", which would then be sent to the
    server without the double quotes required for String variables.

    Args:
        constraint: the right-hand-side value of a constraint; any type,
            it is converted with `str` before matching.

    Returns:
        bool: True if the value looks like a relative constraint.

    """
    # Local import keeps this helper self-contained.
    import re

    # `now` must be a whole word (`now`, `now-7days`, `now+1hour`);
    # `min`/`max` must be used as function calls.
    relative = re.match(r"\s*(?:now\b|(?:min|max)\s*\()", str(constraint))
    return relative is not None


def parse_dates(date_time: Union[datetime, str]) -> float:
"""
ERDDAP ReSTful API standardizes the representation of dates as either ISO
Expand Down Expand Up @@ -147,7 +160,6 @@ class ERDDAP:
variables: a list variables to download.
response: default is HTML.
constraints: download constraints, default None (opendap-like url)
relative_constraints: download constraints based on ERDDAP server calculations, default None
params and requests_kwargs: `request.get` options

Returns:
Expand Down Expand Up @@ -211,7 +223,6 @@ def __init__(

# Initialized only via properties.
self.constraints: Optional[Dict] = None
self.relative_constraints: Optional[Dict] = None
self.server_functions: Optional[Dict] = None
self.dataset_id: OptionalStr = None
self.requests_kwargs: Dict = {}
Expand Down Expand Up @@ -317,12 +328,16 @@ def get_search_url(
base += "&searchFor={searchFor}"

# Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
min_time = kwargs.pop("min_time", None)
max_time = kwargs.pop("max_time", None)
if min_time:
min_time = kwargs.pop("min_time", "")
max_time = kwargs.pop("max_time", "")
if min_time and not _check_substrings(min_time):
kwargs.update({"min_time": parse_dates(min_time)})
if max_time:
else:
kwargs.update({"min_time": min_time})
if max_time and not _check_substrings(max_time):
kwargs.update({"max_time": parse_dates(max_time)})
else:
kwargs.update({"max_time": max_time})

protocol = protocol if protocol else self.protocol
response = response if response else self.response
Expand Down Expand Up @@ -427,7 +442,6 @@ def get_download_url(
dim_names: Optional[ListLike] = None,
response=None,
constraints=None,
relative_constraints=None,
**kwargs,
) -> str:
"""The download URL for the `server` endpoint.
Expand All @@ -445,11 +459,9 @@ def get_download_url(
'time<=': '2017-02-10T00:00:00+00:00',
'time>=': '2016-07-10T00:00:00+00:00',}

relative_constraints (dict): advanced download constraints , default None
example: relative_constraints = {'time>': 'now-7days',
'latitude<':'min(longitude)+180'
'depth>':'max(depth)-23'
}
One can also use relative constraints like {'time>': 'now-7days',
'latitude<': 'min(longitude)+180',
'depth>': 'max(depth)-23',}

Returns:
url (str): the download URL for the `response` chosen.
Expand All @@ -461,9 +473,6 @@ def get_download_url(
dim_names = dim_names if dim_names else self.dim_names
response = response if response else self.response
constraints = constraints if constraints else self.constraints
relative_constraints = (
relative_constraints if relative_constraints else self.relative_constraints
)

if not dataset_id:
raise ValueError(f"Please specify a valid `dataset_id`, got {dataset_id}")
Expand Down Expand Up @@ -517,17 +526,15 @@ def get_download_url(
if constraints:
_constraints = copy.copy(constraints)
for k, v in _constraints.items():
if _check_substrings(v):
continue
if k.startswith("time"):
_constraints.update({k: parse_dates(v)})
_constraints = _quote_string_constraints(_constraints)
_constraints_url = _format_constraints_url(_constraints)

url += f"{_constraints_url}"

if relative_constraints:
_relative_constraints_url = _format_constraints_url(relative_constraints)
url += f"{_relative_constraints_url}"

url = _distinct(url, **kwargs)
return url

Expand Down
84 changes: 72 additions & 12 deletions notebooks/00-quick_intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"source": [
"First we need to instantiate the ERDDAP URL constructor for a server. In this\n",
"example we will use\n",
"[https://gliders.ioos.us/erddap](https://gliders.ioos.us/erddap/index.html).\n"
"[https://upwell.pfeg.noaa.gov/erddap](https://upwell.pfeg.noaa.gov/erddap/index.html).\n"
]
},
{
Expand All @@ -38,7 +38,7 @@
"\n",
"\n",
"e = ERDDAP(\n",
" server=\"NGDAC\", # \"NOAA IOOS NGDAC (National Glider Data Assembly Center)\"\n",
" server=\"UAF\", # NOAA UAF (Unified Access Framework)\n",
" protocol=\"tabledap\",\n",
" response=\"csv\",\n",
")"
Expand All @@ -49,7 +49,7 @@
"metadata": {},
"source": [
"Now we can populate the object with a dataset id, variables of interest, and its\n",
"constraints. We can download the csvp response with the `.to_pandas` method.\n"
"constraints (here, glider data from the last week). Use the `to_pandas` method to download the csv(p) response (comma-separated values with units) and explore the DataFrame.\n"
]
},
{
Expand All @@ -58,7 +58,7 @@
"metadata": {},
"outputs": [],
"source": [
"e.dataset_id = \"whoi_406-20160902T1700\"\n",
"e.dataset_id = \"scrippsGliders\"\n",
"\n",
"e.variables = [\n",
" \"depth\",\n",
Expand All @@ -70,12 +70,7 @@
"]\n",
"\n",
"e.constraints = {\n",
" \"time>=\": \"2016-09-03T00:00:00Z\",\n",
" \"time<=\": \"2016-09-04T00:00:00Z\",\n",
" \"latitude>=\": 38.0,\n",
" \"latitude<=\": 41.0,\n",
" \"longitude>=\": -72.0,\n",
" \"longitude<=\": -69.0,\n",
" \"time>=\": \"now-7days\",\n",
"}\n",
"\n",
"\n",
Expand All @@ -86,6 +81,71 @@
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can constrain the request in time and space with relative constraints, as in the example below. For more ways to access the data, please check the \"Longer introduction.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"constraints = {\n",
" \"time>=\": \"now-14days\",\n",
" \"time<=\": \"now-7days\",\n",
" \"latitude>=\": \"min(latitude)+5\",\n",
" \"latitude<=\": \"max(latitude)-5\",\n",
" \"longitude>=\": \"min(longitude)+5\",\n",
" \"longitude<=\": \"min(longitude)+10\",\n",
" \"depth>=\": \"min(depth)+5\",\n",
" \"depth<=\": \"max(depth)-40\",\n",
"}\n",
"\n",
"\n",
"url = e.get_download_url(\n",
" response=\"html\",\n",
" constraints=constraints,\n",
")\n",
"\n",
"print(url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same request as above but with non-relative constraints. Note that these values will change if we run this at a later time."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"constraints = {\n",
" \"time>=\": \"2021-09-11T23:26:55Z\",\n",
" \"time<=\": \"2021-09-18T23:26:55Z\",\n",
" \"latitude>=\": -7.52,\n",
" \"latitude<=\": 46.67,\n",
" \"longitude>=\": -121.48,\n",
" \"longitude<=\": -116.48,\n",
" \"depth>=\": -3.82,\n",
" \"depth<=\": 1001.59,\n",
"}\n",
"\n",
"url = e.get_download_url(\n",
" response=\"html\",\n",
" constraints=constraints,\n",
")\n",
"\n",
"print(url)"
]
}
],
"metadata": {
Expand All @@ -101,7 +161,7 @@
},
"gist_id": "3f0f25b13ade0c64c84607bd92903d1b",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -115,7 +175,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.9.7"
}
},
"nbformat": 4,
Expand Down
42 changes: 42 additions & 0 deletions tests/cassettes/test_download_url_distinct.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interactions:
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, br
Connection:
- keep-alive
User-Agent:
- python-requests/2.26.0
method: HEAD
uri: https://upwell.pfeg.noaa.gov/erddap/tabledap/gtoppAT.htmlTable?commonName,yearDeployed,serialNumber&distinct()
response:
body:
string: ''
headers:
Connection:
- close
Content-Type:
- text/plain; charset=UTF-8
Date:
- Fri, 24 Sep 2021 23:36:03 GMT
Last-Modified:
- Fri, 24 Sep 2021 23:36:03 GMT
Location:
- https://oceanview.pfeg.noaa.gov/erddap/tabledap/gtoppAT.htmlTable?commonName%2CyearDeployed%2CserialNumber&distinct()
Strict-Transport-Security:
- max-age=31536000; includeSubDomains
Transfer-Encoding:
- chunked
X-Frame-Options:
- SAMEORIGIN
erddap-server:
- '2.02'
xdods-server:
- dods/3.7
status:
code: 302
message: ''
version: 1
Loading