Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pvlib.iotools.get_srml to adhere to SRML's new file structure #1931

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/sphinx/source/reference/iotools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ of sources and file formats relevant to solar energy modeling.
iotools.read_epw
iotools.parse_epw
iotools.read_srml
iotools.read_srml_month_from_solardat
iotools.get_srml
iotools.read_surfrad
iotools.read_midc
Expand Down
1 change: 0 additions & 1 deletion pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from pvlib.iotools.tmy import read_tmy2, read_tmy3 # noqa: F401
from pvlib.iotools.epw import read_epw, parse_epw # noqa: F401
from pvlib.iotools.srml import read_srml # noqa: F401
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
from pvlib.iotools.srml import get_srml # noqa: F401
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
from pvlib.iotools.midc import read_midc # noqa: F401
Expand Down
22 changes: 14 additions & 8 deletions pvlib/iotools/srml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import urllib
import warnings
import os

from pvlib._deprecation import deprecated

Expand All @@ -26,6 +27,8 @@
'937': 'temp_cell',
}

URL = 'http://is-solardata01.uoregon.edu/Step3B_Original_Format/'


def read_srml(filename, map_variables=True):
"""
Expand Down Expand Up @@ -204,7 +207,7 @@
raw or processed data. For instance, `RO` designates raw, one minute
data and `PO` designates processed one minute data. The availability
of file types varies between sites. Below is a table of file types
and their time intervals. See [1] for site information.
and their time intervals. See [1]_ for site information.

============= ============ ==================
time interval raw filetype processed filetype
Expand All @@ -231,7 +234,7 @@


def get_srml(station, start, end, filetype='PO', map_variables=True,
url="http://solardat.uoregon.edu/download/Archive/"):
url=URL):
"""Request data from UoO SRML and read it into a Dataframe.

The University of Oregon Solar Radiation Monitoring Laboratory (SRML) is
Expand All @@ -242,7 +245,7 @@
Parameters
----------
station : str
Two letter station abbreviation.
Three-letter station abbreviation.
start : datetime-like
First day of the requested period
end : datetime-like
Expand All @@ -252,7 +255,7 @@
map_variables : bool, default: True
When true, renames columns of the DataFrame to pvlib variable names
where applicable. See variable :const:`VARIABLE_MAP`.
url : str, default: 'http://solardat.uoregon.edu/download/Archive/'
url : str, default: :const:`URL`
API endpoint URL

Returns
Expand Down Expand Up @@ -292,17 +295,20 @@
`http://solardat.uoregon.edu/StationIDCodes.html
<http://solardat.uoregon.edu/StationIDCodes.html>`_
"""
# Prior to pvlib 0.10.3 this function accepted two-letter station abbreviations
if len(station) != 3:
raise ValueError('`station` should be a 3 letter station abbreviation')

Check warning on line 300 in pvlib/iotools/srml.py

View check run for this annotation

Codecov / codecov/patch

pvlib/iotools/srml.py#L300

Added line #L300 was not covered by tests

# Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted
start = pd.to_datetime(start)
end = pd.to_datetime(end)

# Generate list of months
months = pd.date_range(
start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M')
months_str = months.strftime('%y%m')

# Generate list of filenames
filenames = [f"{station}{filetype}{m}.txt" for m in months_str]
# Generate list of filenames (note basename uses two-letter abbreviation)
filenames = [f"{station}/{station}_{m.year}/{station[:2]}{filetype}{m.strftime('%y%m')}.txt" for m in months] # noqa: E501

dfs = [] # Initialize list of monthly dataframes
for f in filenames:
Expand All @@ -316,6 +322,6 @@

meta = {'filetype': filetype,
'station': station,
'filenames': filenames}
'filenames': [os.path.basename(f) for f in filenames]}

return data, meta
97 changes: 25 additions & 72 deletions pvlib/tests/iotools/test_srml.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
from numpy import isnan
import pandas as pd
import pytest
import os

from pvlib.iotools import srml
from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
assert_frame_equal, fail_on_pvlib_version)
from pvlib._deprecation import pvlibDeprecationWarning
assert_frame_equal)

srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt'

BASE_URL = 'http://is-solardata01.uoregon.edu/Step3B_Original_Format/'


def test_read_srml():
srml.read_srml(srml_testfile)


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_read_srml_remote():
srml.read_srml('http://solardat.uoregon.edu/download/Archive/EUPO1801.txt')
srml.read_srml(os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt'))


def test_read_srml_columns_exist():
Expand Down Expand Up @@ -47,11 +48,10 @@ def test_read_srml_nans_exist():
assert data['dni_0_flag'].iloc[1119] == 99


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.parametrize('url,year,month', [
('http://solardat.uoregon.edu/download/Archive/EUPO1801.txt',
(os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt'),
2018, 1),
('http://solardat.uoregon.edu/download/Archive/EUPO1612.txt',
(os.path.join(BASE_URL, 'EUO/EUO_2016/EUPO1612.txt'),
2016, 12),
])
@pytest.mark.remote_data
Expand All @@ -78,96 +78,49 @@ def test__map_columns(column, expected):
assert srml._map_columns(column) == expected


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml():
url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
url = os.path.join(BASE_URL, 'EUO/EUO_2018/EUPO1801.txt')
file_data = srml.read_srml(url)
requested, _ = srml.get_srml(station='EU', start='2018-01-01',
requested, _ = srml.get_srml(station='EUO', start='2018-01-01',
end='2018-01-31')
assert_frame_equal(file_data, requested)


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_read_srml_month_from_solardat():
url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
file_data = srml.read_srml(url)
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
requested = srml.read_srml_month_from_solardat('EU', 2018, 1)
assert file_data.equals(requested)


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_15_minute_dt_index():
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
start = pd.Timestamp('20190401 00:00')
start = start.tz_localize('Etc/GMT+8')
end = pd.Timestamp('20190430 23:45')
end = end.tz_localize('Etc/GMT+8')
assert data.index[0] == start
assert data.index[-1] == end
assert (data.index[3::4].minute == 45).all()


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@fail_on_pvlib_version('0.11')
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_hourly_dt_index():
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
start = pd.Timestamp('19860401 00:00')
start = start.tz_localize('Etc/GMT+8')
end = pd.Timestamp('19860430 23:00')
end = end.tz_localize('Etc/GMT+8')
assert data.index[0] == start
assert data.index[-1] == end
assert (data.index.minute == 0).all()


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_hourly():
data, meta = data, meta = srml.get_srml(station='CD', start='1986-04-01',
end='1986-05-31', filetype='PH')
expected_index = pd.date_range(start='1986-04-01', end='1986-05-31 23:59',
freq='1h', tz='Etc/GMT+8')
assert_index_equal(data.index, expected_index)


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_minute():
data_read = srml.read_srml(srml_testfile)
data_get, meta = srml.get_srml(station='EU', start='2018-01-01',
data_get, meta = srml.get_srml(station='EUO', start='2018-01-01',
end='2018-01-31', filetype='PO')
expected_index = pd.date_range(start='2018-01-01', end='2018-01-31 23:59',
freq='1min', tz='Etc/GMT+8')
assert_index_equal(data_get.index, expected_index)
assert all(c in data_get.columns for c in data_read.columns)
# Check that all indices in example file are present in remote file
assert data_read.index.isin(data_get.index).all()
assert meta['station'] == 'EU'
assert meta['station'] == 'EUO'
assert meta['filetype'] == 'PO'
assert meta['filenames'] == ['EUPO1801.txt']


@pytest.mark.skip(reason="SRML server is undergoing maintenance as of 12-2023")
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_nonexisting_month_warning():
with pytest.warns(UserWarning, match='file was not found: EUPO0912.txt'):
with pytest.warns(UserWarning, match='file was not found: EUO/EUO_2009/EUPO0912.txt'): # noqa: E501
# Request data for a period where not all files exist
# Eugene (EU) station started reporting 1-minute data in January 2010
# Eugene (EUO) station started reporting 1-minute data in January 2010
data, meta = srml.get_srml(
station='EUO', start='2009-12-01', end='2010-01-31', filetype='PO')


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_station_two_letter_error():
with pytest.raises(ValueError, match='should be a 3 letter station abbreviation'): # noqa: E501
# Test that an error is raised when specifying a station using the old
# two-letter station code. E.g., Eugene, Oregon station used to be EU
# and now goes by EUO
data, meta = srml.get_srml(
station='EU', start='2009-12-01', end='2010-01-31', filetype='PO')
station='EU', start='2022-01-01', end='2022-01-31', filetype='PO')
Loading