Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): combine load_version/load_with_datetime into load_as_version #1968

Merged
merged 12 commits into from
Dec 19, 2023
53 changes: 53 additions & 0 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,20 +453,68 @@ def file_uris(

file_uris.__doc__ = ""

def load_to(self, version: Union[int, str, datetime]) -> None:
    """
    Load/time travel a DeltaTable to a specified version number, or to the version of the table at a
    given timestamp. If a string is passed then the argument should be an RFC 3339 and ISO 8601 date
    and time string format. If a datetime object without a timezone is passed, UTC is assumed, so
    that the timestamp handed to the underlying table always carries an explicit offset.

    Args:
        version: the identifier of the version of the DeltaTable to load

    Raises:
        TypeError: if `version` is not an int, str or datetime.

    Example:
        **Use a version number**
        ```
        1
        ```

        **Use a datetime object**
        ```
        datetime(2023,1,1)
        ```

        **Use a datetime in string format**
        ```
        "2018-01-26T18:30:09Z"
        "2018-12-19T16:39:57-08:00"
        "2018-01-26T18:30:09.453+00:00"
        ```
    """
    # Imported locally so this method does not depend on the module-level import list.
    from datetime import timezone

    if isinstance(version, int):
        self._table.load_version(version)
    elif isinstance(version, datetime):
        # A naive datetime serialises without a UTC offset (e.g. "2023-01-01T00:00:00"),
        # which is not a valid RFC 3339 timestamp. Pin it to UTC before serialising.
        if version.tzinfo is None or version.utcoffset() is None:
            version = version.replace(tzinfo=timezone.utc)
        self._table.load_with_datetime(version.isoformat())
    elif isinstance(version, str):
        # Strings are passed through untouched; format validation is left to the table.
        self._table.load_with_datetime(version)
    else:
        raise TypeError(
            "Invalid datatype provided for version, only int, str or datetime are accepted."
        )

def load_version(self, version: int) -> None:
    """
    Load the DeltaTable at the given version number.

    !!! warning "Deprecated"
        `load_version` and `load_with_datetime` have been combined into `DeltaTable.load_to`.

    Args:
        version: the identifier of the version of the DeltaTable to load
    """
    deprecation_notice = (
        "Call to deprecated method DeltaTable.load_version. "
        "Use DeltaTable.load_to() instead."
    )
    # stacklevel=2 attributes the warning to the caller's line rather than this shim.
    warnings.warn(deprecation_notice, DeprecationWarning, stacklevel=2)
    raw_table = self._table
    raw_table.load_version(version)

def load_with_datetime(self, datetime_string: str) -> None:
"""
Time travel Delta table to the latest version that's created at or before provided `datetime_string` argument.
The `datetime_string` argument should be an RFC 3339 and ISO 8601 date and time string.

!!! warning "Deprecated"
Load_version and load_with_datetime have been combined into `DeltaTable.load_to`.

Args:
datetime_string: the identifier of the datetime point of the DeltaTable to load

Expand All @@ -477,6 +525,11 @@ def load_with_datetime(self, datetime_string: str) -> None:
"2018-01-26T18:30:09.453+00:00"
```
"""
warnings.warn(
"Call to deprecated method DeltaTable.load_with_datetime. Use DeltaTable.load_to() instead.",
category=DeprecationWarning,
stacklevel=2,
)
self._table.load_with_datetime(datetime_string)

@property
Expand Down
18 changes: 12 additions & 6 deletions python/tests/test_table_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_read_simple_table_using_options_to_dict():
assert dt.to_pyarrow_dataset().to_table().to_pydict() == {"value": [1, 2, 3]}


def test_load_with_datetime():
def test_load_to_datetime():
log_dir = "../crates/deltalake-core/tests/data/simple_table/_delta_log"
log_mtime_pair = [
("00000000000000000000.json", 1588398451.0),
Expand All @@ -78,15 +78,21 @@ def test_load_with_datetime():

table_path = "../crates/deltalake-core/tests/data/simple_table"
dt = DeltaTable(table_path)
dt.load_with_datetime("2020-05-01T00:47:31-07:00")
dt.load_to("2020-05-01T00:47:31-07:00")
assert dt.version() == 0
dt.load_with_datetime("2020-05-02T22:47:31-07:00")
dt.load_to("2020-05-02T22:47:31-07:00")
assert dt.version() == 1
dt.load_with_datetime("2020-05-25T22:47:31-07:00")
dt.load_to("2020-05-25T22:47:31-07:00")
assert dt.version() == 4
dt.load_to(datetime.fromisoformat("2020-05-01T00:47:31-07:00"))
assert dt.version() == 0
dt.load_to(datetime.fromisoformat("2020-05-02T22:47:31-07:00"))
assert dt.version() == 1
dt.load_to(datetime.fromisoformat("2020-05-25T22:47:31-07:00"))
assert dt.version() == 4


def test_load_with_datetime_bad_format():
def test_load_to_datetime_bad_format():
table_path = "../crates/deltalake-core/tests/data/simple_table"
dt = DeltaTable(table_path)

Expand All @@ -96,7 +102,7 @@ def test_load_with_datetime_bad_format():
"2020-05-01T00:47:31+08",
]:
with pytest.raises(Exception, match="Failed to parse datetime string:"):
dt.load_with_datetime(bad_format)
dt.load_to(bad_format)


def test_read_simple_table_update_incremental():
Expand Down