Skip to content

Commit

Permalink
feat: unify API on snapshot_id
Browse files Browse the repository at this point in the history
  • Loading branch information
DahnJ committed Feb 22, 2025
1 parent 5d3829b commit 30edf17
Show file tree
Hide file tree
Showing 11 changed files with 46 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Changelog.python.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ repo.readonly_session("dev")
# still possible:
repo.readonly_session(tag="v0.1")
repo.readonly_session(branch="foo")
repo.readonly_session(snapshot="NXH3M0HJ7EEJ0699DPP0")
repo.readonly_session(snapshot_id="NXH3M0HJ7EEJ0699DPP0")
```

- Icechunk is now more resilient to changes in Zarr metadata spec, and can handle Zarr extensions.
Expand Down
4 changes: 2 additions & 2 deletions docs/docs/icechunk-python/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ snapshot_id_2 = session_2.commit("overwrite some values")
We can see the full version history of our repo:

```python
hist = repo.ancestry(snapshot=snapshot_id_2)
hist = repo.ancestry(snapshot_id=snapshot_id_2)
for ancestor in hist:
print(ancestor.id, ancestor.message, ancestor.written_at)

Expand All @@ -151,7 +151,7 @@ for ancestor in hist:
# latest version
assert array[0] == 2
# check out earlier snapshot
earlier_session = repo.readonly_session(snapshot=hist[1].id)
earlier_session = repo.readonly_session(snapshot_id=hist[1].id)
store = earlier_session.store

# get the array
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/icechunk-python/xarray.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ xr.open_zarr(session.store, consolidated=False)
We can also read data from previous snapshots by checking out prior versions:

```python
session = repo.readonly_session(snapshot=first_snapshot)
session = repo.readonly_session(snapshot_id=first_snapshot)

xr.open_zarr(session.store, consolidated=False)
# <xarray.Dataset> Size: 9MB
Expand Down
4 changes: 2 additions & 2 deletions icechunk-python/notebooks/demo-dummy-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": null,
"id": "d904f719-98cf-4f51-8e9a-1631dcb3fcba",
"metadata": {},
"outputs": [
Expand All @@ -348,7 +348,7 @@
}
],
"source": [
"session = repo.readonly_session(snapshot=first_commit)\n",
"session = repo.readonly_session(snapshot_id=first_commit)\n",
"root_group = zarr.open_group(session.store, mode=\"r\")\n",
"\n",
"try:\n",
Expand Down
4 changes: 2 additions & 2 deletions icechunk-python/notebooks/version-control.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "e785d9a1-36ec-4207-b334-20e0a68e3ac8",
"metadata": {},
"outputs": [
Expand All @@ -258,7 +258,7 @@
}
],
"source": [
"session = repo.readonly_session(snapshot=first_commit)\n",
"session = repo.readonly_session(snapshot_id=first_commit)\n",
"root_group = zarr.open_group(store=session.store, mode=\"r\")\n",
"dict(root_group.attrs)"
]
Expand Down
8 changes: 4 additions & 4 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,7 @@ class PyRepository:
*,
branch: str | None = None,
tag: str | None = None,
snapshot: str | None = None,
snapshot_id: str | None = None,
) -> AsyncIterator[SnapshotInfo]: ...
def create_branch(self, branch: str, snapshot_id: str) -> None: ...
def list_branches(self) -> set[str]: ...
Expand All @@ -972,17 +972,17 @@ class PyRepository:
self,
from_branch: str | None = None,
from_tag: str | None = None,
from_snapshot: str | None = None,
from_snapshot_id: str | None = None,
to_branch: str | None = None,
to_tag: str | None = None,
to_snapshot: str | None = None,
to_snapshot_id: str | None = None,
) -> Diff: ...
def readonly_session(
self,
branch: str | None = None,
*,
tag: str | None = None,
snapshot: str | None = None,
snapshot_id: str | None = None,
) -> PySession: ...
def writable_session(self, branch: str) -> PySession: ...
def expire_snapshots(
Expand Down
26 changes: 13 additions & 13 deletions icechunk-python/python/icechunk/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def ancestry(
*,
branch: str | None = None,
tag: str | None = None,
snapshot: str | None = None,
snapshot_id: str | None = None,
) -> Iterator[SnapshotInfo]:
"""
Get the ancestry of a snapshot.
Expand All @@ -207,7 +207,7 @@ def ancestry(
The branch to get the ancestry of.
tag : str, optional
The tag to get the ancestry of.
snapshot : str, optional
snapshot_id : str, optional
The snapshot ID to get the ancestry of.
Returns
Expand All @@ -223,7 +223,7 @@ def ancestry(
# the returned object is both an Async and Sync iterator
res = cast(
Iterator[SnapshotInfo],
self._repository.async_ancestry(branch=branch, tag=tag, snapshot=snapshot),
self._repository.async_ancestry(branch=branch, tag=tag, snapshot_id=snapshot_id),
)
return res

Expand All @@ -232,7 +232,7 @@ def async_ancestry(
*,
branch: str | None = None,
tag: str | None = None,
snapshot: str | None = None,
snapshot_id: str | None = None,
) -> AsyncIterator[SnapshotInfo]:
"""
Get the ancestry of a snapshot.
Expand All @@ -243,7 +243,7 @@ def async_ancestry(
The branch to get the ancestry of.
tag : str, optional
The tag to get the ancestry of.
snapshot : str, optional
snapshot_id : str, optional
The snapshot ID to get the ancestry of.
Returns
Expand All @@ -255,7 +255,7 @@ def async_ancestry(
-----
Only one of the arguments can be specified.
"""
return self._repository.async_ancestry(branch=branch, tag=tag, snapshot=snapshot)
return self._repository.async_ancestry(branch=branch, tag=tag, snapshot_id=snapshot_id)

def create_branch(self, branch: str, snapshot_id: str) -> None:
"""
Expand Down Expand Up @@ -400,10 +400,10 @@ def diff(
*,
from_branch: str | None = None,
from_tag: str | None = None,
from_snapshot: str | None = None,
from_snapshot_id: str | None = None,
to_branch: str | None = None,
to_tag: str | None = None,
to_snapshot: str | None = None,
to_snapshot_id: str | None = None,
) -> Diff:
"""
Compute an overview of the operations executed from version `from` to version `to`.
Expand All @@ -421,18 +421,18 @@ def diff(
return self._repository.diff(
from_branch=from_branch,
from_tag=from_tag,
from_snapshot=from_snapshot,
from_snapshot_id=from_snapshot_id,
to_branch=to_branch,
to_tag=to_tag,
to_snapshot=to_snapshot,
to_snapshot_id=to_snapshot_id,
)

def readonly_session(
self,
branch: str | None = None,
*,
tag: str | None = None,
snapshot: str | None = None,
snapshot_id: str | None = None,
) -> Session:
"""
Create a read-only session.
Expand All @@ -447,7 +447,7 @@ def readonly_session(
If provided, the branch to create the session on.
tag : str, optional
If provided, the tag to create the session on.
snapshot : str, optional
snapshot_id : str, optional
If provided, the snapshot ID to create the session on.
Returns
Expand All @@ -460,7 +460,7 @@ def readonly_session(
Only one of the arguments can be specified.
"""
return Session(
self._repository.readonly_session(branch=branch, tag=tag, snapshot=snapshot)
self._repository.readonly_session(branch=branch, tag=tag, snapshot_id=snapshot_id)
)

def writable_session(self, branch: str) -> Session:
Expand Down
22 changes: 11 additions & 11 deletions icechunk-python/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,18 +506,18 @@ impl PyRepository {
}

/// Returns an object that is both a sync and an async iterator
#[pyo3(signature = (*, branch = None, tag = None, snapshot = None))]
#[pyo3(signature = (*, branch = None, tag = None, snapshot_id = None))]
pub fn async_ancestry(
&self,
py: Python<'_>,
branch: Option<String>,
tag: Option<String>,
snapshot: Option<String>,
snapshot_id: Option<String>,
) -> PyResult<PyAsyncGenerator> {
let repo = Arc::clone(&self.0);
// This function calls block_on, so we need to allow other thread python to make progress
py.allow_threads(move || {
let version = args_to_version_info(branch, tag, snapshot)?;
let version = args_to_version_info(branch, tag, snapshot_id)?;
let ancestry = pyo3_async_runtimes::tokio::get_runtime()
.block_on(async move { repo.ancestry_arc(&version).await })
.map_err(PyIcechunkStoreError::RepositoryError)?
Expand Down Expand Up @@ -689,20 +689,20 @@ impl PyRepository {
})
}

#[pyo3(signature = (*, from_branch=None, from_tag=None, from_snapshot=None, to_branch=None, to_tag=None, to_snapshot=None))]
#[pyo3(signature = (*, from_branch=None, from_tag=None, from_snapshot_id=None, to_branch=None, to_tag=None, to_snapshot_id=None))]
#[allow(clippy::too_many_arguments)]
pub fn diff(
&self,
py: Python<'_>,
from_branch: Option<String>,
from_tag: Option<String>,
from_snapshot: Option<String>,
from_snapshot_id: Option<String>,
to_branch: Option<String>,
to_tag: Option<String>,
to_snapshot: Option<String>,
to_snapshot_id: Option<String>,
) -> PyResult<PyDiff> {
let from = args_to_version_info(from_branch, from_tag, from_snapshot)?;
let to = args_to_version_info(to_branch, to_tag, to_snapshot)?;
let from = args_to_version_info(from_branch, from_tag, from_snapshot_id)?;
let to = args_to_version_info(to_branch, to_tag, to_snapshot_id)?;

// This function calls block_on, so we need to allow other thread python to make progress
py.allow_threads(move || {
Expand All @@ -717,17 +717,17 @@ impl PyRepository {
})
}

#[pyo3(signature = (*, branch = None, tag = None, snapshot = None))]
#[pyo3(signature = (*, branch = None, tag = None, snapshot_id = None))]
pub fn readonly_session(
&self,
py: Python<'_>,
branch: Option<String>,
tag: Option<String>,
snapshot: Option<String>,
snapshot_id: Option<String>,
) -> PyResult<PySession> {
// This function calls block_on, so we need to allow other thread python to make progress
py.allow_threads(move || {
let version = args_to_version_info(branch, tag, snapshot)?;
let version = args_to_version_info(branch, tag, snapshot_id)?;
let session =
pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
self.0
Expand Down
4 changes: 2 additions & 2 deletions icechunk-python/tests/test_can_read_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ async def test_icechunk_can_read_old_repo() -> None:
"Repository initialized",
]
assert [
p.message for p in repo.ancestry(snapshot=main_snapshot)
p.message for p in repo.ancestry(snapshot_id=main_snapshot)
] == expected_main_history

expected_branch_history = [
Expand Down Expand Up @@ -256,7 +256,7 @@ async def test_icechunk_can_read_old_repo() -> None:
assert_array_equal(big_chunks[:], 42.0)

parents = list(repo.ancestry(branch="main"))
diff = repo.diff(to_branch="main", from_snapshot=parents[-2].id)
diff = repo.diff(to_branch="main", from_snapshot_id=parents[-2].id)
assert diff.new_groups == set()
assert diff.new_arrays == set()
assert set(diff.updated_chunks.keys()) == {
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_stateful_repo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def commit(self, message):
@rule(ref=commits)
def checkout_commit(self, ref):
note(f"Checking out commit {ref}")
self.session = self.repo.readonly_session(snapshot=ref)
self.session = self.repo.readonly_session(snapshot_id=ref)
assert self.session.read_only
self.model.checkout_commit(ref)

Expand Down
14 changes: 7 additions & 7 deletions icechunk-python/tests/test_timetravel.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ def test_timetravel() -> None:

new_snapshot_id = session.commit("commit 2")

session = repo.readonly_session(snapshot=first_snapshot_id)
session = repo.readonly_session(snapshot_id=first_snapshot_id)
store = session.store
group = zarr.open_group(store=store, mode="r")
air_temp = cast(zarr.core.array.Array, group["air_temp"])
assert store.read_only
assert air_temp[200, 6] == 42

session = repo.readonly_session(snapshot=new_snapshot_id)
session = repo.readonly_session(snapshot_id=new_snapshot_id)
store = session.store
group = zarr.open_group(store=store, mode="r")
air_temp = cast(zarr.core.array.Array, group["air_temp"])
Expand Down Expand Up @@ -116,7 +116,7 @@ def test_timetravel() -> None:
air_temp = cast(zarr.core.array.Array, group["air_temp"])
assert air_temp[200, 6] == 90

parents = list(repo.ancestry(snapshot=feature_snapshot_id))
parents = list(repo.ancestry(snapshot_id=feature_snapshot_id))
assert [snap.message for snap in parents] == [
"commit 3",
"commit 2",
Expand All @@ -128,7 +128,7 @@ def test_timetravel() -> None:
assert list(repo.ancestry(tag="v1.0")) == parents
assert list(repo.ancestry(branch="feature-not-dead")) == parents

diff = repo.diff(to_tag="v1.0", from_snapshot=parents[-1].id)
diff = repo.diff(to_tag="v1.0", from_snapshot_id=parents[-1].id)
assert diff.new_groups == {"/"}
assert diff.new_arrays == {"/air_temp"}
assert list(diff.updated_chunks.keys()) == ["/air_temp"]
Expand Down Expand Up @@ -185,11 +185,11 @@ def test_timetravel() -> None:

with pytest.raises(ValueError, match="doesn't include"):
# if we call diff in the wrong order it fails with a message
repo.diff(from_tag="v1.0", to_snapshot=parents[-1].id)
repo.diff(from_tag="v1.0", to_snapshot_id=parents[-1].id)

# check async ancestry works
assert list(repo.ancestry(snapshot=feature_snapshot_id)) == asyncio.run(
async_ancestry(repo, snapshot=feature_snapshot_id)
assert list(repo.ancestry(snapshot_id=feature_snapshot_id)) == asyncio.run(
async_ancestry(repo, snapshot_id=feature_snapshot_id)
)
assert list(repo.ancestry(tag="v1.0")) == asyncio.run(
async_ancestry(repo, tag="v1.0")
Expand Down

0 comments on commit 30edf17

Please sign in to comment.