Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement end-user-facing Python lib wrapping ObjectStore #240

Merged
merged 19 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ members = [
"arro3-compute",
"arro3-core",
"arro3-io",
"object-store-rs",
"pyo3-arrow",
"pyo3-object_store",
]
Expand All @@ -29,12 +30,16 @@ arrow-csv = "53"
arrow-ipc = { version = "53", features = ["lz4", "zstd"] }
arrow-schema = "53"
arrow-select = "53"
bytes = "1.7.0"
half = "2"
indexmap = "2"
numpy = "0.22"
object_store = "0.11"
parquet = "53"
pyo3 = { version = "0.22", features = ["macros", "indexmap"] }
pyo3-async-runtimes = { git = "https://github.com/PyO3/pyo3-async-runtimes", features = [
"tokio-runtime",
] }
pyo3-file = "0.9"
thiserror = "1"

Expand Down
4 changes: 2 additions & 2 deletions arro3-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ arrow-buffer = { workspace = true }
arrow-csv = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
bytes = "1.7.0"
bytes = { workspace = true }
futures = { version = "0.3.30", optional = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true }
pyo3 = { workspace = true }
pyo3-arrow = { path = "../pyo3-arrow" }
pyo3-async-runtimes = { git = "https://github.com/PyO3/pyo3-async-runtimes", features = [
pyo3-async-runtimes = { workspace = true, features = [
"tokio-runtime",
], optional = true }
pyo3-file = { workspace = true }
Expand Down
36 changes: 36 additions & 0 deletions object-store-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[package]
name = "object-store-rs"
version = { workspace = true }
authors = { workspace = true }
edition = { workspace = true }
description = "Core library for representing Arrow data in Python."
readme = "README.md"
repository = { workspace = true }
homepage = { workspace = true }
license = { workspace = true }
keywords = { workspace = true }
categories = { workspace = true }
rust-version = { workspace = true }

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "_object_store_rs"
crate-type = ["cdylib"]

[dependencies]
bytes = { workspace = true }
chrono = "0.4.38"
futures = "0.3.31"
http = "1.1"
object_store = { workspace = true }
pyo3 = { workspace = true, features = ["chrono"] }
pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] }
pyo3-file = { workspace = true }
pyo3-object_store = { path = "../pyo3-object_store" }
tokio = { version = "1.40", features = [
"macros",
"rt",
"rt-multi-thread",
"sync",
] }
url = "2"
1 change: 1 addition & 0 deletions object-store-rs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# object-store-rs
20 changes: 20 additions & 0 deletions object-store-rs/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
requires = ["maturin>=1.4.0,<2.0"]
build-backend = "maturin"

[project]
name = "object-store-rs"
requires-python = ">=3.9"
dependencies = []
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dynamic = ["version"]

[tool.maturin]
features = ["pyo3/extension-module"]
module-name = "object_store_rs._object_store_rs"
python-source = "python"
strip = true
4 changes: 4 additions & 0 deletions object-store-rs/python/object_store_rs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._object_store_rs import *
from ._object_store_rs import ___version

__version__: str = ___version()
6 changes: 6 additions & 0 deletions object-store-rs/python/object_store_rs/_copy.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from ._pyo3_object_store import ObjectStore

def copy(store: ObjectStore, from_: str, to: str) -> None: ...
async def copy_async(store: ObjectStore, from_: str, to: str) -> None: ...
def copy_if_not_exists(store: ObjectStore, from_: str, to: str) -> None: ...
async def copy_if_not_exists_async(store: ObjectStore, from_: str, to: str) -> None: ...
4 changes: 4 additions & 0 deletions object-store-rs/python/object_store_rs/_delete.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._pyo3_object_store import ObjectStore

def delete(store: ObjectStore, location: str) -> None: ...
async def delete_async(store: ObjectStore, location: str) -> None: ...
114 changes: 114 additions & 0 deletions object-store-rs/python/object_store_rs/_get.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from datetime import datetime
from typing import Sequence, TypedDict

from ._list import ObjectMeta
from ._pyo3_object_store import ObjectStore
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async

class GetOptions(TypedDict):
"""Options for a get request, such as range"""

if_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` matches
otherwise returning [`Error::Precondition`]

See <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match>

Examples:

```text
If-Match: "xyzzy"
If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
If-Match: *
```
"""

if_none_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` does not match
otherwise returning [`Error::NotModified`]

See <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2>

Examples:

```text
If-None-Match: "xyzzy"
If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz"
If-None-Match: *
```
"""

if_unmodified_since: datetime | None
"""
Request will succeed if the object has been modified since

<https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.3>
"""

if_modified_since: datetime | None
"""
Request will succeed if the object has not been modified since
otherwise returning [`Error::Precondition`]

Some stores, such as S3, will only return `NotModified` for exact
timestamp matches, instead of for any timestamp greater than or equal.

<https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.4>
"""

# range:
"""
Request transfer of only the specified range of bytes
otherwise returning [`Error::NotModified`]

<https://datatracker.ietf.org/doc/html/rfc9110#name-range>
"""

version: str | None
"""
Request a particular object version
"""

head: bool
"""
Request transfer of no content

<https://datatracker.ietf.org/doc/html/rfc9110#name-head>
"""

class GetResult:
def bytes(self) -> bytes:
"""
Collects the data into bytes
"""

async def bytes_async(self) -> bytes:
"""
Collects the data into bytes
"""

@property
def meta(self) -> ObjectMeta:
"""The ObjectMeta for this object"""

def get(
store: ObjectStore, location: str, *, options: GetOptions | None = None
) -> GetResult: ...
async def get_async(
store: ObjectStore, location: str, *, options: GetOptions | None = None
) -> GetResult: ...
def get_range(store: ObjectStore, location: str, offset: int, length: int) -> bytes: ...
async def get_range_async(
store: ObjectStore, location: str, offset: int, length: int
) -> bytes: ...
def get_ranges(
store: ObjectStore, location: str, offset: Sequence[int], length: Sequence[int]
) -> bytes: ...
async def get_ranges_async(
store: ObjectStore, location: str, offset: Sequence[int], length: Sequence[int]
) -> bytes: ...
5 changes: 5 additions & 0 deletions object-store-rs/python/object_store_rs/_head.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from ._list import ObjectMeta
from ._pyo3_object_store import ObjectStore

def head(store: ObjectStore, location: str) -> ObjectMeta: ...
async def head_async(store: ObjectStore, location: str) -> ObjectMeta: ...
45 changes: 45 additions & 0 deletions object-store-rs/python/object_store_rs/_list.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from datetime import datetime
from typing import List, TypedDict

from ._pyo3_object_store import ObjectStore
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async

class ObjectMeta(TypedDict):
location: str
"""The full path to the object"""

last_modified: datetime
"""The last modified time"""

size: int
"""The size in bytes of the object"""

e_tag: str | None
"""The unique identifier for the object

<https://datatracker.ietf.org/doc/html/rfc9110#name-etag>
"""

version: str | None
"""A version indicator for this object"""

class ListResult(TypedDict):
common_prefixes: List[str]
"""Prefixes that are common (like directories)"""

objects: List[ObjectMeta]
"""Object metadata for the listing"""

def list(store: ObjectStore, prefix: str | None = None) -> List[ObjectMeta]: ...
async def list_async(
store: ObjectStore, prefix: str | None = None
) -> List[ObjectMeta]: ...
def list_with_delimiter(
store: ObjectStore, prefix: str | None = None
) -> ListResult: ...
async def list_with_delimiter_async(
store: ObjectStore, prefix: str | None = None
) -> ListResult: ...
32 changes: 32 additions & 0 deletions object-store-rs/python/object_store_rs/_object_store_rs.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from ._copy import copy as copy
from ._copy import copy_async as copy_async
from ._copy import copy_if_not_exists as copy_if_not_exists
from ._copy import copy_if_not_exists_async as copy_if_not_exists_async
from ._delete import delete as delete
from ._delete import delete_async as delete_async
from ._get import GetOptions as GetOptions
from ._get import GetResult as GetResult
from ._get import get as get
from ._get import get_async as get_async
from ._get import get_range as get_range
from ._get import get_range_async as get_range_async
from ._get import get_ranges as get_ranges
from ._get import get_ranges_async as get_ranges_async
from ._head import head as head
from ._head import head_async as head_async
from ._list import ListResult as ListResult
from ._list import ObjectMeta as ObjectMeta
from ._list import list as list
from ._list import list_async as list_async
from ._list import list_with_delimiter as list_with_delimiter
from ._list import list_with_delimiter_async as list_with_delimiter_async
from ._put import put_file as put_file
from ._put import put_file_async as put_file_async
from ._rename import rename as rename
from ._rename import rename_async as rename_async
from ._rename import rename_if_not_exists as rename_if_not_exists
from ._rename import rename_if_not_exists_async as rename_if_not_exists_async
from ._sign import HTTP_METHOD as HTTP_METHOD
from ._sign import SignCapableStore as SignCapableStore
from ._sign import sign_url as sign_url
from ._sign import sign_url_async as sign_url_async
21 changes: 21 additions & 0 deletions object-store-rs/python/object_store_rs/_put.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path
from typing import IO

from ._pyo3_object_store import ObjectStore

def put_file(
store: ObjectStore,
location: str,
file: IO[bytes] | Path | bytes,
*,
chunk_size: int = 5 * 1024,
max_concurrency: int = 12,
) -> None: ...
async def put_file_async(
store: ObjectStore,
location: str,
file: IO[bytes] | Path | bytes,
*,
chunk_size: int = 5 * 1024,
max_concurrency: int = 12,
) -> None: ...
Loading