-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add s3 support (with custom endpoints) #1789
Changes from 6 commits
565332c
0fc36ef
645ebaa
d872a19
a97802b
50468db
eda2764
5986998
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,9 @@ | |
from typing import Any, Dict | ||
|
||
import pandas as pd | ||
from minio import Minio | ||
from testcontainers.core.generic import DockerContainer | ||
from testcontainers.core.waiting_utils import wait_for_logs | ||
|
||
from feast import FileSource | ||
from feast.data_format import ParquetFormat | ||
|
@@ -19,7 +22,7 @@ class FileDataSourceCreator(DataSourceCreator): | |
def __init__(self, _: str): | ||
pass | ||
|
||
def create_data_sources( | ||
def create_data_source( | ||
self, | ||
destination: str, | ||
df: pd.DataFrame, | ||
|
@@ -46,3 +49,79 @@ def create_offline_store_config(self) -> FeastConfigBaseModel: | |
|
||
def teardown(self): | ||
self.f.close() | ||
|
||
|
||
class S3FileDataSourceCreator(DataSourceCreator): | ||
f: Any | ||
minio: DockerContainer | ||
woop marked this conversation as resolved.
Show resolved
Hide resolved
|
||
bucket = "feast-test" | ||
access_key = "AKIAIOSFODNN7EXAMPLE" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can't really get around hardcoding. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed, yeah, I'll make a task for that. |
||
secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" | ||
minio_image = "minio/minio:RELEASE.2021-08-17T20-53-08Z" | ||
|
||
def __init__(self, _: str): | ||
self._setup_minio() | ||
|
||
def _setup_minio(self):
    """Start a MinIO container and block until it reports that it is ready.

    The container is built from ``self.minio_image`` with ports 9000 and 9001
    exposed, and ``self.access_key`` / ``self.secret`` passed in as the MinIO
    root credentials. The running container is stored on ``self.minio``.

    Raises:
        Whatever ``wait_for_logs`` raises when the readiness marker is not
        seen within the timeout. The container is stopped before the error
        propagates so a failed setup does not leave it running.
    """
    container = DockerContainer(self.minio_image)
    # The builder methods configure the container in place, so the chained
    # return values do not need to be kept.
    container.with_exposed_ports(9000).with_exposed_ports(9001)
    container.with_env("MINIO_ROOT_USER", self.access_key)
    container.with_env("MINIO_ROOT_PASSWORD", self.secret)
    container.with_command('server /data --console-address ":9001"')

    self.minio = container
    self.minio.start()

    # The minio container will print "API: ..." when ready.
    log_string_to_wait_for = "API"
    try:
        wait_for_logs(
            container=self.minio, predicate=log_string_to_wait_for, timeout=5
        )
    except Exception:
        # Setup runs from __init__, so on failure the caller never gets an
        # instance to call teardown() on; stop the container here instead.
        self.minio.stop()
        raise
|
||
def _upload_parquet_file(self, df, file_name, minio_endpoint):
    """Write ``df`` to a temporary Parquet file and upload it to ``self.bucket``.

    Args:
        df: Object exposing ``to_parquet(path)`` (a pandas DataFrame in the
            visible caller).
        file_name: Object name to store the upload under in the bucket.
        minio_endpoint: ``host:port`` of the MinIO server, without a scheme.

    The bucket is created first if it does not exist yet. The temporary file
    is closed and removed once the upload attempt finishes, whether or not it
    succeeded, so repeated calls do not accumulate files or open handles.
    """
    import os

    # delete=False because the file is written and read back by *path*
    # (to_parquet / fput_object), not through this handle.
    self.f = tempfile.NamedTemporaryFile(suffix=".parquet", delete=False)
    try:
        df.to_parquet(self.f.name)

        client = Minio(
            minio_endpoint,
            access_key=self.access_key,
            secret_key=self.secret,
            secure=False,  # connect without TLS
        )
        if not client.bucket_exists(self.bucket):
            client.make_bucket(self.bucket)
        client.fput_object(self.bucket, file_name, self.f.name)
    finally:
        # The local copy is only needed for the upload. self.f stays assigned,
        # and closing an already-closed file later is a harmless no-op.
        self.f.close()
        try:
            os.unlink(self.f.name)
        except FileNotFoundError:
            pass
|
||
def create_data_source(
    self,
    destination: str,
    df: pd.DataFrame,
    event_timestamp_column="ts",
    created_timestamp_column="created_ts",
    field_mapping: Dict[str, str] = None,
) -> DataSource:
    """Upload ``df`` to the MinIO container and return a ``FileSource`` for it.

    Args:
        destination: Base name of the object; ``.parquet`` is appended.
        df: Data to upload.
        event_timestamp_column: Passed through to the ``FileSource``.
        created_timestamp_column: Passed through to the ``FileSource``.
        field_mapping: Passed through to the ``FileSource``; a falsy value
            is replaced by ``{"ts_1": "ts"}``.

    Returns:
        A Parquet ``FileSource`` whose path is ``s3://<bucket>/<object>`` and
        whose S3 endpoint override points at the container over HTTP.
    """
    object_name = f"{destination}.parquet"

    # Resolve where the container's port 9000 is reachable from the host.
    mapped_port = self.minio.get_exposed_port("9000")
    container_host = self.minio.get_container_host_ip()
    endpoint = f"{container_host}:{mapped_port}"

    self._upload_parquet_file(df, object_name, endpoint)

    effective_mapping = field_mapping or {"ts_1": "ts"}
    return FileSource(
        path=f"s3://{self.bucket}/{object_name}",
        file_format=ParquetFormat(),
        s3_endpoint_override=f"http://{endpoint}",
        event_timestamp_column=event_timestamp_column,
        created_timestamp_column=created_timestamp_column,
        date_partition_column="",
        field_mapping=effective_mapping,
    )
|
||
def get_prefixed_table_name(self, name: str, suffix: str) -> str:
    """Return the table name to use, which is ``suffix`` formatted as a string.

    ``name`` is accepted but not used; no prefix is applied.
    """
    table_name = f"{suffix}"
    return table_name
|
||
def create_offline_store_config(self) -> FeastConfigBaseModel:
    """Return a ``FileOfflineStoreConfig`` constructed with no arguments."""
    offline_store_config = FileOfflineStoreConfig()
    return offline_store_config
|
||
def teardown(self): | ||
self.minio.stop() | ||
self.f.close() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: could you add a type annotation for the return value of this method?