Skip to content

Commit

Permalink
fix: Remove date partition column field from datasources that don't s… (
Browse files Browse the repository at this point in the history
#2478)

* fix: Remove date partition column field from datasources that don't support them

Signed-off-by: Achal Shah <achals@gmail.com>

* remove for file sources as well

Signed-off-by: Achal Shah <achals@gmail.com>

* remove other references

Signed-off-by: Achal Shah <achals@gmail.com>

* remove other references

Signed-off-by: Achal Shah <achals@gmail.com>

* reference to removal

Signed-off-by: Achal Shah <achals@gmail.com>

* reference to removal

Signed-off-by: Achal Shah <achals@gmail.com>

* reorder

Signed-off-by: Achal Shah <achals@gmail.com>

* remove more:

Signed-off-by: Achal Shah <achals@gmail.com>

* docs

Signed-off-by: Achal Shah <achals@gmail.com>

* comment

Signed-off-by: Achal Shah <achals@gmail.com>

* cr

Signed-off-by: Achal Shah <achals@gmail.com>
  • Loading branch information
achals authored Apr 4, 2022
1 parent 495b435 commit ce35835
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 48 deletions.
22 changes: 14 additions & 8 deletions sdk/python/feast/infra/offline_stores/bigquery_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(
table_ref: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
query: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = "",
Expand All @@ -37,7 +37,7 @@ def __init__(
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
or view. Only used for feature columns, not entities or timestamp columns.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
query (optional): SQL query to execute to generate data for this data source.
name (optional): Name for the source. Defaults to the table_ref if not specified.
description (optional): A human-readable description.
Expand All @@ -61,6 +61,15 @@ def __init__(
table = table_ref
self.bigquery_options = BigQueryOptions(table_ref=table, query=query)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for BigQuery sources. "
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

# If no name, use the table_ref as the default name
_name = name
if not _name:
Expand All @@ -78,10 +87,9 @@ def __init__(

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -128,7 +136,6 @@ def from_proto(data_source: DataSourceProto):
table_ref=data_source.bigquery_options.table_ref,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.bigquery_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand All @@ -148,7 +155,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,21 @@ def __init__(
_name = table
else:
raise DataSourceNoNameException()

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Spark sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
name=_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -130,7 +139,6 @@ def from_proto(data_source: DataSourceProto) -> Any:
file_format=spark_options.file_format,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
description=data_source.description,
tags=dict(data_source.tags),
owner=data_source.owner,
Expand All @@ -149,7 +157,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
20 changes: 13 additions & 7 deletions sdk/python/feast/infra/offline_stores/file_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,20 @@ def __init__(
s3_endpoint_override=s3_endpoint_override,
)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for File sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
name if name else path,
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
name=name if name else path,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand Down Expand Up @@ -114,7 +122,6 @@ def from_proto(data_source: DataSourceProto):
path=data_source.file_options.uri,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
s3_endpoint_override=data_source.file_options.s3_endpoint_override,
description=data_source.description,
tags=dict(data_source.tags),
Expand All @@ -134,7 +141,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
21 changes: 13 additions & 8 deletions sdk/python/feast/infra/offline_stores/redshift_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __init__(
schema: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
query: Optional[str] = None,
name: Optional[str] = None,
description: Optional[str] = "",
Expand All @@ -41,7 +41,7 @@ def __init__(
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
query (optional): The query to be executed to obtain the features.
name (optional): Name for the source. Defaults to the table_ref if not specified.
description (optional): A human-readable description.
Expand Down Expand Up @@ -70,13 +70,20 @@ def __init__(
),
DeprecationWarning,
)
if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Redshift sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand All @@ -99,7 +106,6 @@ def from_proto(data_source: DataSourceProto):
schema=data_source.redshift_options.schema,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.redshift_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand Down Expand Up @@ -169,7 +175,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
22 changes: 14 additions & 8 deletions sdk/python/feast/infra/offline_stores/snowflake_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def __init__(
table: Optional[str] = None,
query: Optional[str] = None,
event_timestamp_column: Optional[str] = "",
date_partition_column: Optional[str] = None,
created_timestamp_column: Optional[str] = "",
field_mapping: Optional[Dict[str, str]] = None,
date_partition_column: Optional[str] = "",
name: Optional[str] = None,
description: Optional[str] = "",
tags: Optional[Dict[str, str]] = None,
Expand All @@ -42,7 +42,7 @@ def __init__(
row was created, used for deduplicating rows.
field_mapping (optional): A dictionary mapping of column names in this data
source to column names in a feature table or view.
date_partition_column (optional): Timestamp column used for partitioning.
date_partition_column (deprecated): Timestamp column used for partitioning.
name (optional): Name for the source. Defaults to the table if not specified.
description (optional): A human-readable description.
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
Expand Down Expand Up @@ -72,12 +72,20 @@ def __init__(
DeprecationWarning,
)

if date_partition_column:
warnings.warn(
(
"The argument 'date_partition_column' is not supported for Snowflake sources."
"It will be removed in Feast 0.21+"
),
DeprecationWarning,
)

super().__init__(
_name if _name else "",
event_timestamp_column,
created_timestamp_column,
field_mapping,
date_partition_column,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
field_mapping=field_mapping,
description=description,
tags=tags,
owner=owner,
Expand All @@ -101,7 +109,6 @@ def from_proto(data_source: DataSourceProto):
table=data_source.snowflake_options.table,
event_timestamp_column=data_source.event_timestamp_column,
created_timestamp_column=data_source.created_timestamp_column,
date_partition_column=data_source.date_partition_column,
query=data_source.snowflake_options.query,
description=data_source.description,
tags=dict(data_source.tags),
Expand Down Expand Up @@ -170,7 +177,6 @@ def to_proto(self) -> DataSourceProto:

data_source_proto.event_timestamp_column = self.event_timestamp_column
data_source_proto.created_timestamp_column = self.created_timestamp_column
data_source_proto.date_partition_column = self.date_partition_column

return data_source_proto

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ def create_data_source(
table_ref=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def create_data_source(
path=f"{f.name}",
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down Expand Up @@ -130,7 +129,6 @@ def create_data_source(
path=f"s3://{self.bucket}/{filename}",
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
s3_endpoint_override=f"http://{host}:{port}",
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
database=self.offline_store_config.database,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
field_mapping=field_mapping or {"ts_1": "ts"},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def create_data_source(
table=destination_name,
event_timestamp_column=event_timestamp_column,
created_timestamp_column=created_timestamp_column,
date_partition_column="",
# maps certain column names to other names
field_mapping=field_mapping or {"ts_1": "ts"},
)
Expand Down
4 changes: 0 additions & 4 deletions sdk/python/tests/integration/registration/test_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ def test_apply_feature_view_success(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down Expand Up @@ -243,7 +242,6 @@ def test_modify_feature_views_success(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

request_source = RequestDataSource(
Expand Down Expand Up @@ -364,7 +362,6 @@ def test_apply_feature_view_integration(test_registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down Expand Up @@ -440,7 +437,6 @@ def test_apply_data_source(test_registry: Registry):
path="file://feast/*",
event_timestamp_column="ts_col",
created_timestamp_column="timestamp",
date_partition_column="date_partition_col",
)

fv1 = FeatureView(
Expand Down

0 comments on commit ce35835

Please sign in to comment.