From 6ba7fc789da34f0d239d7cae1b3a95e14eae9e2f Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Sun, 6 Mar 2022 03:55:46 -0500 Subject: [PATCH] fix: Fix default feast apply path without any extras (#2373) * fix: Fix default feast apply path without any extras Signed-off-by: Danny Chiao * revert removing ge Signed-off-by: Danny Chiao --- docs/reference/data-sources/spark.md | 12 +++++++++--- sdk/python/feast/__init__.py | 4 ---- sdk/python/feast/feature_store.py | 2 -- sdk/python/feast/inference.py | 7 ++++--- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/reference/data-sources/spark.md b/docs/reference/data-sources/spark.md index 25b69c7355..d0bc495924 100644 --- a/docs/reference/data-sources/spark.md +++ b/docs/reference/data-sources/spark.md @@ -13,7 +13,9 @@ The spark data source API allows for the retrieval of historical feature values Using a table reference from SparkSession(for example, either in memory or a Hive Metastore) ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( table="FEATURE_TABLE", @@ -23,7 +25,9 @@ my_spark_source = SparkSource( Using a query ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( query="SELECT timestamp as ts, created, f1, f2 " @@ -34,7 +38,9 @@ my_spark_source = SparkSource( Using a file reference ```python -from feast import SparkSource +from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( + SparkSource, +) my_spark_source = SparkSource( path=f"{CURRENT_DIR}/data/driver_hourly_stats", diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 0af226aa05..83b504b0cb 100644 --- a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -3,9 +3,6 @@ from pkg_resources import DistributionNotFound, get_distribution from feast.infra.offline_stores.bigquery_source import BigQuerySource -from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import ( - SparkSource, -) from feast.infra.offline_stores.file_source import FileSource from feast.infra.offline_stores.redshift_source import RedshiftSource from feast.infra.offline_stores.snowflake_source import SnowflakeSource @@ -50,5 +47,4 @@ "RedshiftSource", "RequestFeatureView", "SnowflakeSource", - "SparkSource", ] diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index 89e4df1d5f..19741bcf12 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -42,7 +42,6 @@ from feast.data_source import DataSource from feast.diff.infra_diff import InfraDiff, diff_infra_protos from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between -from feast.dqm.profilers.ge_profiler import GEProfiler from feast.entity import Entity from feast.errors import ( EntityNotFoundException, @@ -881,7 +880,6 @@ def create_saved_dataset( storage: SavedDatasetStorage, tags: Optional[Dict[str, str]] = None, feature_service: Optional[FeatureService] = None, - profiler: Optional[GEProfiler] = None, ) -> SavedDataset: """ Execute provided retrieval job and persist its outcome in given storage. diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py index d233631d3d..b3e51b4816 100644 --- a/sdk/python/feast/inference.py +++ b/sdk/python/feast/inference.py @@ -8,7 +8,6 @@ FileSource, RedshiftSource, SnowflakeSource, - SparkSource, ) from feast.data_source import DataSource, RequestDataSource from feast.errors import RegistryInferenceFailure @@ -87,8 +86,10 @@ def update_data_sources_with_inferred_event_timestamp_col( ): # prepare right match pattern for data source ts_column_type_regex_pattern = "" - if isinstance(data_source, FileSource) or isinstance( - data_source, SparkSource + # TODO(adchia): Move Spark source inference out of this logic + if ( + isinstance(data_source, FileSource) + or "SparkSource" == data_source.__class__.__name__ ): ts_column_type_regex_pattern = r"^timestamp" elif isinstance(data_source, BigQuerySource):