From 6ba7fc789da34f0d239d7cae1b3a95e14eae9e2f Mon Sep 17 00:00:00 2001
From: Danny Chiao <danny@tecton.ai>
Date: Sun, 6 Mar 2022 03:55:46 -0500
Subject: [PATCH] fix: Fix default feast apply path without any extras (#2373)

* fix: Fix default feast apply path without any extras

Signed-off-by: Danny Chiao <danny@tecton.ai>

* revert removing ge

Signed-off-by: Danny Chiao <danny@tecton.ai>
---
 docs/reference/data-sources/spark.md | 12 +++++++++---
 sdk/python/feast/__init__.py         |  4 ----
 sdk/python/feast/feature_store.py    |  2 --
 sdk/python/feast/inference.py        |  7 ++++---
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/docs/reference/data-sources/spark.md b/docs/reference/data-sources/spark.md
index 25b69c7355..d0bc495924 100644
--- a/docs/reference/data-sources/spark.md
+++ b/docs/reference/data-sources/spark.md
@@ -13,7 +13,9 @@ The spark data source API allows for the retrieval of historical feature values
 Using a table reference from SparkSession(for example, either in memory or a Hive Metastore)
 
 ```python
-from feast import SparkSource
+from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
+    SparkSource,
+)
 
 my_spark_source = SparkSource(
     table="FEATURE_TABLE",
@@ -23,7 +25,9 @@ my_spark_source = SparkSource(
 Using a query
 
 ```python
-from feast import SparkSource
+from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
+    SparkSource,
+)
 
 my_spark_source = SparkSource(
     query="SELECT timestamp as ts, created, f1, f2 "
@@ -34,7 +38,9 @@ my_spark_source = SparkSource(
 Using a file reference
 
 ```python
-from feast import SparkSource
+from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
+    SparkSource,
+)
 
 my_spark_source = SparkSource(
     path=f"{CURRENT_DIR}/data/driver_hourly_stats",
diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py
index 0af226aa05..83b504b0cb 100644
--- a/sdk/python/feast/__init__.py
+++ b/sdk/python/feast/__init__.py
@@ -3,9 +3,6 @@
 from pkg_resources import DistributionNotFound, get_distribution
 
 from feast.infra.offline_stores.bigquery_source import BigQuerySource
-from feast.infra.offline_stores.contrib.spark_offline_store.spark_source import (
-    SparkSource,
-)
 from feast.infra.offline_stores.file_source import FileSource
 from feast.infra.offline_stores.redshift_source import RedshiftSource
 from feast.infra.offline_stores.snowflake_source import SnowflakeSource
@@ -50,5 +47,4 @@
     "RedshiftSource",
     "RequestFeatureView",
     "SnowflakeSource",
-    "SparkSource",
 ]
diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py
index 89e4df1d5f..19741bcf12 100644
--- a/sdk/python/feast/feature_store.py
+++ b/sdk/python/feast/feature_store.py
@@ -42,7 +42,6 @@
 from feast.data_source import DataSource
 from feast.diff.infra_diff import InfraDiff, diff_infra_protos
 from feast.diff.registry_diff import RegistryDiff, apply_diff_to_registry, diff_between
-from feast.dqm.profilers.ge_profiler import GEProfiler
 from feast.entity import Entity
 from feast.errors import (
     EntityNotFoundException,
@@ -881,7 +880,6 @@ def create_saved_dataset(
         storage: SavedDatasetStorage,
         tags: Optional[Dict[str, str]] = None,
         feature_service: Optional[FeatureService] = None,
-        profiler: Optional[GEProfiler] = None,
     ) -> SavedDataset:
         """
             Execute provided retrieval job and persist its outcome in given storage.
diff --git a/sdk/python/feast/inference.py b/sdk/python/feast/inference.py
index d233631d3d..b3e51b4816 100644
--- a/sdk/python/feast/inference.py
+++ b/sdk/python/feast/inference.py
@@ -8,7 +8,6 @@
     FileSource,
     RedshiftSource,
     SnowflakeSource,
-    SparkSource,
 )
 from feast.data_source import DataSource, RequestDataSource
 from feast.errors import RegistryInferenceFailure
@@ -87,8 +86,10 @@ def update_data_sources_with_inferred_event_timestamp_col(
         ):
             # prepare right match pattern for data source
             ts_column_type_regex_pattern = ""
-            if isinstance(data_source, FileSource) or isinstance(
-                data_source, SparkSource
+            # TODO(adchia): Move Spark source inference out of this logic
+            if (
+                isinstance(data_source, FileSource)
+                or "SparkSource" == data_source.__class__.__name__
             ):
                 ts_column_type_regex_pattern = r"^timestamp"
             elif isinstance(data_source, BigQuerySource):