From f604af9ebf56ebd88b4e6ef541fdc20de2cc5b8c Mon Sep 17 00:00:00 2001 From: Jeremy Ary Date: Wed, 6 Mar 2024 08:20:27 -0600 Subject: [PATCH 1/7] fix: Swap security label check on the PR title validation job to explicit permissions instead (#3987) revert security label check for PR title validation & add explicit read-only permission instead Signed-off-by: Jeremy Ary --- .github/workflows/lint_pr.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint_pr.yml b/.github/workflows/lint_pr.yml index 12f7182ce8e..d1aa7d16a3e 100644 --- a/.github/workflows/lint_pr.yml +++ b/.github/workflows/lint_pr.yml @@ -7,12 +7,13 @@ on: - edited - synchronize +permissions: + # read-only perms specified due to use of pull_request_target in lieu of security label check + pull-requests: read + jobs: validate-title: - # when using pull_request_target, all jobs MUST have this if check for 'ok-to-test' or 'approved' for security purposes. if: - ((github.event.action == 'labeled' && (github.event.label.name == 'approved' || github.event.label.name == 'lgtm' || github.event.label.name == 'ok-to-test')) || - (github.event.action != 'labeled' && (contains(github.event.pull_request.labels.*.name, 'ok-to-test') || contains(github.event.pull_request.labels.*.name, 'approved') || contains(github.event.pull_request.labels.*.name, 'lgtm')))) && github.repository == 'feast-dev/feast' name: Validate PR title runs-on: ubuntu-latest From 43b2c287705c2a3e882517524229f155c9ce0a01 Mon Sep 17 00:00:00 2001 From: locnt241 <73770977+ElliotNguyen68@users.noreply.github.com> Date: Thu, 7 Mar 2024 00:20:46 +0700 Subject: [PATCH 2/7] feat: Add Entity df in format of a Spark Dataframe instead of just pd.DataFrame or string for SparkOfflineStore (#3988) * remove unused parameter when init sparksource Signed-off-by: tanlocnguyen * feat: add entity df to SparkOfflineStore when get_historical_features Signed-off-by: tanlocnguyen * fix: lint error Signed-off-by: tanlocnguyen 
--------- Signed-off-by: tanlocnguyen Co-authored-by: tanlocnguyen --- .../contrib/spark_offline_store/spark.py | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py index c9591b7c3f0..b1b1c04c7d7 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -125,7 +125,7 @@ def get_historical_features( config: RepoConfig, feature_views: List[FeatureView], feature_refs: List[str], - entity_df: Union[pandas.DataFrame, str], + entity_df: Union[pandas.DataFrame, str, pyspark.sql.DataFrame], registry: Registry, project: str, full_feature_names: bool = False, @@ -473,15 +473,16 @@ def _get_entity_df_event_timestamp_range( entity_df_event_timestamp.min().to_pydatetime(), entity_df_event_timestamp.max().to_pydatetime(), ) - elif isinstance(entity_df, str): + elif isinstance(entity_df, str) or isinstance(entity_df, pyspark.sql.DataFrame): # If the entity_df is a string (SQL query), determine range # from table - df = spark_session.sql(entity_df).select(entity_df_event_timestamp_col) - - # Checks if executing entity sql resulted in any data - if df.rdd.isEmpty(): - raise EntitySQLEmptyResults(entity_df) - + if isinstance(entity_df, str): + df = spark_session.sql(entity_df).select(entity_df_event_timestamp_col) + # Checks if executing entity sql resulted in any data + if df.rdd.isEmpty(): + raise EntitySQLEmptyResults(entity_df) + else: + df = entity_df # TODO(kzhang132): need utc conversion here. 
entity_df_event_timestamp_range = ( @@ -499,8 +500,11 @@ def _get_entity_schema( ) -> Dict[str, np.dtype]: if isinstance(entity_df, pd.DataFrame): return dict(zip(entity_df.columns, entity_df.dtypes)) - elif isinstance(entity_df, str): - entity_spark_df = spark_session.sql(entity_df) + elif isinstance(entity_df, str) or isinstance(entity_df, pyspark.sql.DataFrame): + if isinstance(entity_df, str): + entity_spark_df = spark_session.sql(entity_df) + else: + entity_spark_df = entity_df return dict( zip( entity_spark_df.columns, @@ -526,6 +530,9 @@ def _upload_entity_df( elif isinstance(entity_df, str): spark_session.sql(entity_df).createOrReplaceTempView(table_name) return + elif isinstance(entity_df, pyspark.sql.DataFrame): + entity_df.createOrReplaceTempView(table_name) + return else: raise InvalidEntityType(type(entity_df)) From 60f24f9ed16a216acb0f3642892dea73690ca29f Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Wed, 6 Mar 2024 13:28:54 -0500 Subject: [PATCH 3/7] feat: Dropping unit tests for Python 3.8 (#3989) feat: dropping unit tests for Python 3.8 Update unit_tests.yml to no longer run for Python 3.8 Signed-off-by: franciscojavierarceo --- .github/workflows/unit_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 31e6d08c743..7e2e3b577af 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10" ] + python-version: [ "3.9", "3.10" ] os: [ ubuntu-latest, macOS-latest ] exclude: - os: macOS-latest From f93c5fd4b8bd0031942c4f6ba4e84ebc54be8522 Mon Sep 17 00:00:00 2001 From: cburroughs Date: Wed, 6 Mar 2024 16:45:39 -0500 Subject: [PATCH 4/7] fix: Move gRPC dependencies to an extra (#3900) --- .../docker-compose/feast10/entrypoint.sh | 4 ++-- setup.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git 
a/java/serving/src/test/resources/docker-compose/feast10/entrypoint.sh b/java/serving/src/test/resources/docker-compose/feast10/entrypoint.sh index d7dcd03c5fb..0690b734c38 100755 --- a/java/serving/src/test/resources/docker-compose/feast10/entrypoint.sh +++ b/java/serving/src/test/resources/docker-compose/feast10/entrypoint.sh @@ -4,8 +4,8 @@ set -e # feast root directory is expected to be mounted (eg, by docker compose) cd /mnt/feast -pip install -e '.[redis]' +pip install -e '.[grpcio,redis]' cd /app python materialize.py -feast serve_transformations --port 8080 \ No newline at end of file +feast serve_transformations --port 8080 diff --git a/setup.py b/setup.py index 6d59fa0aa51..b601c901462 100644 --- a/setup.py +++ b/setup.py @@ -44,10 +44,6 @@ "click>=7.0.0,<9.0.0", "colorama>=0.3.9,<1", "dill~=0.3.0", - "grpcio>=1.56.2,<2", - "grpcio-tools>=1.56.2,<2", - "grpcio-reflection>=1.56.2,<2", - "grpcio-health-checking>=1.56.2,<2", "mypy-protobuf==3.1", "Jinja2>=2,<4", "jsonschema", @@ -143,7 +139,14 @@ IBIS_REQUIRED = [ "ibis-framework", - "ibis-substrait" + "ibis-substrait", +] + +GRPCIO_REQUIRED = [ + "grpcio>=1.56.2,<2", + "grpcio-tools>=1.56.2,<2", + "grpcio-reflection>=1.56.2,<2", + "grpcio-health-checking>=1.56.2,<2", ] DUCKDB_REQUIRED = [ @@ -209,6 +212,7 @@ + ROCKSET_REQUIRED + HAZELCAST_REQUIRED + IBIS_REQUIRED + + GRPCIO_REQUIRED ) @@ -375,6 +379,7 @@ def run(self): "docs": DOCS_REQUIRED, "cassandra": CASSANDRA_REQUIRED, "hazelcast": HAZELCAST_REQUIRED, + "grpcio": GRPCIO_REQUIRED, "rockset": ROCKSET_REQUIRED, "ibis": IBIS_REQUIRED, "duckdb": DUCKDB_REQUIRED From 0e036f86738cb7085630817394ba9e6c8cfbf8c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:08:51 -0500 Subject: [PATCH 5/7] chore: Bump ip from 1.1.5 to 1.1.9 in /ui (#3959) --- ui/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/yarn.lock b/ui/yarn.lock index becb6bbd7ba..5c9e5c17ace 
100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -6461,9 +6461,9 @@ invariant@^2.2.4: loose-envify "^1.0.0" ip@^1.1.0: - version "1.1.5" - resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.5.tgz#bdded70114290828c0a039e72ef25f5aaec4354a" - integrity sha1-vd7XARQpCCjAoDnnLvJfWq7ENUo= + version "1.1.9" + resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.9.tgz#8dfbcc99a754d07f425310b86a99546b1151e396" + integrity sha512-cyRxvOEpNHNtchU3Ln9KC/auJgup87llfQpQ+t5ghoC/UhL16SWzbueiCsdTnWmqAWl7LadfuwhlqmtOaqMHdQ== ipaddr.js@1.9.1: version "1.9.1" From 158a240b36593cc2fd5b60bf1d3bb0bc1b847b93 Mon Sep 17 00:00:00 2001 From: locnt241 <73770977+ElliotNguyen68@users.noreply.github.com> Date: Thu, 7 Mar 2024 19:43:32 +0700 Subject: [PATCH 6/7] chore: Update readme spark.md (#3992) update readme spark.md Signed-off-by: tanlocnguyen Co-authored-by: tanlocnguyen --- docs/reference/offline-stores/spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/offline-stores/spark.md b/docs/reference/offline-stores/spark.md index ae5ea78071e..3cca2aab1af 100644 --- a/docs/reference/offline-stores/spark.md +++ b/docs/reference/offline-stores/spark.md @@ -4,7 +4,7 @@ The Spark offline store provides support for reading [SparkSources](../data-sources/spark.md). -* Entity dataframes can be provided as a SQL query or can be provided as a Pandas dataframe. A Pandas dataframes will be converted to a Spark dataframe and processed as a temporary view. +* Entity dataframes can be provided as a SQL query, a Pandas dataframe, or a PySpark dataframe. A Pandas dataframe will be converted to a Spark dataframe and processed as a temporary view. 
## Disclaimer From 817995c12588cc35c53d1ad487efaaf53da287be Mon Sep 17 00:00:00 2001 From: Francisco Javier Arceo Date: Thu, 7 Mar 2024 07:53:20 -0500 Subject: [PATCH 7/7] feat: Dropping Python 3.8 from local integration tests and integration tests (#3994) --- .github/workflows/pr_integration_tests.yml | 4 ++-- .github/workflows/pr_local_integration_tests.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr_integration_tests.yml b/.github/workflows/pr_integration_tests.yml index ba4169c2921..2b0e6a1056b 100644 --- a/.github/workflows/pr_integration_tests.yml +++ b/.github/workflows/pr_integration_tests.yml @@ -86,7 +86,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.10" ] + python-version: [ "3.10" ] os: [ ubuntu-latest ] env: OS: ${{ matrix.os }} @@ -167,4 +167,4 @@ jobs: SNOWFLAKE_CI_PASSWORD: ${{ secrets.SNOWFLAKE_CI_PASSWORD }} SNOWFLAKE_CI_ROLE: ${{ secrets.SNOWFLAKE_CI_ROLE }} SNOWFLAKE_CI_WAREHOUSE: ${{ secrets.SNOWFLAKE_CI_WAREHOUSE }} - run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread \ No newline at end of file + run: pytest -n 8 --cov=./ --cov-report=xml --color=yes sdk/python/tests --integration --durations=5 --timeout=1200 --timeout_method=thread diff --git a/.github/workflows/pr_local_integration_tests.yml b/.github/workflows/pr_local_integration_tests.yml index 668bcb5e506..17ff54b1f84 100644 --- a/.github/workflows/pr_local_integration_tests.yml +++ b/.github/workflows/pr_local_integration_tests.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.10" ] + python-version: [ "3.10" ] os: [ ubuntu-latest ] env: OS: ${{ matrix.os }}