From be0e44e59b3e71cb11353e11f19146e0d1827432 Mon Sep 17 00:00:00 2001
From: Ruifeng Zheng
Date: Wed, 13 Sep 2023 15:51:27 +0800
Subject: [PATCH] [SPARK-45141][PYTHON][INFRA][TESTS] Pin `pyarrow==12.0.1` in CI

### What changes were proposed in this pull request?
Pin `pyarrow==12.0.1` in CI.

### Why are the changes needed?
To fix the test failure seen in
https://github.com/apache/spark/actions/runs/6167186123/job/16738683632

```
======================================================================
FAIL [0.095s]: test_from_to_pandas (pyspark.pandas.tests.data_type_ops.test_datetime_ops.DatetimeOpsTests)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/__w/spark/spark/python/pyspark/testing/pandasutils.py", line 122, in _assert_pandas_equal
    assert_series_equal(
  File "/usr/local/lib/python3.9/dist-packages/pandas/_testing/asserters.py", line 931, in assert_series_equal
    assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
  File "/usr/local/lib/python3.9/dist-packages/pandas/_testing/asserters.py", line 415, in assert_attr_equal
    raise_assert_detail(obj, msg, left_attr, right_attr)
  File "/usr/local/lib/python3.9/dist-packages/pandas/_testing/asserters.py", line 599, in raise_assert_detail
    raise AssertionError(msg)
AssertionError: Attributes of Series are different

Attribute "dtype" are different
[left]:  datetime64[ns]
[right]: datetime64[us]
```

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI and manually test

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #42897 from zhengruifeng/pin_pyarrow.

Authored-by: Ruifeng Zheng
Signed-off-by: Ruifeng Zheng
(cherry picked from commit e3d2dfa8b514f9358823c3cb1ad6523da8a6646b)
Signed-off-by: Dongjoon Hyun
(cherry picked from commit 8049a203b8c5f2f8045701916e66cfc786e16b57)
Signed-off-by: Dongjoon Hyun
---
 .github/workflows/build_and_test.yml | 4 ++--
 dev/infra/Dockerfile                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 33747fb5b61d3..2184577d5c44a 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -252,7 +252,7 @@ jobs:
     - name: Install Python packages (Python 3.8)
       if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       run: |
-        python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.48.1' 'protobuf==3.19.5'
+        python3.8 -m pip install 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas scipy unittest-xml-reporting 'grpcio==1.48.1' 'protobuf==3.19.5'
         python3.8 -m pip list
     # Run the tests.
     - name: Run tests
@@ -626,7 +626,7 @@ jobs:
         # See also https://issues.apache.org/jira/browse/SPARK-38279.
         python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' 'nest-asyncio==1.5.8' 'rpds-py==0.16.2' 'alabaster==0.7.13'
         python3.9 -m pip install ipython_genutils # See SPARK-38517
-        python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
+        python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' 'pyarrow==12.0.1' pandas 'plotly>=4.8'
         python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
         apt-get update -y
         apt-get install -y ruby ruby-dev
diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile
index 2e78f4af21447..93d8793826ffc 100644
--- a/dev/infra/Dockerfile
+++ b/dev/infra/Dockerfile
@@ -65,7 +65,7 @@ RUN Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='ht
 ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"
 
 RUN pypy3 -m pip install numpy 'pandas<=1.5.3' scipy coverage matplotlib
-RUN python3.9 -m pip install numpy pyarrow 'pandas<=1.5.3' scipy unittest-xml-reporting plotly>=4.8 scikit-learn 'mlflow>=1.0' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
+RUN python3.9 -m pip install numpy 'pyarrow==12.0.1' 'pandas<=1.5.3' scipy unittest-xml-reporting plotly>=4.8 scikit-learn 'mlflow>=1.0' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
 # Add Python deps for Spark Connect.
 RUN python3.9 -m pip install grpcio protobuf googleapis-common-protos grpcio-status
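
Note on the failure above: the dtype mismatch (`datetime64[ns]` vs `datetime64[us]`) comes from a behavior change in newer pyarrow. With pyarrow 13.x and pandas >= 2.0, `Table.to_pandas()` keeps non-nanosecond timestamp resolutions instead of coercing them to nanoseconds, so Spark's microsecond timestamps round-trip as `datetime64[us]`; pinning `pyarrow==12.0.1` keeps CI on the older coercion behavior. Below is a minimal sketch of that difference, an illustration only and not part of the patch: it assumes pandas >= 2.0 is installed, and the version-specific outputs in the comments describe the expected behavior rather than something verified here.

```
import datetime

import pyarrow as pa

# Arrow table with microsecond-precision timestamps, the resolution Spark
# uses for TimestampType when exchanging data through Arrow.
table = pa.table(
    {"ts": pa.array([datetime.datetime(2023, 9, 13, 15, 51, 27)],
                    type=pa.timestamp("us"))}
)

# Convert back to pandas and inspect the resulting dtype.
print(table.to_pandas()["ts"].dtype)
# pyarrow 12.x                  -> datetime64[ns] (coerced to nanoseconds)
# pyarrow >= 13 + pandas >= 2.0 -> datetime64[us] (resolution preserved),
#                                  which is what trips assert_series_equal above
```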