From 0d2d29f844c881dbef785f2df28671a82be0c335 Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Sun, 26 Jan 2025 20:00:30 +0530
Subject: [PATCH 01/14] Moved apache beam provider to new folder structure

---
 .github/boring-cyborg.yml | 5 +-
 docs/.gitignore | 1 +
 providers/apache/beam/README.rst | 83 +++++
 .../beam/docs}/.latest-doc-only-change.txt | 0
 .../apache/beam/hooks/beam/index.rst | 267 ++++++++++++++
 .../providers/apache/beam/hooks/index.rst | 17 +-
 .../airflow/providers/apache/beam/index.rst | 39 +++
 .../apache/beam/operators/beam/index.rst | 326 ++++++++++++++++++
 .../providers/apache/beam/operators/index.rst | 30 ++
 .../apache/beam/triggers/beam/index.rst | 126 +++++++
 .../providers/apache/beam/triggers/index.rst | 30 ++
 .../system/apache/beam/example_beam/index.rst | 36 ++
 .../beam/example_beam_java_flink/index.rst | 36 ++
 .../beam/example_beam_java_spark/index.rst | 36 ++
 .../system/apache/beam/example_go/index.rst | 36 ++
 .../apache/beam/example_go_dataflow/index.rst | 36 ++
 .../beam/example_java_dataflow/index.rst | 36 ++
 .../apache/beam/example_python/index.rst | 36 ++
 .../beam/example_python_async/index.rst | 36 ++
 .../beam/example_python_dataflow/index.rst | 36 ++
 .../_api/tests/system/apache/beam/index.rst | 39 +++
 .../tests/system/apache/beam/utils/index.rst | 136 ++++++++
 .../beam/docs/changelog.rst} | 0
 .../apache/beam/docs}/commits.rst | 0
 .../apache/beam/docs}/index.rst | 0
 .../installing-providers-from-sources.rst | 0
 .../apache/beam/docs}/operators.rst | 26 +-
 .../apache/beam/docs}/security.rst | 0
 .../providers => }/apache/beam/provider.yaml | 11 -
 providers/apache/beam/pyproject.toml | 89 +++++
 .../src/airflow/providers/apache/beam/LICENSE | 201 +++++++++++
 .../airflow/providers/apache/beam/README.md | 0
 .../airflow/providers/apache/beam/__init__.py | 0
 .../apache/beam/get_provider_info.py | 102 ++++++
 .../providers/apache/beam/hooks/__init__.py | 0
 .../providers/apache/beam/hooks/beam.py | 0
 .../apache/beam/operators/__init__.py | 0
 .../providers/apache/beam/operators/beam.py | 0
 .../apache/beam/triggers/__init__.py | 0
 .../providers/apache/beam/triggers/beam.py | 0
 providers/apache/beam/tests/conftest.py | 32 ++
 .../beam/tests/provider_tests/__init__.py | 17 +
 .../tests/provider_tests/apache/__init__.py | 17 +
 .../provider_tests}/apache/beam/__init__.py | 0
 .../apache/beam/hooks/__init__.py | 0
 .../apache/beam/hooks/test_beam.py | 0
 .../apache/beam/operators/__init__.py | 0
 .../apache/beam/operators/test_beam.py | 0
 .../apache/beam/triggers/__init__.py | 0
 .../apache/beam/triggers/test_beam.py | 0
 .../tests/system/apache/beam/__init__.py | 0
 .../tests/system/apache/beam/example_beam.py | 0
 .../apache/beam/example_beam_java_flink.py | 0
 .../apache/beam/example_beam_java_spark.py | 0
 .../tests/system/apache/beam/example_go.py | 0
 .../system/apache/beam/example_go_dataflow.py | 0
 .../apache/beam/example_java_dataflow.py | 0
 .../system/apache/beam/example_python.py | 0
 .../apache/beam/example_python_async.py | 0
 .../apache/beam/example_python_dataflow.py | 0
 .../beam}/tests/system/apache/beam/utils.py | 0
 pyproject.toml | 3 +
 scripts/ci/docker-compose/remove-sources.yml | 1 +
 scripts/ci/docker-compose/tests-sources.yml | 1 +
 64 files changed, 1889 insertions(+), 34 deletions(-)
 create mode 100644 providers/apache/beam/README.rst
 rename providers/{src/airflow/providers/apache/beam => apache/beam/docs}/.latest-doc-only-change.txt (100%)
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/beam/index.rst
 rename docs/apache-airflow-providers-apache-beam/changelog.rst => providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/index.rst (69%)
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/index.rst
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/beam/index.rst
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/index.rst
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst
 create mode 100644 providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_flink/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_spark/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_go/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_go_dataflow/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_java_dataflow/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_python/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_async/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_dataflow/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/index.rst
 create mode 100644 providers/apache/beam/docs/_api/tests/system/apache/beam/utils/index.rst
 rename providers/{src/airflow/providers/apache/beam/CHANGELOG.rst => apache/beam/docs/changelog.rst} (100%)
 rename {docs/apache-airflow-providers-apache-beam => providers/apache/beam/docs}/commits.rst (100%)
 rename {docs/apache-airflow-providers-apache-beam => providers/apache/beam/docs}/index.rst (100%)
 rename {docs/apache-airflow-providers-apache-beam => providers/apache/beam/docs}/installing-providers-from-sources.rst (100%)
 rename {docs/apache-airflow-providers-apache-beam => providers/apache/beam/docs}/operators.rst (87%)
 rename {docs/apache-airflow-providers-apache-beam => providers/apache/beam/docs}/security.rst (100%)
 rename providers/{src/airflow/providers => }/apache/beam/provider.yaml (90%)
 create mode 100644 providers/apache/beam/pyproject.toml
 create mode 100644 providers/apache/beam/src/airflow/providers/apache/beam/LICENSE
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/README.md (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/__init__.py (100%)
 create mode 100644 providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/hooks/__init__.py (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/hooks/beam.py (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/operators/__init__.py (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/operators/beam.py (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/triggers/__init__.py (100%)
 rename providers/{ => apache/beam}/src/airflow/providers/apache/beam/triggers/beam.py (100%)
 create mode 100644 providers/apache/beam/tests/conftest.py
 create mode 100644 providers/apache/beam/tests/provider_tests/__init__.py
 create mode 100644 providers/apache/beam/tests/provider_tests/apache/__init__.py
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/__init__.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/hooks/__init__.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/hooks/test_beam.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/operators/__init__.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/operators/test_beam.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/triggers/__init__.py (100%)
 rename providers/{tests => apache/beam/tests/provider_tests}/apache/beam/triggers/test_beam.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/__init__.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_beam.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_beam_java_flink.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_beam_java_spark.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_go.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_go_dataflow.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_java_dataflow.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_python.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_python_async.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/example_python_dataflow.py (100%)
 rename providers/{ => apache/beam}/tests/system/apache/beam/utils.py (100%)

diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml
index 251afdfa7f482..9503a05a46c08 100644
--- a/.github/boring-cyborg.yml
+++ b/.github/boring-cyborg.yml
@@ -31,10 +31,7 @@ labelPRBasedOnFilePath:
     - providers/tests/system/amazon/aws/**/*

   provider:apache-beam:
-    - providers/src/airflow/providers/apache/beam/**/*
-    - docs/apache-airflow-providers-apache-beam/**/*
-    - providers/tests/apache/beam/**/*
-    - providers/tests/system/apache/beam/**/*
+    - providers/apache/beam/**

   provider:apache-cassandra:
     - providers/src/airflow/providers/apache/cassandra/**/*
diff --git a/docs/.gitignore b/docs/.gitignore
index 960685d73623d..5cc8421b146d6 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -2,6 +2,7 @@
 # Eventually when we swtich to individually build docs for each provider, we should remove this altogether
 apache-airflow-providers-airbyte
 apache-airflow-providers-alibaba
+apache-airflow-providers-apache-beam
 apache-airflow-providers-apache-iceberg
 apache-airflow-providers-celery
 apache-airflow-providers-cohere
diff --git a/providers/apache/beam/README.rst b/providers/apache/beam/README.rst
new file mode 100644
index 0000000000000..6de998e174c45
--- /dev/null
+++ b/providers/apache/beam/README.rst
@@ -0,0 +1,83 @@
+
+ .. Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements. See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership. The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+ ..   http://www.apache.org/licenses/LICENSE-2.0
+
+ ..
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +Package ``apache-airflow-providers-apache-beam`` + +Release: ``6.0.0`` + + +`Apache Beam `__. + + +Provider package +---------------- + +This is a provider package for ``apache.beam`` provider. All classes for this provider package +are in ``airflow.providers.apache.beam`` python package. + +You can find package information and changelog for the provider +in the `documentation `_. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-beam`` + +The package supports the following python versions: 3.9,3.10,3.11 + +Requirements +------------ + +================== ================== +PIP package Version required +================== ================== +``apache-airflow`` ``>=2.9.0`` +``apache-beam`` ``>=2.53.0`` +``pyarrow`` ``>=14.0.1`` +================== ================== + +Cross provider package dependencies +----------------------------------- + +Those are dependencies that might be needed in order to use all the features of the package. +You need to install the specified provider packages in order to use them. + +You can install such cross-provider dependencies when installing from PyPI. For example: + +.. code-block:: bash + + pip install apache-airflow-providers-apache-beam[common.compat] + + +================================================================================================================== ================= +Dependent package Extra +================================================================================================================== ================= +`apache-airflow-providers-common-compat `_ ``common.compat`` +`apache-airflow-providers-google `_ ``google`` +================================================================================================================== ================= + +The changelog for the provider package can be found in the +`changelog `_. diff --git a/providers/src/airflow/providers/apache/beam/.latest-doc-only-change.txt b/providers/apache/beam/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/beam/.latest-doc-only-change.txt rename to providers/apache/beam/docs/.latest-doc-only-change.txt diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/beam/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/beam/index.rst new file mode 100644 index 0000000000000..573f4ad55cdfa --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/beam/index.rst @@ -0,0 +1,267 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam.hooks.beam` +================================================== + +.. py:module:: airflow.providers.apache.beam.hooks.beam + +.. autoapi-nested-parse:: + + This module contains a Apache Beam Hook. + + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + airflow.providers.apache.beam.hooks.beam.BeamRunnerType + airflow.providers.apache.beam.hooks.beam.BeamHook + airflow.providers.apache.beam.hooks.beam.BeamAsyncHook + + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + airflow.providers.apache.beam.hooks.beam.beam_options_to_args + airflow.providers.apache.beam.hooks.beam.process_fd + airflow.providers.apache.beam.hooks.beam.run_beam_command + + + +.. py:class:: BeamRunnerType + + + Helper class for listing runner types. + + For more information about runners see: https://beam.apache.org/documentation/ + + .. py:attribute:: DataflowRunner + :value: 'DataflowRunner' + + + + .. py:attribute:: DirectRunner + :value: 'DirectRunner' + + + + .. py:attribute:: SparkRunner + :value: 'SparkRunner' + + + + .. py:attribute:: FlinkRunner + :value: 'FlinkRunner' + + + + .. py:attribute:: SamzaRunner + :value: 'SamzaRunner' + + + + .. py:attribute:: NemoRunner + :value: 'NemoRunner' + + + + .. py:attribute:: JetRunner + :value: 'JetRunner' + + + + .. py:attribute:: Twister2Runner + :value: 'Twister2Runner' + + + + +.. py:function:: beam_options_to_args(options) + + Return a formatted pipeline options from a dictionary of arguments. + + The logic of this method should be compatible with Apache Beam: + https://github.com/apache/beam/blob/b56740f0e8cd80c2873412847d0b336837429fb9/sdks/python/ + apache_beam/options/pipeline_options.py#L230-L251 + + :param options: Dictionary with options + :return: List of arguments + + +.. py:function:: process_fd(proc, fd, log, process_line_callback = None, check_job_status_callback = None) + + Print output to logs. + + :param proc: subprocess. + :param fd: File descriptor. + :param process_line_callback: Optional callback which can be used to process + stdout and stderr to detect job id. + :param log: logger. + + +.. py:function:: run_beam_command(cmd, log, process_line_callback = None, working_directory = None, check_job_status_callback = None) + + Run pipeline command in subprocess. + + :param cmd: Parts of the command to be run in subprocess + :param process_line_callback: Optional callback which can be used to process + stdout and stderr to detect job id + :param working_directory: Working directory + :param log: logger. + + +.. py:class:: BeamHook(runner) + + + Bases: :py:obj:`airflow.hooks.base.BaseHook` + + Hook for Apache Beam. + + All the methods in the hook where project_id is used must be called with + keyword arguments rather than positional. + + :param runner: Runner type + + .. 
py:method:: start_python_pipeline(variables, py_file, py_options, py_interpreter = 'python3', py_requirements = None, py_system_site_packages = False, process_line_callback = None, check_job_status_callback = None) + + Start Apache Beam python pipeline. + + :param variables: Variables passed to the pipeline. + :param py_file: Path to the python file to execute. + :param py_options: Additional options. + :param py_interpreter: Python version of the Apache Beam pipeline. + If None, this defaults to the python3. + To track python versions supported by beam and related + issues check: https://issues.apache.org/jira/browse/BEAM-1251 + :param py_requirements: Additional python package(s) to install. + If a value is passed to this parameter, a new virtual environment has been created with + additional packages installed. + + You could also install the apache-beam package if it is not installed on your system, or you want + to use a different version. + :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. + See virtualenv documentation for more information. + + This option is only relevant if the ``py_requirements`` parameter is not None. + :param process_line_callback: (optional) Callback that can be used to process each line of + the stdout and stderr file descriptors. + + + .. py:method:: start_java_pipeline(variables, jar, job_class = None, process_line_callback = None) + + Start Apache Beam Java pipeline. + + :param variables: Variables passed to the job. + :param jar: Name of the jar for the pipeline + :param job_class: Name of the java class for the pipeline. + :param process_line_callback: (optional) Callback that can be used to process each line of + the stdout and stderr file descriptors. + + + .. py:method:: start_go_pipeline(variables, go_file, process_line_callback = None, should_init_module = False) + + Start Apache Beam Go pipeline with a source file. + + :param variables: Variables passed to the job. + :param go_file: Path to the Go file with your beam pipeline. + :param process_line_callback: (optional) Callback that can be used to process each line of + the stdout and stderr file descriptors. + :param should_init_module: If False (default), will just execute a `go run` command. If True, will + init a module and dependencies with a ``go mod init`` and ``go mod tidy``, useful when pulling + source with GCSHook. + :return: + + + .. py:method:: start_go_pipeline_with_binary(variables, launcher_binary, worker_binary, process_line_callback = None) + + Start Apache Beam Go pipeline with an executable binary. + + :param variables: Variables passed to the job. + :param launcher_binary: Path to the binary compiled for the launching platform. + :param worker_binary: Path to the binary compiled for the worker platform. + :param process_line_callback: (optional) Callback that can be used to process each line of + the stdout and stderr file descriptors. + + + +.. py:class:: BeamAsyncHook(runner) + + + Bases: :py:obj:`BeamHook` + + Asynchronous hook for Apache Beam. + + :param runner: Runner type. + + .. py:method:: start_python_pipeline_async(variables, py_file, py_options = None, py_interpreter = 'python3', py_requirements = None, py_system_site_packages = False) + :async: + + Start Apache Beam python pipeline. + + :param variables: Variables passed to the pipeline. + :param py_file: Path to the python file to execute. + :param py_options: Additional options. + :param py_interpreter: Python version of the Apache Beam pipeline. 
+ If None, this defaults to the python3. + To track python versions supported by beam and related + issues check: https://issues.apache.org/jira/browse/BEAM-1251 + :param py_requirements: Additional python package(s) to install. + If a value is passed to this parameter, a new virtual environment has been created with + additional packages installed. + You could also install the apache-beam package if it is not installed on your system, or you want + to use a different version. + :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. + See virtualenv documentation for more information. + This option is only relevant if the ``py_requirements`` parameter is not None. + + + .. py:method:: start_java_pipeline_async(variables, jar, job_class = None) + :async: + + Start Apache Beam Java pipeline. + + :param variables: Variables passed to the job. + :param jar: Name of the jar for the pipeline. + :param job_class: Name of the java class for the pipeline. + :return: Beam command execution return code. + + + .. py:method:: start_pipeline_async(variables, command_prefix, working_directory = None) + :async: + + + .. py:method:: run_beam_command_async(cmd, log, working_directory = None) + :async: + + Run pipeline command in subprocess. + + :param cmd: Parts of the command to be run in subprocess + :param working_directory: Working directory + :param log: logger. + + + .. py:method:: read_logs(stream_reader) + :async: diff --git a/docs/apache-airflow-providers-apache-beam/changelog.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/index.rst similarity index 69% rename from docs/apache-airflow-providers-apache-beam/changelog.rst rename to providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/index.rst index 7728c11ed6a62..41b328533cb6c 100644 --- a/docs/apache-airflow-providers-apache-beam/changelog.rst +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/hooks/index.rst @@ -1,4 +1,3 @@ - .. Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -16,10 +15,16 @@ specific language governing permissions and limitations under the License. - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. +:py:mod:`airflow.providers.apache.beam.hooks` +============================================= + +.. py:module:: airflow.providers.apache.beam.hooks + - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 -.. include:: ../../providers/src/airflow/providers/apache/beam/CHANGELOG.rst + beam/index.rst diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/index.rst new file mode 100644 index 0000000000000..dea2e72f02965 --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/index.rst @@ -0,0 +1,39 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam` +======================================= + +.. py:module:: airflow.providers.apache.beam + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + hooks/index.rst + operators/index.rst + triggers/index.rst + + +Package Contents +---------------- + +.. py:data:: __version__ + :value: '5.9.1' diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/beam/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/beam/index.rst new file mode 100644 index 0000000000000..4dbcf4e24afa7 --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/beam/index.rst @@ -0,0 +1,326 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam.operators.beam` +====================================================== + +.. py:module:: airflow.providers.apache.beam.operators.beam + +.. autoapi-nested-parse:: + + This module contains Apache Beam operators. + + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + airflow.providers.apache.beam.operators.beam.BeamDataflowMixin + airflow.providers.apache.beam.operators.beam.BeamBasePipelineOperator + airflow.providers.apache.beam.operators.beam.BeamRunPythonPipelineOperator + airflow.providers.apache.beam.operators.beam.BeamRunJavaPipelineOperator + airflow.providers.apache.beam.operators.beam.BeamRunGoPipelineOperator + + + + +.. py:class:: BeamDataflowMixin + + + Helper class to store common, Dataflow specific logic for both. + + :class:`~airflow.providers.apache.beam.operators.beam.BeamRunPythonPipelineOperator`, + :class:`~airflow.providers.apache.beam.operators.beam.BeamRunJavaPipelineOperator` and + :class:`~airflow.providers.apache.beam.operators.beam.BeamRunGoPipelineOperator`. + + .. py:attribute:: dataflow_hook + :type: airflow.providers.google.cloud.hooks.dataflow.DataflowHook | None + + + + .. py:attribute:: dataflow_config + :type: airflow.providers.google.cloud.operators.dataflow.DataflowConfiguration + + + + .. py:attribute:: gcp_conn_id + :type: str + + + + .. py:attribute:: dataflow_support_impersonation + :type: bool + :value: True + + + + +.. 
py:class:: BeamBasePipelineOperator(*, runner = 'DirectRunner', default_pipeline_options = None, pipeline_options = None, gcp_conn_id = 'google_cloud_default', dataflow_config = None, **kwargs) + + + Bases: :py:obj:`airflow.models.BaseOperator`, :py:obj:`BeamDataflowMixin`, :py:obj:`abc.ABC` + + Abstract base class for Beam Pipeline Operators. + + :param runner: Runner on which pipeline will be run. By default "DirectRunner" is being used. + Other possible options: DataflowRunner, SparkRunner, FlinkRunner, PortableRunner. + See: :class:`~providers.apache.beam.hooks.beam.BeamRunnerType` + See: https://beam.apache.org/documentation/runners/capability-matrix/ + + :param default_pipeline_options: Map of default pipeline options. + :param pipeline_options: Map of pipeline options.The key must be a dictionary. + The value can contain different types: + + * If the value is None, the single option - ``--key`` (without value) will be added. + * If the value is False, this option will be skipped + * If the value is True, the single option - ``--key`` (without value) will be added. + * If the value is list, the many options will be added for each key. + If the value is ``['A', 'B']`` and the key is ``key`` then the ``--key=A --key=B`` options + will be left + * Other value types will be replaced with the Python textual representation. + + When defining labels (labels option), you can also provide a dictionary. + :param gcp_conn_id: Optional. + The connection ID to use connecting to Google Cloud Storage if python file is on GCS. + :param dataflow_config: Dataflow's configuration, used when runner type is set to DataflowRunner, + (optional) defaults to None. + + .. py:property:: dataflow_job_id + + + .. py:method:: execute_complete(context, event) + + Execute when the trigger fires - returns immediately. + + Relies on trigger to throw an exception, otherwise it assumes execution was + successful. + + + +.. py:class:: BeamRunPythonPipelineOperator(*, py_file, runner = 'DirectRunner', default_pipeline_options = None, pipeline_options = None, py_interpreter = 'python3', py_options = None, py_requirements = None, py_system_site_packages = False, gcp_conn_id = 'google_cloud_default', dataflow_config = None, deferrable = conf.getboolean('operators', 'default_deferrable', fallback=False), **kwargs) + + + Bases: :py:obj:`BeamBasePipelineOperator` + + Launch Apache Beam pipelines written in Python. + + Note that both ``default_pipeline_options`` and ``pipeline_options`` + will be merged to specify pipeline execution parameter, and + ``default_pipeline_options`` is expected to save high-level options, + for instances, project and zone information, which apply to all beam + operators in the DAG. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:BeamRunPythonPipelineOperator` + + .. seealso:: + For more detail on Apache Beam have a look at the reference: + https://beam.apache.org/documentation/ + + :param py_file: Reference to the python Apache Beam pipeline file.py, e.g., + /some/local/file/path/to/your/python/pipeline/file. (templated) + :param py_options: Additional python options, e.g., ["-m", "-v"]. + :param py_interpreter: Python version of the beam pipeline. + If None, this defaults to the python3. + To track python versions supported by beam and related + issues check: https://issues.apache.org/jira/browse/BEAM-1251 + :param py_requirements: Additional python package(s) to install. 
+ If a value is passed to this parameter, a new virtual environment has been created with + additional packages installed. + + You could also install the apache_beam package if it is not installed on your system or you want + to use a different version. + :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. + See virtualenv documentation for more information. + This option is only relevant if the ``py_requirements`` parameter is not None. + :param deferrable: Run operator in the deferrable mode: checks for the state using asynchronous calls. + + .. py:attribute:: template_fields + :type: collections.abc.Sequence[str] + :value: ('py_file', 'runner', 'pipeline_options', 'default_pipeline_options', 'dataflow_config') + + + + .. py:attribute:: template_fields_renderers + + + + .. py:attribute:: operator_extra_links + :value: () + + + + .. py:method:: execute(context) + + Execute the Apache Beam Python Pipeline. + + + .. py:method:: execute_sync(context) + + + .. py:method:: execute_async(context) + + + .. py:method:: on_kill() + + Override this method to clean up subprocesses when a task instance gets killed. + + Any use of the threading, subprocess or multiprocessing module within an + operator needs to be cleaned up, or it will leave ghost processes behind. + + + +.. py:class:: BeamRunJavaPipelineOperator(*, jar, runner = 'DirectRunner', job_class = None, default_pipeline_options = None, pipeline_options = None, gcp_conn_id = 'google_cloud_default', dataflow_config = None, deferrable = conf.getboolean('operators', 'default_deferrable', fallback=False), **kwargs) + + + Bases: :py:obj:`BeamBasePipelineOperator` + + Launching Apache Beam pipelines written in Java. + + Note that both + ``default_pipeline_options`` and ``pipeline_options`` will be merged to specify pipeline + execution parameter, and ``default_pipeline_options`` is expected to save + high-level pipeline_options, for instances, project and zone information, which + apply to all Apache Beam operators in the DAG. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:BeamRunJavaPipelineOperator` + + .. seealso:: + For more detail on Apache Beam have a look at the reference: + https://beam.apache.org/documentation/ + + You need to pass the path to your jar file as a file reference with the ``jar`` + parameter, the jar needs to be a self executing jar (see documentation here: + https://beam.apache.org/documentation/runners/dataflow/#self-executing-jar). + Use ``pipeline_options`` to pass on pipeline_options to your job. + + :param jar: The reference to a self executing Apache Beam jar (templated). + :param job_class: The name of the Apache Beam pipeline class to be executed, it + is often not the main class configured in the pipeline jar file. + + .. py:attribute:: template_fields + :type: collections.abc.Sequence[str] + :value: ('jar', 'runner', 'job_class', 'pipeline_options', 'default_pipeline_options', 'dataflow_config') + + + + .. py:attribute:: template_fields_renderers + + + + .. py:attribute:: ui_color + :value: '#0273d4' + + + + .. py:attribute:: operator_extra_links + :value: () + + + + .. py:method:: execute(context) + + Execute the Apache Beam Python Pipeline. + + + .. py:method:: execute_sync(context) + + Execute the Apache Beam Pipeline. + + + .. py:method:: execute_async(context) + + + .. py:method:: on_kill() + + Override this method to clean up subprocesses when a task instance gets killed. 
+ + Any use of the threading, subprocess or multiprocessing module within an + operator needs to be cleaned up, or it will leave ghost processes behind. + + + +.. py:class:: BeamRunGoPipelineOperator(*, go_file = '', launcher_binary = '', worker_binary = '', runner = 'DirectRunner', default_pipeline_options = None, pipeline_options = None, gcp_conn_id = 'google_cloud_default', dataflow_config = None, **kwargs) + + + Bases: :py:obj:`BeamBasePipelineOperator` + + Launch Apache Beam pipelines written in Go. + + Note that both ``default_pipeline_options`` and ``pipeline_options`` + will be merged to specify pipeline execution parameter, and + ``default_pipeline_options`` is expected to save high-level options, + for instances, project and zone information, which apply to all beam + operators in the DAG. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:BeamRunGoPipelineOperator` + + .. seealso:: + For more detail on Apache Beam have a look at the reference: + https://beam.apache.org/documentation/ + + :param go_file: Reference to the Apache Beam pipeline Go source file, + e.g. /local/path/to/main.go or gs://bucket/path/to/main.go. + Exactly one of go_file and launcher_binary must be provided. + + :param launcher_binary: Reference to the Apache Beam pipeline Go binary compiled for the launching + platform, e.g. /local/path/to/launcher-main or gs://bucket/path/to/launcher-main. + Exactly one of go_file and launcher_binary must be provided. + + :param worker_binary: Reference to the Apache Beam pipeline Go binary compiled for the worker platform, + e.g. /local/path/to/worker-main or gs://bucket/path/to/worker-main. + Needed if the OS or architecture of the workers running the pipeline is different from that + of the platform launching the pipeline. For more information, see the Apache Beam documentation + for Go cross compilation: https://beam.apache.org/documentation/sdks/go-cross-compilation/. + If launcher_binary is not set, providing a worker_binary will have no effect. If launcher_binary is + set and worker_binary is not, worker_binary will default to the value of launcher_binary. + + .. py:attribute:: template_fields + :value: ['go_file', 'launcher_binary', 'worker_binary', 'runner', 'pipeline_options',... + + + + .. py:attribute:: template_fields_renderers + + + + .. py:attribute:: operator_extra_links + :value: () + + + + .. py:method:: execute(context) + + Execute the Apache Beam Pipeline. + + + .. py:method:: on_kill() + + Override this method to clean up subprocesses when a task instance gets killed. + + Any use of the threading, subprocess or multiprocessing module within an + operator needs to be cleaned up, or it will leave ghost processes behind. diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/index.rst new file mode 100644 index 0000000000000..faa8fb832e4ab --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/operators/index.rst @@ -0,0 +1,30 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam.operators` +================================================= + +.. py:module:: airflow.providers.apache.beam.operators + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + beam/index.rst diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst new file mode 100644 index 0000000000000..fade25e4af7e7 --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst @@ -0,0 +1,126 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam.triggers.beam` +===================================================== + +.. py:module:: airflow.providers.apache.beam.triggers.beam + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + airflow.providers.apache.beam.triggers.beam.BeamPipelineBaseTrigger + airflow.providers.apache.beam.triggers.beam.BeamPythonPipelineTrigger + airflow.providers.apache.beam.triggers.beam.BeamJavaPipelineTrigger + + + + +.. py:class:: BeamPipelineBaseTrigger(**kwargs) + + + Bases: :py:obj:`airflow.triggers.base.BaseTrigger` + + Base class for Beam Pipeline Triggers. + + +.. py:class:: BeamPythonPipelineTrigger(variables, py_file, py_options = None, py_interpreter = 'python3', py_requirements = None, py_system_site_packages = False, runner = 'DirectRunner', gcp_conn_id = 'google_cloud_default') + + + Bases: :py:obj:`BeamPipelineBaseTrigger` + + Trigger to perform checking the Python pipeline status until it reaches terminate state. + + :param variables: Variables passed to the pipeline. + :param py_file: Path to the python file to execute. + :param py_options: Additional options. + :param py_interpreter: Python version of the Apache Beam pipeline. If `None`, this defaults to the + python3. To track python versions supported by beam and related issues + check: https://issues.apache.org/jira/browse/BEAM-1251 + :param py_requirements: Additional python package(s) to install. + If a value is passed to this parameter, a new virtual environment has been created with + additional packages installed. + You could also install the apache-beam package if it is not installed on your system, or you want + to use a different version. 
+ :param py_system_site_packages: Whether to include system_site_packages in your virtualenv. + See virtualenv documentation for more information. + This option is only relevant if the ``py_requirements`` parameter is not None. + :param runner: Runner on which pipeline will be run. By default, "DirectRunner" is being used. + Other possible options: DataflowRunner, SparkRunner, FlinkRunner, PortableRunner. + See: :class:`~providers.apache.beam.hooks.beam.BeamRunnerType` + See: https://beam.apache.org/documentation/runners/capability-matrix/ + :param gcp_conn_id: Optional. The connection ID to use connecting to Google Cloud. + + .. py:method:: serialize() + + Serialize BeamPythonPipelineTrigger arguments and classpath. + + + .. py:method:: run() + :async: + + Get current pipeline status and yields a TriggerEvent. + + + +.. py:class:: BeamJavaPipelineTrigger(variables, jar, job_class = None, runner = 'DirectRunner', check_if_running = False, project_id = None, location = None, job_name = None, gcp_conn_id = 'google_cloud_default', impersonation_chain = None, poll_sleep = 10, cancel_timeout = None) + + + Bases: :py:obj:`BeamPipelineBaseTrigger` + + Trigger to perform checking the Java pipeline status until it reaches terminate state. + + :param variables: Variables passed to the job. + :param jar: Name of the jar for the pipeline. + :param job_class: Optional. Name of the java class for the pipeline. + :param runner: Runner on which pipeline will be run. By default, "DirectRunner" is being used. + Other possible options: DataflowRunner, SparkRunner, FlinkRunner, PortableRunner. + See: :class:`~providers.apache.beam.hooks.beam.BeamRunnerType` + See: https://beam.apache.org/documentation/runners/capability-matrix/ + :param check_if_running: Optional. Before running job, validate that a previous run is not in process. + :param project_id: Optional. The Google Cloud project ID in which to start a job. + :param location: Optional. Job location. + :param job_name: Optional. The 'jobName' to use when executing the Dataflow job. + :param gcp_conn_id: Optional. The connection ID to use connecting to Google Cloud. + :param impersonation_chain: Optional. GCP service account to impersonate using short-term + credentials, or chained list of accounts required to get the access_token + of the last account in the list, which will be impersonated in the request. + If set as a string, the account must grant the originating account + the Service Account Token Creator IAM role. + If set as a sequence, the identities from the list must grant + Service Account Token Creator IAM role to the directly preceding identity, with first + account from the list granting this role to the originating account (templated). + :param poll_sleep: Optional. The time in seconds to sleep between polling GCP for the dataflow job status. + Default value is 10s. + :param cancel_timeout: Optional. How long (in seconds) operator should wait for the pipeline to be + successfully cancelled when task is being killed. Default value is 300s. + + .. py:method:: serialize() + + Serialize BeamJavaPipelineTrigger arguments and classpath. + + + .. py:method:: run() + :async: + + Get current Java pipeline status and yields a TriggerEvent. 
diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/index.rst new file mode 100644 index 0000000000000..76add3058fcef --- /dev/null +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/index.rst @@ -0,0 +1,30 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`airflow.providers.apache.beam.triggers` +================================================ + +.. py:module:: airflow.providers.apache.beam.triggers + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + beam/index.rst diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam/index.rst new file mode 100644 index 0000000000000..0596d2e7d4f6d --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_beam` +=============================================== + +.. py:module:: tests.system.apache.beam.example_beam + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: jar_to_local_direct_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_flink/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_flink/index.rst new file mode 100644 index 0000000000000..662549cef14be --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_flink/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_beam_java_flink` +========================================================== + +.. py:module:: tests.system.apache.beam.example_beam_java_flink + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: jar_to_local_flink_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_spark/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_spark/index.rst new file mode 100644 index 0000000000000..299cf2a5b8ca3 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_beam_java_spark/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_beam_java_spark` +========================================================== + +.. py:module:: tests.system.apache.beam.example_beam_java_spark + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: jar_to_local_spark_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go/index.rst new file mode 100644 index 0000000000000..20171c2837d55 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_go` +============================================= + +.. py:module:: tests.system.apache.beam.example_go + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: start_go_pipeline_local_direct_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go_dataflow/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go_dataflow/index.rst new file mode 100644 index 0000000000000..445bcf15c3555 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_go_dataflow/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_go_dataflow` +====================================================== + +.. py:module:: tests.system.apache.beam.example_go_dataflow + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: start_go_job_dataflow_runner_async + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_java_dataflow/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_java_dataflow/index.rst new file mode 100644 index 0000000000000..e648a973a1b4e --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_java_dataflow/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_java_dataflow` +======================================================== + +.. py:module:: tests.system.apache.beam.example_java_dataflow + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: jar_to_local_dataflow_runner + + + +.. 
py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python/index.rst new file mode 100644 index 0000000000000..6590465aff3d2 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_python` +================================================= + +.. py:module:: tests.system.apache.beam.example_python + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: start_python_pipeline_local_direct_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_async/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_async/index.rst new file mode 100644 index 0000000000000..6086b72c058fb --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_async/index.rst @@ -0,0 +1,36 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_python_async` +======================================================= + +.. py:module:: tests.system.apache.beam.example_python_async + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: start_python_pipeline_local_direct_runner + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_dataflow/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_dataflow/index.rst new file mode 100644 index 0000000000000..bbcf4b3791e13 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/example_python_dataflow/index.rst @@ -0,0 +1,36 @@ + .. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.example_python_dataflow` +========================================================== + +.. py:module:: tests.system.apache.beam.example_python_dataflow + +.. autoapi-nested-parse:: + + Example Airflow DAG for Apache Beam operators + + + +Module Contents +--------------- + +.. py:data:: start_python_job_dataflow_runner_async + + + +.. py:data:: test_run diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/index.rst new file mode 100644 index 0000000000000..db63523c42e75 --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/index.rst @@ -0,0 +1,39 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam` +================================== + +.. py:module:: tests.system.apache.beam + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + example_beam/index.rst + example_beam_java_flink/index.rst + example_beam_java_spark/index.rst + example_go/index.rst + example_go_dataflow/index.rst + example_java_dataflow/index.rst + example_python/index.rst + example_python_async/index.rst + example_python_dataflow/index.rst + utils/index.rst diff --git a/providers/apache/beam/docs/_api/tests/system/apache/beam/utils/index.rst b/providers/apache/beam/docs/_api/tests/system/apache/beam/utils/index.rst new file mode 100644 index 0000000000000..03fb284dd11fc --- /dev/null +++ b/providers/apache/beam/docs/_api/tests/system/apache/beam/utils/index.rst @@ -0,0 +1,136 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +:py:mod:`tests.system.apache.beam.utils` +======================================== + +.. py:module:: tests.system.apache.beam.utils + +.. autoapi-nested-parse:: + + Example Utils for Apache Beam operator example DAG's + + + +Module Contents +--------------- + +.. py:data:: GCP_PROJECT_ID + + + +.. py:data:: GCS_INPUT + + + +.. py:data:: GCS_TMP + + + +.. py:data:: GCS_STAGING + + + +.. py:data:: GCS_OUTPUT + + + +.. py:data:: GCS_PYTHON + + + +.. py:data:: GCS_PYTHON_DATAFLOW_ASYNC + + + +.. py:data:: GCS_GO + + + +.. py:data:: GCS_GO_DATAFLOW_ASYNC + + + +.. py:data:: GCS_JAR_DIRECT_RUNNER + + + +.. py:data:: GCS_JAR_DATAFLOW_RUNNER + + + +.. py:data:: GCS_JAR_SPARK_RUNNER + + + +.. py:data:: GCS_JAR_FLINK_RUNNER + + + +.. py:data:: GCS_JAR_DIRECT_RUNNER_PARTS + + + +.. py:data:: GCS_JAR_DIRECT_RUNNER_BUCKET_NAME + + + +.. py:data:: GCS_JAR_DIRECT_RUNNER_OBJECT_NAME + + + +.. py:data:: GCS_JAR_DATAFLOW_RUNNER_PARTS + + + +.. py:data:: GCS_JAR_DATAFLOW_RUNNER_BUCKET_NAME + + + +.. py:data:: GCS_JAR_DATAFLOW_RUNNER_OBJECT_NAME + + + +.. py:data:: GCS_JAR_SPARK_RUNNER_PARTS + + + +.. py:data:: GCS_JAR_SPARK_RUNNER_BUCKET_NAME + + + +.. py:data:: GCS_JAR_SPARK_RUNNER_OBJECT_NAME + + + +.. py:data:: GCS_JAR_FLINK_RUNNER_PARTS + + + +.. py:data:: GCS_JAR_FLINK_RUNNER_BUCKET_NAME + + + +.. py:data:: GCS_JAR_FLINK_RUNNER_OBJECT_NAME + + + +.. py:data:: DEFAULT_ARGS + + + +.. py:data:: START_DATE diff --git a/providers/src/airflow/providers/apache/beam/CHANGELOG.rst b/providers/apache/beam/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/beam/CHANGELOG.rst rename to providers/apache/beam/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-beam/commits.rst b/providers/apache/beam/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-beam/commits.rst rename to providers/apache/beam/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-beam/index.rst b/providers/apache/beam/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-beam/index.rst rename to providers/apache/beam/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-beam/installing-providers-from-sources.rst b/providers/apache/beam/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-beam/installing-providers-from-sources.rst rename to providers/apache/beam/docs/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-apache-beam/operators.rst b/providers/apache/beam/docs/operators.rst similarity index 87% rename from docs/apache-airflow-providers-apache-beam/operators.rst rename to providers/apache/beam/docs/operators.rst index da4e696d2ba73..3fb4768d7503a 100644 --- a/docs/apache-airflow-providers-apache-beam/operators.rst +++ b/providers/apache/beam/docs/operators.rst @@ -54,13 +54,13 @@ recommend avoiding unless the Dataflow job requires it. Python Pipelines with DirectRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python.py +.. 
exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_direct_runner_pipeline_local_file] :end-before: [END howto_operator_start_python_direct_runner_pipeline_local_file] -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_direct_runner_pipeline_gcs_file] @@ -71,13 +71,13 @@ possibility to free up the worker when it knows it has to wait, and hand off the As a result, while it is suspended (deferred), it is not taking up a worker slot and your cluster will have a lot less resources wasted on idle Operators or Sensors: -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python_async.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python_async.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_direct_runner_pipeline_local_file_async] :end-before: [END howto_operator_start_python_direct_runner_pipeline_local_file_async] -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python_async.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python_async.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_direct_runner_pipeline_gcs_file_async] @@ -86,13 +86,13 @@ lot less resources wasted on idle Operators or Sensors: Python Pipelines with DataflowRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_dataflow_runner_pipeline_gcs_file] :end-before: [END howto_operator_start_python_dataflow_runner_pipeline_gcs_file] -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python_dataflow.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python_dataflow.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_dataflow_runner_pipeline_async_gcs_file] @@ -104,7 +104,7 @@ possibility to free up the worker when it knows it has to wait, and hand off the As a result, while it is suspended (deferred), it is not taking up a worker slot and your cluster will have a lot less resources wasted on idle Operators or Sensors: -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_python_async.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_python_async.py :language: python :dedent: 4 :start-after: [START howto_operator_start_python_dataflow_runner_pipeline_gcs_file_async] @@ -126,7 +126,7 @@ has the ability to download or available on the local filesystem (provide the ab Java Pipelines with DirectRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_beam.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_beam.py :language: python :dedent: 4 :start-after: [START howto_operator_start_java_direct_runner_pipeline] @@ -135,7 +135,7 @@ Java Pipelines with DirectRunner Java Pipelines with DataflowRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. 
exampleinclude:: /../../providers/tests/system/apache/beam/example_java_dataflow.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_java_dataflow.py :language: python :dedent: 4 :start-after: [START howto_operator_start_java_dataflow_runner_pipeline] @@ -159,13 +159,13 @@ init the module and install dependencies with ``go run init example.com/main`` a Go Pipelines with DirectRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_go.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_go.py :language: python :dedent: 4 :start-after: [START howto_operator_start_go_direct_runner_pipeline_local_file] :end-before: [END howto_operator_start_go_direct_runner_pipeline_local_file] -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_go.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_go.py :language: python :dedent: 4 :start-after: [START howto_operator_start_go_direct_runner_pipeline_gcs_file] @@ -174,13 +174,13 @@ Go Pipelines with DirectRunner Go Pipelines with DataflowRunner ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_go.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_go.py :language: python :dedent: 4 :start-after: [START howto_operator_start_go_dataflow_runner_pipeline_gcs_file] :end-before: [END howto_operator_start_go_dataflow_runner_pipeline_gcs_file] -.. exampleinclude:: /../../providers/tests/system/apache/beam/example_go_dataflow.py +.. exampleinclude:: /../../providers/apache/beam/tests/system/apache/beam/example_go_dataflow.py :language: python :dedent: 4 :start-after: [START howto_operator_start_go_dataflow_runner_pipeline_async_gcs_file] diff --git a/docs/apache-airflow-providers-apache-beam/security.rst b/providers/apache/beam/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-beam/security.rst rename to providers/apache/beam/docs/security.rst diff --git a/providers/src/airflow/providers/apache/beam/provider.yaml b/providers/apache/beam/provider.yaml similarity index 90% rename from providers/src/airflow/providers/apache/beam/provider.yaml rename to providers/apache/beam/provider.yaml index b043512de7902..c66b362c7cf98 100644 --- a/providers/src/airflow/providers/apache/beam/provider.yaml +++ b/providers/apache/beam/provider.yaml @@ -63,17 +63,6 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - # Apache Beam > 2.53.0 and pyarrow > 14.0.1 fix https://nvd.nist.gov/vuln/detail/CVE-2023-47248. - - apache-beam>=2.53.0 - - pyarrow>=14.0.1 - -additional-extras: - - name: google - dependencies: - - apache-beam[gcp] - # Apache Beam currently does not support Python 3.12 # There is an issue tracking it https://github.com/apache/beam/issues/29149. # While the original issue above is closed, Apache Beam still does not support Python 3.12 diff --git a/providers/apache/beam/pyproject.toml b/providers/apache/beam/pyproject.toml new file mode 100644 index 0000000000000..4a0c521f2b386 --- /dev/null +++ b/providers/apache/beam/pyproject.toml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! + +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-beam" +version = "6.0.0" +description = "Provider package apache-airflow-providers-apache-beam for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.beam", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + # Apache Beam > 2.53.0 and pyarrow > 14.0.1 fix https://nvd.nist.gov/vuln/detail/CVE-2023-47248. 
+ "apache-beam>=2.53.0", + "pyarrow>=14.0.1", +] + +# The optional dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +[project.optional-dependencies] +"google" = [ + "apache-beam[gcp]", +] +"common.compat" = [ + "apache-airflow-providers-common-compat" +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-beam/6.0.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-beam/6.0.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.beam.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.beam" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/LICENSE b/providers/apache/beam/src/airflow/providers/apache/beam/LICENSE new file mode 100644 index 0000000000000..11069edd79019 --- /dev/null +++ b/providers/apache/beam/src/airflow/providers/apache/beam/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/providers/src/airflow/providers/apache/beam/README.md b/providers/apache/beam/src/airflow/providers/apache/beam/README.md similarity index 100% rename from providers/src/airflow/providers/apache/beam/README.md rename to providers/apache/beam/src/airflow/providers/apache/beam/README.md diff --git a/providers/src/airflow/providers/apache/beam/__init__.py b/providers/apache/beam/src/airflow/providers/apache/beam/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/__init__.py rename to providers/apache/beam/src/airflow/providers/apache/beam/__init__.py diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py new file mode 100644 index 0000000000000..7c5726eac722b --- /dev/null +++ b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-beam", + "name": "Apache Beam", + "description": "`Apache Beam `__.\n", + "state": "ready", + "source-date-epoch": 1734527138, + "versions": [ + "6.0.0", + "5.9.1", + "5.9.0", + "5.8.1", + "5.8.0", + "5.7.2", + "5.7.1", + "5.7.0", + "5.6.3", + "5.6.2", + "5.6.1", + "5.6.0", + "5.5.0", + "5.4.0", + "5.3.0", + "5.2.3", + "5.2.2", + "5.2.1", + "5.2.0", + "5.1.1", + "5.1.0", + "5.0.0", + "4.3.0", + "4.2.0", + "4.1.1", + "4.1.0", + "4.0.0", + "3.4.0", + "3.3.0", + "3.2.1", + "3.2.0", + "3.1.0", + "3.0.1", + "3.0.0", + "2.0.0", + "1.0.1", + "1.0.0", + ], + "excluded-python-versions": ["3.12"], + "integrations": [ + { + "integration-name": "Apache Beam", + "external-doc-url": "https://beam.apache.org/", + "how-to-guide": ["/docs/apache-airflow-providers-apache-beam/operators.rst"], + "tags": ["apache"], + } + ], + "operators": [ + { + "integration-name": "Apache Beam", + "python-modules": ["airflow.providers.apache.beam.operators.beam"], + } + ], + "hooks": [ + { + "integration-name": "Apache Beam", + "python-modules": ["airflow.providers.apache.beam.hooks.beam"], + } + ], + "triggers": [ + { + "integration-name": "Apache Beam", + "python-modules": ["airflow.providers.apache.beam.triggers.beam"], + } + ], + "dependencies": ["apache-airflow>=2.9.0", "apache-beam>=2.53.0", "pyarrow>=14.0.1"], + "optional-dependencies": { + "google": ["apache-beam[gcp]"], + "common.compat": ["apache-airflow-providers-common-compat"], + }, + } diff --git a/providers/src/airflow/providers/apache/beam/hooks/__init__.py b/providers/apache/beam/src/airflow/providers/apache/beam/hooks/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/hooks/__init__.py rename to providers/apache/beam/src/airflow/providers/apache/beam/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/beam/hooks/beam.py b/providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/hooks/beam.py rename to providers/apache/beam/src/airflow/providers/apache/beam/hooks/beam.py diff --git a/providers/src/airflow/providers/apache/beam/operators/__init__.py b/providers/apache/beam/src/airflow/providers/apache/beam/operators/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/operators/__init__.py rename to providers/apache/beam/src/airflow/providers/apache/beam/operators/__init__.py diff --git a/providers/src/airflow/providers/apache/beam/operators/beam.py b/providers/apache/beam/src/airflow/providers/apache/beam/operators/beam.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/operators/beam.py rename to providers/apache/beam/src/airflow/providers/apache/beam/operators/beam.py diff --git a/providers/src/airflow/providers/apache/beam/triggers/__init__.py b/providers/apache/beam/src/airflow/providers/apache/beam/triggers/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/beam/triggers/__init__.py rename to providers/apache/beam/src/airflow/providers/apache/beam/triggers/__init__.py diff --git a/providers/src/airflow/providers/apache/beam/triggers/beam.py b/providers/apache/beam/src/airflow/providers/apache/beam/triggers/beam.py similarity index 100% rename from 
providers/src/airflow/providers/apache/beam/triggers/beam.py rename to providers/apache/beam/src/airflow/providers/apache/beam/triggers/beam.py diff --git a/providers/apache/beam/tests/conftest.py b/providers/apache/beam/tests/conftest.py new file mode 100644 index 0000000000000..068fe6bbf5ae9 --- /dev/null +++ b/providers/apache/beam/tests/conftest.py @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/apache/beam/tests/provider_tests/__init__.py b/providers/apache/beam/tests/provider_tests/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/beam/tests/provider_tests/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/apache/beam/tests/provider_tests/apache/__init__.py b/providers/apache/beam/tests/provider_tests/apache/__init__.py new file mode 100644 index 0000000000000..e8fd22856438c --- /dev/null +++ b/providers/apache/beam/tests/provider_tests/apache/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/apache/beam/__init__.py b/providers/apache/beam/tests/provider_tests/apache/beam/__init__.py similarity index 100% rename from providers/tests/apache/beam/__init__.py rename to providers/apache/beam/tests/provider_tests/apache/beam/__init__.py diff --git a/providers/tests/apache/beam/hooks/__init__.py b/providers/apache/beam/tests/provider_tests/apache/beam/hooks/__init__.py similarity index 100% rename from providers/tests/apache/beam/hooks/__init__.py rename to providers/apache/beam/tests/provider_tests/apache/beam/hooks/__init__.py diff --git a/providers/tests/apache/beam/hooks/test_beam.py b/providers/apache/beam/tests/provider_tests/apache/beam/hooks/test_beam.py similarity index 100% rename from providers/tests/apache/beam/hooks/test_beam.py rename to providers/apache/beam/tests/provider_tests/apache/beam/hooks/test_beam.py diff --git a/providers/tests/apache/beam/operators/__init__.py b/providers/apache/beam/tests/provider_tests/apache/beam/operators/__init__.py similarity index 100% rename from providers/tests/apache/beam/operators/__init__.py rename to providers/apache/beam/tests/provider_tests/apache/beam/operators/__init__.py diff --git a/providers/tests/apache/beam/operators/test_beam.py b/providers/apache/beam/tests/provider_tests/apache/beam/operators/test_beam.py similarity index 100% rename from providers/tests/apache/beam/operators/test_beam.py rename to providers/apache/beam/tests/provider_tests/apache/beam/operators/test_beam.py diff --git a/providers/tests/apache/beam/triggers/__init__.py b/providers/apache/beam/tests/provider_tests/apache/beam/triggers/__init__.py similarity index 100% rename from providers/tests/apache/beam/triggers/__init__.py rename to providers/apache/beam/tests/provider_tests/apache/beam/triggers/__init__.py diff --git a/providers/tests/apache/beam/triggers/test_beam.py b/providers/apache/beam/tests/provider_tests/apache/beam/triggers/test_beam.py similarity index 100% rename from providers/tests/apache/beam/triggers/test_beam.py rename to providers/apache/beam/tests/provider_tests/apache/beam/triggers/test_beam.py diff --git a/providers/tests/system/apache/beam/__init__.py b/providers/apache/beam/tests/system/apache/beam/__init__.py similarity index 100% rename from providers/tests/system/apache/beam/__init__.py rename to providers/apache/beam/tests/system/apache/beam/__init__.py diff --git a/providers/tests/system/apache/beam/example_beam.py b/providers/apache/beam/tests/system/apache/beam/example_beam.py similarity index 100% rename from providers/tests/system/apache/beam/example_beam.py rename to providers/apache/beam/tests/system/apache/beam/example_beam.py diff --git a/providers/tests/system/apache/beam/example_beam_java_flink.py b/providers/apache/beam/tests/system/apache/beam/example_beam_java_flink.py similarity index 100% rename from providers/tests/system/apache/beam/example_beam_java_flink.py rename to providers/apache/beam/tests/system/apache/beam/example_beam_java_flink.py diff --git 
a/providers/tests/system/apache/beam/example_beam_java_spark.py b/providers/apache/beam/tests/system/apache/beam/example_beam_java_spark.py similarity index 100% rename from providers/tests/system/apache/beam/example_beam_java_spark.py rename to providers/apache/beam/tests/system/apache/beam/example_beam_java_spark.py diff --git a/providers/tests/system/apache/beam/example_go.py b/providers/apache/beam/tests/system/apache/beam/example_go.py similarity index 100% rename from providers/tests/system/apache/beam/example_go.py rename to providers/apache/beam/tests/system/apache/beam/example_go.py diff --git a/providers/tests/system/apache/beam/example_go_dataflow.py b/providers/apache/beam/tests/system/apache/beam/example_go_dataflow.py similarity index 100% rename from providers/tests/system/apache/beam/example_go_dataflow.py rename to providers/apache/beam/tests/system/apache/beam/example_go_dataflow.py diff --git a/providers/tests/system/apache/beam/example_java_dataflow.py b/providers/apache/beam/tests/system/apache/beam/example_java_dataflow.py similarity index 100% rename from providers/tests/system/apache/beam/example_java_dataflow.py rename to providers/apache/beam/tests/system/apache/beam/example_java_dataflow.py diff --git a/providers/tests/system/apache/beam/example_python.py b/providers/apache/beam/tests/system/apache/beam/example_python.py similarity index 100% rename from providers/tests/system/apache/beam/example_python.py rename to providers/apache/beam/tests/system/apache/beam/example_python.py diff --git a/providers/tests/system/apache/beam/example_python_async.py b/providers/apache/beam/tests/system/apache/beam/example_python_async.py similarity index 100% rename from providers/tests/system/apache/beam/example_python_async.py rename to providers/apache/beam/tests/system/apache/beam/example_python_async.py diff --git a/providers/tests/system/apache/beam/example_python_dataflow.py b/providers/apache/beam/tests/system/apache/beam/example_python_dataflow.py similarity index 100% rename from providers/tests/system/apache/beam/example_python_dataflow.py rename to providers/apache/beam/tests/system/apache/beam/example_python_dataflow.py diff --git a/providers/tests/system/apache/beam/utils.py b/providers/apache/beam/tests/system/apache/beam/utils.py similarity index 100% rename from providers/tests/system/apache/beam/utils.py rename to providers/apache/beam/tests/system/apache/beam/utils.py diff --git a/pyproject.toml b/pyproject.toml index e67eb509425b4..0f6999bfd8f42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -616,6 +616,7 @@ dev = [ "local-providers", "apache-airflow-providers-airbyte", "apache-airflow-providers-alibaba", + "apache-airflow-providers-apache-beam", "apache-airflow-providers-apache-iceberg", "apache-airflow-providers-celery", "apache-airflow-providers-cohere", @@ -633,6 +634,7 @@ dev = [ local-providers = { workspace = true } apache-airflow-providers-airbyte = {workspace = true} apache-airflow-providers-alibaba = { workspace = true } +apache-airflow-providers-apache-beam = { workspace = true } apache-airflow-providers-apache-iceberg = {workspace = true} apache-airflow-providers-celery = {workspace = true} apache-airflow-providers-cohere = { workspace = true } @@ -648,6 +650,7 @@ members = [ "providers", "providers/airbyte", "providers/alibaba", + "providers/apache/beam", "providers/apache/iceberg", "providers/celery", "providers/cohere", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index 
35154c3caf60c..60a967cd08762 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -34,6 +34,7 @@ services: # START automatically generated volumes by generate-volumes-for-sources pre-commit - ../../../empty:/opt/airflow/providers/airbyte/src - ../../../empty:/opt/airflow/providers/alibaba/src + - ../../../empty:/opt/airflow/providers/apache/beam/src - ../../../empty:/opt/airflow/providers/apache/iceberg/src - ../../../empty:/opt/airflow/providers/celery/src - ../../../empty:/opt/airflow/providers/cohere/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index a5947d816c5ca..a58ade7b9166d 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -41,6 +41,7 @@ services: # START automatically generated volumes by generate-volumes-for-sources pre-commit - ../../../providers/airbyte/tests:/opt/airflow/providers/airbyte/tests - ../../../providers/alibaba/tests:/opt/airflow/providers/alibaba/tests + - ../../../providers/apache/beam/tests:/opt/airflow/providers/apache/beam/tests - ../../../providers/apache/iceberg/tests:/opt/airflow/providers/apache/iceberg/tests - ../../../providers/celery/tests:/opt/airflow/providers/celery/tests - ../../../providers/cohere/tests:/opt/airflow/providers/cohere/tests From 5045694bf084336a2e6875ba9b78d9acd46a98ec Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 20:14:22 +0530 Subject: [PATCH 02/14] Static check fixes --- generated/provider_dependencies.json | 4 +--- providers/apache/beam/README.rst | 2 +- .../airflow/providers/apache/beam/triggers/beam/index.rst | 2 +- providers/apache/beam/provider.yaml | 7 ------- providers/apache/beam/pyproject.toml | 1 + .../src/airflow/providers/apache/beam/get_provider_info.py | 1 - 6 files changed, 4 insertions(+), 13 deletions(-) diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index 7ae6ab718abb1..cff8f4847be19 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -84,9 +84,7 @@ "common.compat", "google" ], - "excluded-python-versions": [ - "3.12" - ], + "excluded-python-versions": [], "state": "ready" }, "apache.cassandra": { diff --git a/providers/apache/beam/README.rst b/providers/apache/beam/README.rst index 6de998e174c45..30a339f02b55a 100644 --- a/providers/apache/beam/README.rst +++ b/providers/apache/beam/README.rst @@ -46,7 +46,7 @@ You can install this package on top of an existing Airflow 2 installation (see ` for the minimum Airflow version supported) via ``pip install apache-airflow-providers-apache-beam`` -The package supports the following python versions: 3.9,3.10,3.11 +The package supports the following python versions: 3.9,3.10,3.11,3.12 Requirements ------------ diff --git a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst index fade25e4af7e7..e2e09212bc0c1 100644 --- a/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst +++ b/providers/apache/beam/docs/_api/airflow/providers/apache/beam/triggers/beam/index.rst @@ -54,7 +54,7 @@ Classes :param variables: Variables passed to the pipeline. :param py_file: Path to the python file to execute. :param py_options: Additional options. - :param py_interpreter: Python version of the Apache Beam pipeline. 
If `None`, this defaults to the + :param py_interpreter: Python version of the Apache Beam pipeline. If ``None``, this defaults to the python3. To track python versions supported by beam and related issues check: https://issues.apache.org/jira/browse/BEAM-1251 :param py_requirements: Additional python package(s) to install. diff --git a/providers/apache/beam/provider.yaml b/providers/apache/beam/provider.yaml index c66b362c7cf98..bd0823f9ad6da 100644 --- a/providers/apache/beam/provider.yaml +++ b/providers/apache/beam/provider.yaml @@ -63,13 +63,6 @@ versions: - 1.0.1 - 1.0.0 -# Apache Beam currently does not support Python 3.12 -# There is an issue tracking it https://github.com/apache/beam/issues/29149. -# While the original issue above is closed, Apache Beam still does not support Python 3.12 -# because the dill version used by them break our PythonVirtualenvOperator when dill is enabled -# See https://github.com/apache/beam/issues/32617 -excluded-python-versions: ['3.12'] - integrations: - integration-name: Apache Beam external-doc-url: https://beam.apache.org/ diff --git a/providers/apache/beam/pyproject.toml b/providers/apache/beam/pyproject.toml index 4a0c521f2b386..ac5fb3bee884a 100644 --- a/providers/apache/beam/pyproject.toml +++ b/providers/apache/beam/pyproject.toml @@ -47,6 +47,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: System :: Monitoring", ] requires-python = "~=3.9" diff --git a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py index 7c5726eac722b..cb39402c5067b 100644 --- a/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py +++ b/providers/apache/beam/src/airflow/providers/apache/beam/get_provider_info.py @@ -67,7 +67,6 @@ def get_provider_info(): "1.0.1", "1.0.0", ], - "excluded-python-versions": ["3.12"], "integrations": [ { "integration-name": "Apache Beam", From 8e50f6b38403bbeaf8501dc85ee1e070e7ede075 Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 20:49:30 +0530 Subject: [PATCH 03/14] Remove apache-beam exclusion from selective checks --- dev/breeze/tests/test_selective_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 7e90ac7607742..5c4f8a4ab6bc9 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -1030,7 +1030,7 @@ def test_excluded_providers(): ) assert_outputs_are_printed( { - "excluded-providers-as-string": json.dumps({"3.9": ["cloudant"], "3.12": ["apache.beam"]}), + "excluded-providers-as-string": json.dumps({"3.9": ["cloudant"]}), }, str(stderr), ) From 5d5d421b433b9523b3ecf93451b660a0c1ef4204 Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 21:05:37 +0530 Subject: [PATCH 04/14] Remove apache-beam from breeze selective checks --- dev/breeze/tests/test_selective_checks.py | 26 +++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 5c4f8a4ab6bc9..5b8dcfa390aa1 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -343,7 +343,7 @@ def assert_outputs_are_printed(expected_outputs: 
dict[str, str], stderr: str): pytest.param( ("providers/tests/apache/beam/file.py",), { - "selected-providers-list-as-string": "apache.beam common.compat google", + "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", "all-python-versions-list-as-string": "3.9", "python-versions": "['3.9']", @@ -361,8 +361,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "run-kubernetes-tests": "false", "upgrade-to-newer-dependencies": "false", "core-test-types-list-as-string": "Always", - "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]", - "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]", + "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", + "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", "needs-mypy": "true", "mypy-checks": "['mypy-providers']", }, @@ -373,7 +373,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): pytest.param( ("providers/tests/system/apache/beam/file.py",), { - "selected-providers-list-as-string": "apache.beam common.compat google", + "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", "all-python-versions-list-as-string": "3.9", "python-versions": "['3.9']", @@ -391,8 +391,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "run-kubernetes-tests": "false", "upgrade-to-newer-dependencies": "false", "core-test-types-list-as-string": "Always", - "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]", - "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]", + "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", + "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", "needs-mypy": "true", "mypy-checks": "['mypy-providers']", "skip-providers-tests": "false", @@ -404,7 +404,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): pytest.param( ("providers/tests/system/apache/beam/file.py", "providers/tests/apache/beam/file.py"), { - "selected-providers-list-as-string": "apache.beam common.compat google", + "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", "all-python-versions-list-as-string": "3.9", "python-versions": "['3.9']", @@ -422,8 +422,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "run-kubernetes-tests": "false", "upgrade-to-newer-dependencies": "false", "core-test-types-list-as-string": "Always", - "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]", - "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]", + "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", + "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]", "needs-mypy": "true", "mypy-checks": "['mypy-providers']", "skip-providers-tests": "false", @@ -434,13 +434,11 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ( pytest.param( ( - "providers/tests/system/apache/beam/file.py", - "providers/tests/apache/beam/file.py", 
"providers/tests/system/zendesk/file.py", "providers/tests/zendesk/file.py", ), { - "selected-providers-list-as-string": "apache.beam common.compat google zendesk", + "selected-providers-list-as-string": "common.compat google zendesk", "all-python-versions": "['3.9']", "all-python-versions-list-as-string": "3.9", "python-versions": "['3.9']", @@ -458,8 +456,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "run-kubernetes-tests": "false", "upgrade-to-newer-dependencies": "false", "core-test-types-list-as-string": "Always", - "providers-test-types-list-as-string": "Providers[apache.beam,common.compat,zendesk] Providers[google]", - "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google] Providers[zendesk]", + "providers-test-types-list-as-string": "Providers[common.compat,zendesk] Providers[google]", + "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google] Providers[zendesk]", "needs-mypy": "true", "mypy-checks": "['mypy-providers']", "skip-providers-tests": "false", From 6baf96cceafd561790cde4ad4b5c00c9fd6c216d Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 21:21:02 +0530 Subject: [PATCH 05/14] Breeze selective checks fix --- dev/breeze/tests/test_selective_checks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 5deb6b9de9cec..44eb6f44e8613 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -341,6 +341,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( + (), { "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", @@ -370,6 +371,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( + (), { "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", From ae5772b776e189a7d90c2bd6613776bd5816d4c7 Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 21:23:03 +0530 Subject: [PATCH 06/14] Breeze selective checks fix --- dev/breeze/tests/test_selective_checks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 44eb6f44e8613..e06ac7547b0b4 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -402,6 +402,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( + (), { "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", @@ -432,6 +433,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): ), ( pytest.param( + (), { "selected-providers-list-as-string": "common.compat google", "all-python-versions": "['3.9']", From 660b6bf8d560e8fe0f3da8dbbb9f2f26aaea9531 Mon Sep 17 00:00:00 2001 From: Kunal Bhattacharya Date: Sun, 26 Jan 2025 21:42:55 +0530 Subject: [PATCH 07/14] Breeze selective checks fix --- dev/breeze/tests/test_selective_checks.py | 31 +++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index e06ac7547b0b4..fd69e5ec77d2b 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ 
@@ -341,9 +341,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                (),
+                ("providers/apache/beam/tests/file.py",),
                 {
-                    "selected-providers-list-as-string": "common.compat google",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -371,9 +371,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                (),
+                ("providers/apache/beam/tests/file.py",),
                 {
-                    "selected-providers-list-as-string": "common.compat google",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -391,8 +391,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",
@@ -402,9 +402,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                (),
+                ("providers/system/apache/beam/tests/file.py", "providers/apache/beam/tests/file.py"),
                 {
-                    "selected-providers-list-as-string": "common.compat google",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -422,8 +422,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",
@@ -433,9 +433,12 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                (),
+                (
+                    "providers/system/apache/beam/tests/file.py",
+                    "providers/apache/beam/tests/file.py",
+                ),
                 {
-                    "selected-providers-list-as-string": "common.compat google",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -453,8 +456,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",

From 259c8d6bbe863d917fd987fe218109fdf8eb5cbb Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Sun, 26 Jan 2025 21:45:23 +0530
Subject: [PATCH 08/14] Typo fix

---
 dev/breeze/tests/test_selective_checks.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index fd69e5ec77d2b..e79f56cdbc1b3 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -361,8 +361,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                 },
@@ -422,7 +422,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[apache.beam common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
                     "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",

From 803de2aa54705c7e23183b32cc6873089adc4b44 Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Sun, 26 Jan 2025 22:02:38 +0530
Subject: [PATCH 09/14] Reverting selected changes

---
 dev/breeze/tests/test_selective_checks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index e79f56cdbc1b3..305c862e1941c 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -1613,7 +1613,7 @@ def test_expected_output_push(
                 "providers/tests/google/file.py",
             ),
             {
-                "selected-providers-list-as-string": "amazon apache.cassandra "
+                "selected-providers-list-as-string": "amazon apache.beam apache.cassandra "
                 "cncf.kubernetes common.compat common.sql "
                 "facebook google hashicorp microsoft.azure microsoft.mssql mysql "
                 "openlineage oracle postgres presto salesforce samba sftp ssh trino",
@@ -1626,7 +1626,7 @@ def test_expected_output_push(
                 "skip-providers-tests": "false",
                 "test-groups": "['core', 'providers']",
                 "docs-build": "true",
-                "docs-list-as-string": "apache-airflow helm-chart amazon apache.cassandra "
+                "docs-list-as-string": "apache-airflow helm-chart amazon apache.beam apache.cassandra "
                 "cncf.kubernetes common.compat common.sql facebook google hashicorp microsoft.azure "
                 "microsoft.mssql mysql openlineage oracle postgres "
                 "presto salesforce samba sftp ssh trino",
@@ -1634,7 +1634,7 @@ def test_expected_output_push(
                 "run-kubernetes-tests": "true",
                 "upgrade-to-newer-dependencies": "false",
                 "core-test-types-list-as-string": "Always CLI",
-                "providers-test-types-list-as-string": "Providers[amazon] Providers[apache.cassandra,cncf.kubernetes,common.compat,common.sql,facebook,"
+                "providers-test-types-list-as-string": "Providers[amazon] Providers[apache.beam,apache.cassandra,cncf.kubernetes,common.compat,common.sql,facebook,"
                 "hashicorp,microsoft.azure,microsoft.mssql,mysql,openlineage,oracle,postgres,presto,"
                 "salesforce,samba,sftp,ssh,trino] Providers[google]",
                 "needs-mypy": "true",
@@ -1936,7 +1936,7 @@ def test_upgrade_to_newer_dependencies(
         pytest.param(
             ("docs/apache-airflow-providers-google/docs.rst",),
             {
-                "docs-list-as-string": "amazon apache.cassandra "
+                "docs-list-as-string": "amazon apache.beam apache.cassandra "
                 "cncf.kubernetes common.compat common.sql facebook google hashicorp "
                 "microsoft.azure microsoft.mssql mysql openlineage oracle "
                 "postgres presto salesforce samba sftp ssh trino",

From f1bf4a4672cc424431a599a87c3bbc208fc61c91 Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Mon, 27 Jan 2025 12:22:08 +0530
Subject: [PATCH 10/14] Attempting fix for selective checks failure

---
 dev/breeze/tests/test_selective_checks.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index 305c862e1941c..f6d028942fb43 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -402,9 +402,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                ("providers/system/apache/beam/tests/file.py", "providers/apache/beam/tests/file.py"),
+                (),
                 {
-                    "selected-providers-list-as-string": "apache.beam common.compat google",
+                    "selected-providers-list-as-string": "",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -422,8 +422,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "",
+                    "individual-providers-test-types-list-as-string": "",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",

From fb4ee29b889047ebf83cbc506fe49e5a0d55b2cb Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Mon, 27 Jan 2025 12:44:05 +0530
Subject: [PATCH 11/14] Attempting fix for selective checks failure

---
 dev/breeze/tests/test_selective_checks.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index f6d028942fb43..5c6f5d5937ee4 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -84,7 +84,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         if received_value != expected_value:
             if received_value is not None:
                 print_in_color(f"\n[red]ERROR: The key '{expected_key}' has unexpected value:")
-                print(received_value)
+                print(
+                    received_value,
+                )
                 print_in_color("Expected value:\n")
                 print(expected_value)
                 print_in_color("\nOutput received:")
@@ -434,11 +436,11 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         (
             pytest.param(
                 (
-                    "providers/system/apache/beam/tests/file.py",
-                    "providers/apache/beam/tests/file.py",
+                    # "providers/system/apache/beam/tests/file.py",
+                    # "providers/apache/beam/tests/file.py",
                 ),
                 {
-                    "selected-providers-list-as-string": "apache.beam common.compat google",
+                    "selected-providers-list-as-string": "common.compat google",  # "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -453,8 +455,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",  # "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",  # "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",

From 58cb22614af82c950d94c358e844b3863dee2f82 Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Mon, 27 Jan 2025 12:54:35 +0530
Subject: [PATCH 12/14] Attempting fix for selective checks failure

---
 dev/breeze/tests/test_selective_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index 5c6f5d5937ee4..2039a252cf9bd 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -436,7 +436,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         (
             pytest.param(
                 (
-                    # "providers/system/apache/beam/tests/file.py",
+                    "",  # "providers/system/apache/beam/tests/file.py",
                     # "providers/apache/beam/tests/file.py",
                 ),
                 {

From 24fb839450bff4ccddefe70da9f564c81b2b0342 Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Mon, 27 Jan 2025 13:17:14 +0530
Subject: [PATCH 13/14] Attempting fix for selective checks failure

---
 dev/breeze/tests/test_selective_checks.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index e0530495e9c9c..ab3e41cac1182 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -436,11 +436,11 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         (
             pytest.param(
                 (
-                    "",  # "providers/system/apache/beam/tests/file.py",
-                    # "providers/apache/beam/tests/file.py",
+                    "providers/apache/beam/tests/system/apache/beam/file.py",
+                    "providers/apache/beam/tests/provider_tests/apache/beam/file.py",
                 ),
                 {
-                    "selected-providers-list-as-string": "common.compat google",  # "apache.beam common.compat google",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -458,8 +458,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",  # "Providers[apache.beam,common.compat] Providers[google]",
-                    "individual-providers-test-types-list-as-string": "Providers[common.compat] Providers[google]",  # "Providers[apache.beam] Providers[common.compat] Providers[google]",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",

From bd4a2b1b9c0f1b743968d8d7f3c318cad403aade Mon Sep 17 00:00:00 2001
From: Kunal Bhattacharya
Date: Mon, 27 Jan 2025 13:23:26 +0530
Subject: [PATCH 14/14] Hopefully final fix for selective checks failure

---
 dev/breeze/tests/test_selective_checks.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index ab3e41cac1182..05d4ed7ed7f78 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -343,7 +343,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                ("providers/apache/beam/tests/file.py",),
+                ("providers/apache/beam/tests/provider_tests/apache/beam/file.py",),
                 {
                     "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
@@ -373,7 +373,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                ("providers/apache/beam/tests/file.py",),
+                ("providers/apache/beam/tests/provider_tests/apache/beam/file.py",),
                 {
                     "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
@@ -404,9 +404,12 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
         ),
         (
             pytest.param(
-                (),
+                (
+                    "providers/apache/beam/tests/system/apache/beam/file.py",
+                    "providers/apache/beam/tests/provider_tests/apache/beam/file.py",
+                ),
                 {
-                    "selected-providers-list-as-string": "",
+                    "selected-providers-list-as-string": "apache.beam common.compat google",
                     "all-python-versions": "['3.9']",
                     "all-python-versions-list-as-string": "3.9",
                     "python-versions": "['3.9']",
@@ -424,8 +427,8 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "run-kubernetes-tests": "false",
                     "upgrade-to-newer-dependencies": "false",
                     "core-test-types-list-as-string": "Always",
-                    "providers-test-types-list-as-string": "",
-                    "individual-providers-test-types-list-as-string": "",
+                    "providers-test-types-list-as-string": "Providers[apache.beam,common.compat] Providers[google]",
+                    "individual-providers-test-types-list-as-string": "Providers[apache.beam] Providers[common.compat] Providers[google]",
                     "needs-mypy": "true",
                     "mypy-checks": "['mypy-providers']",
                     "skip-providers-tests": "false",