From c22b8c17751c2bdb379b7245a971f8b1868b280f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 5 Jul 2023 16:05:03 -0400 Subject: [PATCH 01/80] Update 2.50 release notes to include new Kafka topicPattern feature --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index b6df49a48ca..dac9654f233 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -61,6 +61,7 @@ * Support for X source added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). * Python GCSIO is now implemented with GCP GCS Client instead of apitools ([#25676](https://github.com/apache/beam/issues/25676)) +* Java KafkaIO now supports picking up topics via topicPattern ([#26948](https://github.com/apache/beam/pull/26948)) ## New Features / Improvements From 6c9c28dd2f825ac6b96362b7435f2094bf107b59 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 13:32:01 -0400 Subject: [PATCH 02/80] Create groovy class for io performance tests Create gradle task and github actions config for GCS using this. --- ...stCommit_Java_IO_GCS_Performance_Tests.yml | 117 ++++++++++++++++++ .../gradle/IoPerformanceTestUtilities.groovy | 45 +++++++ .../org/apache/beam/examples/WriteBQ.java | 73 +++++++++++ it/google-cloud-platform/build.gradle | 5 +- 4 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml create mode 100644 buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy create mode 100644 examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml new file mode 100644 index 00000000000..c94e884effa --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
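+
+# NOTE (descriptive comment, summarizing the triggers defined below): this
+# workflow runs the GCS IO performance test on pushes, on a schedule, on
+# manual dispatch, or via the trigger comment; scheduled runs check out the
+# latest release branch before running.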
+ +name: PostCommit Java IO GCS Performance Tests + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml'] + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_IO_GCS_Performance_Tests: + if: | + github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' + runs-on: [self-hosted, ubuntu-20.04, main] + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] + job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] + steps: + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Checkout release branch + if: github.event_name == 'schedule' #This has scheduled runs run against the latest release + uses: actions/checkout@v3 + with: + ref: release-2.50.0 #TODO automate updating this + repository: apache/beam + - name: Checkout non-release branch + if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch + uses: actions/checkout@v3 + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Authenticate on GCP + uses: google-github-actions/setup-gcloud@v0 + with: + service_account_email: ${{ secrets.GCP_SA_EMAIL }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + export_default_credentials: true + - name: GCloud Docker credential helper + run: | + gcloud auth configure-docker us.gcr.io + - name: run scheduled javaPostcommitIOGCSPerformanceTests script + if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly + with: + exportDataset: performance_tests + exportTable: io_performance_metrics + run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest + env: + USER: github-actions + - name: run triggered javaPostcommitIOGCSPerformanceTests script + if: github.event_name != 'schedule' + run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest + env: + USER: github-actions diff --git 
a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy
new file mode 100644
index 00000000000..c15243ebe33
--- /dev/null
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.gradle
+
+import org.gradle.api.Project
+import org.gradle.api.tasks.testing.Test
+
+import javax.inject.Inject
+
+class IoPerformanceTestUtilities {
+  abstract static class IoPerformanceTest extends Test {
+    @Inject
+    IoPerformanceTest(Project runningProject, String module, String testClass, Map<String, String> systemProperties){
+      group = "Verification"
+      description = "Runs IO Performance Test for $testClass"
+      outputs.upToDateWhen { false }
+      testClassesDirs = runningProject.findProject(":it:${module}").sourceSets.test.output.classesDirs
+      classpath = runningProject.sourceSets.test.runtimeClasspath + runningProject.findProject(":it:${module}").sourceSets.test.runtimeClasspath
+
+      include "**/${testClass}.class"
+
+      systemProperty 'exportDataset', System.getenv 'exportDataset'
+      systemProperty 'exportTable', System.getenv 'exportTable'
+
+      for (entry in systemProperties){
+        systemProperty entry.key, entry.value
+      }
+    }
+  }
+}
diff --git a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java
new file mode 100644
index 00000000000..6043118f558
--- /dev/null
+++ b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java
@@ -0,0 +1,73 @@
+package org.apache.beam.examples;
+
+import com.google.api.services.bigquery.model.TableFieldSchema;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.common.base.Splitter;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.GenerateSequence;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class WriteBQ {
+
+  public interface WriteBQOptions extends PipelineOptions, BigQueryOptions {
+    @Description("Table to write to")
+    String getTable();
+
+    void setTable(String value);
+  }
+
+  public static void main(String[] args) {
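+
+    // NOTE: the options below are hard-coded to point at a developer scratch
+    // table rather than taken from command-line flags; this is a throwaway
+    // test pipeline (it is deleted again in the next commit).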
+    WriteBQOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteBQOptions.class);
+    options.setTable("google.com:clouddfe:jjc_test.writebq2");
+    options.setUseStorageWriteApi(true);
+    options.setStorageApiAppendThresholdRecordCount(100);
+
+    Pipeline p = Pipeline.create(options);
+
+    List<TableFieldSchema> fields = new ArrayList<>();
+    fields.add(new TableFieldSchema().setName("name").setType("STRING"));
+    fields.add(new TableFieldSchema().setName("year").setType("INTEGER"));
+    fields.add(new TableFieldSchema().setName("country").setType("STRING"));
+    TableSchema schema = new TableSchema().setFields(fields);
+
+    p
+        .apply(GenerateSequence.from(0).to(100_000))
+        // Convert each generated Long to a TableRow
+        .apply("to TableRow", ParDo.of(new DoFn<Long, TableRow>() {
+          @ProcessElement
+          public void processElement(ProcessContext c) {
+            TableRow row = new TableRow();
+
+            row.set("name", "name");
+            row.set("year", c.element());
+            row.set("country", "country");
+
+            c.output(row);
+          }
+        }))
+        // Write to BigQuery.
+        // Using `writeTableRows` is slightly less performant than using `write` with
+        // `withFormatFunction` due to the TableRow encoding. See `WriteWithFormatBQ` for an example.
+        .apply(BigQueryIO.writeTableRows() // Input type from the previous stage is TableRow
+            .withSchema(schema)
+            .to(options.getTable())
+            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
+            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
+    p.run();
+  }
+}
diff --git a/it/google-cloud-platform/build.gradle b/it/google-cloud-platform/build.gradle
index f43b3f25720..48c7175187d 100644
--- a/it/google-cloud-platform/build.gradle
+++ b/it/google-cloud-platform/build.gradle
@@ -15,6 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+import org.apache.beam.gradle.IoPerformanceTestUtilities
 
 plugins { id 'org.apache.beam.module' }
 applyJavaNature(
@@ -74,4 +75,6 @@ dependencies {
     testImplementation library.java.mockito_inline
     testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadowTest")
     testRuntimeOnly library.java.slf4j_simple
-}
\ No newline at end of file
+}
+
+tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp'])
\ No newline at end of file

From 520c9d1637f09687fe8e6c9002892a879d75902e Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Thu, 31 Aug 2023 13:36:55 -0400
Subject: [PATCH 03/80] delete unnecessary class

---
 .../org/apache/beam/examples/WriteBQ.java | 73 -------------------
 1 file changed, 73 deletions(-)
 delete mode 100644 examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java

diff --git a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java b/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java
deleted file mode 100644
index 6043118f558..00000000000
--- a/examples/java/src/main/java/org/apache/beam/examples/WriteBQ.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package org.apache.beam.examples;
-
-import com.google.api.services.bigquery.model.TableFieldSchema;
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
-import com.google.common.base.Splitter;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.GenerateSequence;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryOptions;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-public class WriteBQ {
-
-  public interface WriteBQOptions extends PipelineOptions, BigQueryOptions {
-    @Description("Table to write to")
-    String getTable();
-
-    void setTable(String value);
-  }
-
-  public static void main(String[] args) {
-
-    // NOTE: the options below are hard-coded to point at a developer scratch
-    // table rather than taken from command-line flags; this is a throwaway
-    // test pipeline (it is deleted again in the next commit).
-    WriteBQOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(WriteBQOptions.class);
-    options.setTable("google.com:clouddfe:jjc_test.writebq2");
-    options.setUseStorageWriteApi(true);
-    options.setStorageApiAppendThresholdRecordCount(100);
-
-    Pipeline p = Pipeline.create(options);
-
-    List<TableFieldSchema> fields = new ArrayList<>();
-    fields.add(new TableFieldSchema().setName("name").setType("STRING"));
-    fields.add(new TableFieldSchema().setName("year").setType("INTEGER"));
-    fields.add(new TableFieldSchema().setName("country").setType("STRING"));
-    TableSchema schema = new TableSchema().setFields(fields);
-
-    p
-        .apply(GenerateSequence.from(0).to(100_000))
-        // Convert each generated Long to a TableRow
-        .apply("to TableRow", ParDo.of(new DoFn<Long, TableRow>() {
-          @ProcessElement
-          public void processElement(ProcessContext c) {
-            TableRow row = new TableRow();
-
-            row.set("name", "name");
-            row.set("year", c.element());
-            row.set("country", "country");
-
-            c.output(row);
-          }
-        }))
-        // Write to BigQuery.
-        // Using `writeTableRows` is slightly less performant than using `write` with
-        // `withFormatFunction` due to the TableRow encoding. See `WriteWithFormatBQ` for an example.
-        .apply(BigQueryIO.writeTableRows() // Input type from the previous stage is TableRow
-            .withSchema(schema)
-            .to(options.getTable())
-            .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-            .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND));
-    p.run();
-  }
-}

From 062de236bc65fc7553c8ddb021a2f1174357d88a Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Thu, 31 Aug 2023 13:45:40 -0400
Subject: [PATCH 04/80] fix env call

---
 .../org/apache/beam/gradle/IoPerformanceTestUtilities.groovy | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy
index c15243ebe33..844afd75f00 100644
--- a/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy
+++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/IoPerformanceTestUtilities.groovy
@@ -34,8 +34,8 @@ class IoPerformanceTestUtilities {
 
       include "**/${testClass}.class"
 
-      systemProperty 'exportDataset', System.getenv 'exportDataset'
-      systemProperty 'exportTable', System.getenv 'exportTable'
+      systemProperty 'exportDataset', System.getenv('exportDataset')
+      systemProperty 'exportTable', System.getenv('exportTable')
 
       for (entry in systemProperties){
         systemProperty entry.key, entry.value

From 9c9f86bd4218f1effb20cd9db5f44fca70277466 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Thu, 31 Aug 2023 14:14:16 -0400
Subject: [PATCH 05/80] fix call to gradle

---
 .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
index c94e884effa..3a0800b4bcd 100644
--- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
+++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
@@ -104,14 +104,17 @@ jobs:
           gcloud auth configure-docker us.gcr.io
     - name: run scheduled javaPostcommitIOGCSPerformanceTests script
       if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly
+      uses: ./.github/actions/gradle-command-self-hosted-action
       with:
+        gradle-command: :it:google-cloud-platform:GCSPerformanceTest
        exportDataset: performance_tests
        exportTable: io_performance_metrics
-      run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest
       env:
        USER: github-actions
    - name: run triggered javaPostcommitIOGCSPerformanceTests script
      if: github.event_name != 'schedule'
-      run: ./gradlew :it:google-cloud-platform:GCSPerformanceTest
+      uses: ./.github/actions/gradle-command-self-hosted-action
+      with:
+        gradle-command: :it:google-cloud-platform:GCSPerformanceTest
       env:
         USER: github-actions

From 925ce55318a90468e6493afd889699e64a9651e8 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Thu, 31 Aug 2023 14:32:23 -0400
Subject: [PATCH 06/80] run on hosted runner for testing

---
 .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
index 3a0800b4bcd..a7e24ba1d44 100644
--- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
+++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml
@@ -59,7 +59,8 @@ jobs:
       github.event_name
== 'workflow_dispatch' || github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' - runs-on: [self-hosted, ubuntu-20.04, main] +# runs-on: [self-hosted, ubuntu-20.04, main] + runs-on: ubuntu-20.04 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: From 117ef8b1c196472b03a7bd17292eaaa0bbaa483d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 14:35:58 -0400 Subject: [PATCH 07/80] add additional checkout --- .../workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index a7e24ba1d44..75ca8a51a73 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -67,6 +67,7 @@ jobs: job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] steps: + - uses: actions/checkout@v3 - name: Setup repository uses: ./.github/actions/setup-action with: From cb6e01b50aa66b554f1c8dcce76eef50c73b381b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 31 Aug 2023 16:07:20 -0400 Subject: [PATCH 08/80] add destination for triggered tests --- .../workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 75ca8a51a73..fa38c5eeed7 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -118,5 +118,7 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + exportDataset: performance_tests + exportTable: io_performance_metrics env: USER: github-actions From 8ea6c51374b5d542c6abf8b6c3a81ba89b3b7a2e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 09:47:24 -0400 Subject: [PATCH 09/80] move env variables to correct location --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index fa38c5eeed7..ebfb51de105 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -109,16 +109,16 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + env: exportDataset: performance_tests exportTable: io_performance_metrics - env: USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest + env: exportDataset: performance_tests exportTable: io_performance_metrics - env: USER: github-actions From 320a4cc2a3f23a2aa351179a34c47225580991f8 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 11:23:43 -0400 Subject: [PATCH 10/80] try uploading against separate dataset --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index ebfb51de105..fd1d112406d 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -110,7 +110,7 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: performance_tests + exportDataset: jjc_test exportTable: io_performance_metrics USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script @@ -119,6 +119,6 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: performance_tests + exportDataset: jjc_test exportTable: io_performance_metrics USER: github-actions From 1cd4e55026654670d45eb12fb57adf99a9fb73e1 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Sep 2023 14:47:07 -0400 Subject: [PATCH 11/80] try without a user --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index fd1d112406d..6f10a7d136a 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -112,7 +112,6 @@ jobs: env: exportDataset: jjc_test exportTable: io_performance_metrics - USER: github-actions - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action @@ -120,5 +119,4 @@ jobs: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: exportDataset: jjc_test - exportTable: io_performance_metrics - USER: github-actions + exportTable: io_performance_metrics \ No newline at end of file From 4fc5b8e419c0444184c1d418d40e60f6149360c6 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 10:39:02 -0400 Subject: [PATCH 12/80] update branch checkout, try to view the failure log --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 6f10a7d136a..541a8da59ae 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -78,7 +78,7 @@ jobs: if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: - ref: release-2.50.0 #TODO automate updating this + ref: v2.50.0 #TODO automate updating this repository: apache/beam - name: Checkout non-release branch if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch @@ -119,4 +119,6 @@ jobs: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: exportDataset: jjc_test - exportTable: io_performance_metrics \ No newline at end of file + exportTable: io_performance_metrics + - name: view failed test + run: cat file:///home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/index.html \ No newline at end of file From 59069f2b9d9ac3bb42837703db195f99b66a8351 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: 
Tue, 5 Sep 2023 11:05:05 -0400 Subject: [PATCH 13/80] run on failure --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 541a8da59ae..2a6d671c710 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -121,4 +121,5 @@ jobs: exportDataset: jjc_test exportTable: io_performance_metrics - name: view failed test - run: cat file:///home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/index.html \ No newline at end of file + if: ${{ failure() }} + run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/classes/org.apache.beam.it.gcp.storage.FileBasedIOLT.html \ No newline at end of file From 6f51976e942f31c3d2bc4cb7d173b615a26913e3 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 12:45:57 -0400 Subject: [PATCH 14/80] update to use correct BigQuery instance --- .../beam_PostCommit_Java_IO_GCS_Performance_Tests.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml index 2a6d671c710..06f7d64a3aa 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml @@ -110,7 +110,7 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: jjc_test + exportDataset: performance_tests exportTable: io_performance_metrics - name: run triggered javaPostcommitIOGCSPerformanceTests script if: github.event_name != 'schedule' @@ -118,8 +118,5 @@ jobs: with: gradle-command: :it:google-cloud-platform:GCSPerformanceTest env: - exportDataset: jjc_test - exportTable: io_performance_metrics - - name: view failed test - if: ${{ failure() }} - run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/GCSPerformanceTest/classes/org.apache.beam.it.gcp.storage.FileBasedIOLT.html \ No newline at end of file + exportDataset: performance_tests + exportTable: io_performance_metrics_test \ No newline at end of file From df716cb4aead47b0e3478ed44918501eb4f73ef9 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 13:15:24 -0400 Subject: [PATCH 15/80] convert to matrix --- ..._PostCommit_Java_IO_Performance_Tests.yml} | 31 ++++++++--------- it/build.gradle | 33 +++++++++++++++++++ it/google-cloud-platform/build.gradle | 3 +- 3 files changed, 48 insertions(+), 19 deletions(-) rename .github/workflows/{beam_PostCommit_Java_IO_GCS_Performance_Tests.yml => beam_PostCommit_Java_IO_Performance_Tests.yml} (79%) create mode 100644 it/build.gradle diff --git a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml similarity index 79% rename from .github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml rename to .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 06f7d64a3aa..57a02979bf6 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -13,13 +13,13 @@ # See the License for the specific language 
governing permissions and # limitations under the License. -name: PostCommit Java IO GCS Performance Tests +name: PostCommit Java IO Performance Tests on: push: tags: ['v*'] branches: ['master', 'release-*'] - paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_GCS_Performance_Tests.yml'] + paths: ['it/google-cloud-platform/**','.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml'] issue_comment: types: [created] schedule: @@ -53,36 +53,34 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_IO_GCS_Performance_Tests: + beam_PostCommit_Java_IO_Performance_Tests: if: | github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java PostCommit IO GCS Performance Tests' + github.event.comment.body == 'Run Java PostCommit IO Performance Tests' # runs-on: [self-hosted, ubuntu-20.04, main] runs-on: ubuntu-20.04 name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) strategy: matrix: - job_name: ["beam_PostCommit_Java_IO_GCS_Performance_Tests"] - job_phrase: ["Run Java PostCommit IO GCS Performance Tests"] + job_name: ["beam_PostCommit_Java_IO_Performance_Tests"] + job_phrase: ["Run Java PostCommit IO Performance Tests"] + test_case: ["GCSPerformanceTest", "BigTablePerformanceTest"] steps: - uses: actions/checkout@v3 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.test_case }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) - name: Checkout release branch if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: ref: v2.50.0 #TODO automate updating this repository: apache/beam - - name: Checkout non-release branch - if: github.event_name != 'schedule' #This has triggered runs checkout the triggering branch - uses: actions/checkout@v3 - name: Install Java uses: actions/setup-java@v3.8.0 with: @@ -101,22 +99,19 @@ jobs: service_account_key: ${{ secrets.GCP_SA_KEY }} project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - - name: GCloud Docker credential helper - run: | - gcloud auth configure-docker us.gcr.io - - name: run scheduled javaPostcommitIOGCSPerformanceTests script + - name: run scheduled javaPostcommitIOPerformanceTests script if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :it:google-cloud-platform:GCSPerformanceTest + gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests exportTable: io_performance_metrics - - name: run triggered javaPostcommitIOGCSPerformanceTests script + - name: run triggered javaPostcommitIOPerformanceTests script if: github.event_name != 'schedule' uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :it:google-cloud-platform:GCSPerformanceTest + gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests exportTable: io_performance_metrics_test \ No newline at end of file diff --git a/it/build.gradle b/it/build.gradle new file mode 100644 index 00000000000..35ccbba4c36 --- /dev/null +++ b/it/build.gradle @@ -0,0 +1,33 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+plugins { id 'org.apache.beam.module' }
+applyJavaNature(
+    automaticModuleName: 'org.apache.beam.it',
+)
+
+description = "Apache Beam :: IT"
+ext.summary = "Integration test utilities suites."
+
+// These registrations exist to make our matrix GitHub Action simple to configure
+tasks.register('GCSPerformanceTest') {
+  dependsOn(":it:google-cloud-platform:GCSPerformanceTest")
+}
+
+tasks.register('BigTablePerformanceTest') {
+  dependsOn(":it:google-cloud-platform:BigTablePerformanceTest")
+}
\ No newline at end of file
diff --git a/it/google-cloud-platform/build.gradle b/it/google-cloud-platform/build.gradle
index 48c7175187d..0917ddd3e21 100644
--- a/it/google-cloud-platform/build.gradle
+++ b/it/google-cloud-platform/build.gradle
@@ -77,4 +77,5 @@ dependencies {
   testRuntimeOnly library.java.slf4j_simple
 }
 
-tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp'])
\ No newline at end of file
+tasks.register("GCSPerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'FileBasedIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp'])
+tasks.register("BigTablePerformanceTest", IoPerformanceTestUtilities.IoPerformanceTest, project, 'google-cloud-platform', 'BigTableIOLT', ['configuration':'large','project':'apache-beam-testing', 'artifactBucket':'io-performance-temp'])
\ No newline at end of file

From 4bf0826a7846f671ed07389b874fb7ad5b833440 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Sep 2023 13:47:39 -0400
Subject: [PATCH 16/80] add result reporting

---
 .../workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml
index 57a02979bf6..f2cd3fd2731 100644
--- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml
+++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml
@@ -61,7 +61,7 @@ jobs:
       github.event.comment.body == 'Run Java PostCommit IO Performance Tests'
 # runs-on: [self-hosted, ubuntu-20.04, main]
     runs-on: ubuntu-20.04
-    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }})
     strategy:
       matrix:
@@ -114,4 +114,6 @@ jobs:
         gradle-command: :it:${{ matrix.test_case }}
       env:
         exportDataset: performance_tests
+
exportTable: io_performance_metrics_test + - name: read failure results + run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html From d40d04b0e2480ede63965cae645a2b43129c169d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 13:52:55 -0400 Subject: [PATCH 17/80] add failure clause --- .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index f2cd3fd2731..ae8e63f44f2 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -116,4 +116,5 @@ jobs: exportDataset: performance_tests exportTable: io_performance_metrics_test - name: read failure results + if: ${{ failure() }} run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html From 2739e927476271ee92e0563c9dd5343c19ea82f6 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Sep 2023 15:26:26 -0400 Subject: [PATCH 18/80] remove failure clause, update to run on self-hosted --- .../beam_PostCommit_Java_IO_Performance_Tests.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index ae8e63f44f2..2f232af20e0 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -59,8 +59,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit IO Performance Tests' -# runs-on: [self-hosted, ubuntu-20.04, main] - runs-on: ubuntu-20.04 + runs-on: [self-hosted, ubuntu-20.04, main] name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) strategy: matrix: @@ -114,7 +113,4 @@ jobs: gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests - exportTable: io_performance_metrics_test - - name: read failure results - if: ${{ failure() }} - run: cat /home/runner/work/beam/beam/it/google-cloud-platform/build/reports/tests/BigTablePerformanceTest/classes/org.apache.beam.it.gcp.storage.BigTableIOLT.html + exportTable: io_performance_metrics_test \ No newline at end of file From bd6efeb15d7dc2b4126d950697b06e4b4964df3b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 6 Sep 2023 11:47:14 -0400 Subject: [PATCH 19/80] address comments, clean up build --- .../workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 2f232af20e0..48f43357a20 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -78,7 +78,7 @@ jobs: if: github.event_name == 'schedule' #This has scheduled runs run against the latest release uses: actions/checkout@v3 with: - ref: v2.50.0 #TODO automate updating this + ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this repository: apache/beam - name: Install 
Java uses: actions/setup-java@v3.8.0 @@ -89,8 +89,6 @@ jobs: uses: gradle/gradle-build-action@v2 with: cache-read-only: false - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 with: From 226a655b387db51685ea838f44be850e1790a309 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 6 Sep 2023 11:51:57 -0400 Subject: [PATCH 20/80] clarify branching --- .github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index 48f43357a20..ffd5751fd8b 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -97,7 +97,7 @@ jobs: project_id: ${{ secrets.GCP_PROJECT_ID }} export_default_credentials: true - name: run scheduled javaPostcommitIOPerformanceTests script - if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly + if: github.event_name == 'schedule' #This ensures only scheduled runs publish metrics publicly by changing which exportTable is configured uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :it:${{ matrix.test_case }} From d058ac9344f2340624fe18012f102c411462124d Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 27 Oct 2023 11:26:20 -0400 Subject: [PATCH 21/80] Add error handling base implementation & test DLQ enabled class --- .../java/org/apache/beam/sdk/Pipeline.java | 21 +++ .../beam/sdk/errorhandling/DeadLetter.java | 73 +++++++++ .../sdk/errorhandling/DeadLetterHandler.java | 91 +++++++++++ .../beam/sdk/errorhandling/ErrorHandler.java | 147 ++++++++++++++++++ .../errorhandling/DLQEnabledPTransform.java | 81 ++++++++++ .../sdk/errorhandling/ErrorHandlerTest.java | 80 ++++++++++ 6 files changed, 493 insertions(+) create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java create mode 100644 sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java create mode 100644 sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index bd0215e1326..700c8521346 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -22,6 +22,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables.transform; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -30,6 +31,8 @@ import javax.annotation.Nonnull; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.coders.CoderRegistry; +import org.apache.beam.sdk.errorhandling.ErrorHandler; +import org.apache.beam.sdk.errorhandling.ErrorHandler.PTransformErrorHandler; import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; @@ 
-318,6 +321,7 @@ public PipelineResult run(PipelineOptions options) {
     LOG.debug("Running {} via {}", this, runner);
     try {
       validate(options);
+      validateErrorHandlers();
       return runner.run(this);
     } catch (UserCodeException e) {
       // This serves to replace the stack with one that ends here and
@@ -343,6 +347,12 @@ public SchemaRegistry getSchemaRegistry() {
     return schemaRegistry;
   }
 
+  /** Registers a new {@link ErrorHandler} that routes error collections to the given sink. */
+  public <E, T extends POutput> ErrorHandler<E, T> registerErrorHandler(PTransform<PCollection<E>, T> sinkTransform) {
+    ErrorHandler<E, T> errorHandler = new PTransformErrorHandler<>(sinkTransform);
+    errorHandlers.add(errorHandler);
+    return errorHandler;
+  }
+
   /////////////////////////////////////////////////////////////////////////////
   // Below here are operations that aren't normally called by users.
 
@@ -511,6 +521,8 @@ public static <InputT extends PInput, OutputT extends POutput> OutputT applyTran
   private final Multimap<String, PTransform<?, ?>> instancePerName = ArrayListMultimap.create();
   private final PipelineOptions defaultOptions;
 
+  private final List<ErrorHandler<?, ?>> errorHandlers = new ArrayList<>();
+
   private Pipeline(TransformHierarchy transforms, PipelineOptions options) {
     this.transforms = transforms;
     this.defaultOptions = options;
@@ -715,4 +727,13 @@ public boolean apply(@Nonnull final Map.Entry<String, Collection<PTransform<?, ?>>> input) {
       return input != null && input.getValue().size() == 1;
     }
   }
+
+  private void validateErrorHandlers() {
+    for (ErrorHandler<?, ?> errorHandler : errorHandlers) {
+      if (!errorHandler.isClosed()) {
+        throw new IllegalStateException(
+            "One or more ErrorHandlers aren't closed, and this pipeline "
+                + "cannot be run. See the ErrorHandler documentation for expected usage");
+      }
+    }
+  }
 }
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java
new file mode 100644
index 00000000000..74e861b2798
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.errorhandling;
+
+import com.google.auto.value.AutoValue;
+import java.io.Serializable;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+
+/** A generic record of an element that failed processing, plus metadata about the failure. */
+@AutoValue
+@DefaultSchema(AutoValueSchema.class)
+public abstract class DeadLetter implements Serializable {
+
+  /** The failing record, encoded as JSON. */
+  public abstract String getHumanReadableRecord();
+
+  /**
+   * Nullable to account for failing to encode, or if there is no coder for the record at the time
+   * of failure.
+   */
+  @Nullable
+  @SuppressWarnings("mutable")
+  public abstract byte[] getEncodedRecord();
+
+  /** The coder for the record, or null if there is no coder. */
+  @Nullable
+  public abstract String getCoder();
+
+  /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */
+  @Nullable
+  public abstract String getException();
+
+
+  /** The description of what was being attempted when the failure occurred. */
+  public abstract String getDescription();
+
+  /** The particular sub-transform that failed. */
+  public abstract String getFailingTransform();
+
+  public static Builder builder() {
+    return new AutoValue_DeadLetter.Builder();
+  }
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+    public abstract Builder setHumanReadableRecord(String humanReadableRecord);
+
+    public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord);
+
+    public abstract Builder setCoder(@Nullable String coder);
+
+    public abstract Builder setException(@Nullable String exception);
+
+    public abstract Builder setDescription(String description);
+
+    public abstract Builder setFailingTransform(String failingTransform);
+    public abstract DeadLetter build();
+  }
+}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java
new file mode 100644
index 00000000000..c4a9c742612
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.errorhandling;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.util.Preconditions;
+import org.apache.beam.sdk.values.TupleTag;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Strategies for handling a failing record: either rethrow, or record it as a {@link DeadLetter}. */
+public interface DeadLetterHandler extends Serializable {
+
+  DeadLetterHandler throwingHandler = new ThrowingDeadLetterHandler();
+
+  DeadLetterHandler recordingHandler = new RecordingDeadLetterHandler();
+
+  TupleTag<DeadLetter> deadLetterTag = new TupleTag<>();
+
+  <T> void handle(DoFn<?, ?>.ProcessContext c, T record, @Nullable Coder<T> coder, @Nullable Exception exception, String description, String failingTransform) throws Exception;
+
+  /** The default handler: rethrows the failure, for use when no dead letter queue is configured. */
+  class ThrowingDeadLetterHandler implements DeadLetterHandler {
+
+    @Override
+    public <T> void handle(DoFn<?, ?>.ProcessContext c, T record, @Nullable Coder<T> coder, @Nullable Exception exception, String description, String failingTransform)
+        throws Exception {
+      if (exception != null) throw exception;
+    }
+  }
+
+  /** Converts the failure into a {@link DeadLetter} and emits it to the dead letter output. */
+  class RecordingDeadLetterHandler implements DeadLetterHandler {
+
+    private static final Logger LOG = LoggerFactory.getLogger(RecordingDeadLetterHandler.class);
+
+    @Override
+    public <T> void handle(DoFn<?, ?>.ProcessContext c, T record, @Nullable Coder<T> coder, @Nullable Exception exception, String description, String failingTransform)
+        throws Exception {
+      Preconditions.checkArgumentNotNull(record);
+      ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter();
+
+      DeadLetter.Builder deadLetterBuilder = DeadLetter.builder()
+          .setHumanReadableRecord(objectWriter.writeValueAsString(record))
+          .setDescription(description)
+          .setFailingTransform(failingTransform);
+
+      // It's possible for us to want to handle an error scenario where no actual exception object exists
+      if (exception != null) {
+        deadLetterBuilder.setException(exception.toString());
+      }
+
+      // We will sometimes not have a coder for a failing record, for example if it has already been
+      // modified within the DoFn.
+      if (coder != null) {
+        deadLetterBuilder.setCoder(coder.toString());
+
+        try {
+          ByteArrayOutputStream stream = new ByteArrayOutputStream();
+          coder.encode(record, stream);
+          byte[] bytes = stream.toByteArray();
+          deadLetterBuilder.setEncodedRecord(bytes);
+        } catch (IOException e) {
+          LOG.error("Unable to encode failing record using provided coder."
+              + " DeadLetter will be published without encoded bytes", e);
+        }
+      }
+      DeadLetter deadLetter = deadLetterBuilder.build();
+      c.output(deadLetterTag, deadLetter);
+    }
+  }
+
+}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
new file mode 100644
index 00000000000..042b84c2333
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.errorhandling;
+
+import java.util.ArrayList;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.Preconditions;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.POutput;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An Error Handler is a utility object used for plumbing error PCollections to a configured sink.
+ * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the
+ * sink, and the pipeline will be rejected if any handlers aren't closed.
+ *
+ * <p>Usage of Error Handlers:
+ *
+ * <p>Simple usage with one DLQ:
+ *
+ * <pre>{@code
+ * PCollection<?> records = ...;
+ * PCollection<?> results;
+ * try (ErrorHandler<DeadLetter, ?> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
+ *   results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler));
+ * }
+ * results.apply(SomeOtherTransform);
+ * }</pre>
+ *
+ * <p>Usage with multiple DLQ stages:
+ *
+ * <pre>{@code
+ * PCollection<?> records = ...;
+ * PCollection<?> results;
+ * try (ErrorHandler<DeadLetter, ?> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
+ *   results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler))
+ *       .apply(OtherTransform.builder().withDeadLetterQueue(errorHandler));
+ * }
+ * results.apply(SomeOtherTransform);
+ * }</pre>
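+ *
+ * <p>Note: closing a handler that never received an error collection only logs a warning, and
+ * {@link #getOutput()} throws if called before the handler has been closed.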
+ *
+ * @param <E> The type of the error object. This will usually be a {@link DeadLetter}, but can
+ *     be any type.
+ * @param <T> The return type of the sink PTransform.
+ */
+public interface ErrorHandler<E, T extends POutput> extends AutoCloseable {
+
+  void addErrorCollection(PCollection<E> errorCollection);
+
+  boolean isClosed();
+
+  T getOutput();
+
+  class PTransformErrorHandler<E, T extends POutput> implements ErrorHandler<E, T> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class);
+    private final PTransform<PCollection<E>, T> sinkTransform;
+
+    private final List<PCollection<E>> errorCollections = new ArrayList<>();
+
+    @Nullable
+    private T sinkOutput = null;
+
+    private boolean closed = false;
+
+    /**
+     * Constructs a new ErrorHandler, but should not be called directly. Instead, call
+     * pipeline.registerErrorHandler to ensure safe pipeline construction
+     */
+    @Internal
+    public PTransformErrorHandler(PTransform<PCollection<E>, T> sinkTransform) {
+      this.sinkTransform = sinkTransform;
+    }
+
+    @Override
+    public void addErrorCollection(PCollection<E> errorCollection) {
+      errorCollections.add(errorCollection);
+    }
+
+    @Override
+    public boolean isClosed() {
+      return closed;
+    }
+
+    @Override
+    public T getOutput() {
+      if (!this.isClosed()) {
+        throw new IllegalStateException(
+            "ErrorHandler must be finalized before the output can be returned");
+      }
+      // Make the static analysis checker happy
+      return Preconditions.checkArgumentNotNull(sinkOutput);
+    }
+
+    @Override
+    public void close() {
+      // Mark the handler closed first, so a handler with no error collections still
+      // passes pipeline validation.
+      closed = true;
+      if (errorCollections.isEmpty()) {
+        LOG.warn("Empty list of error pcollections passed to ErrorHandler.");
+        return;
+      }
+      sinkOutput = PCollectionList.of(errorCollections)
+          .apply(Flatten.pCollections())
+          .apply(sinkTransform);
+    }
+  }
+
+  /** The default, no-op handler used when no dead letter queue is configured on a transform. */
+  @Internal
+  class NoOpErrorHandler<E, T extends POutput> implements ErrorHandler<E, T> {
+
+    @Override
+    public void addErrorCollection(PCollection<E> errorCollection) {
+    }
+
+    @Override
+    public boolean isClosed() {
+      throw new IllegalArgumentException("No Op handler should not be closed");
+    }
+
+    @Override
+    public T getOutput() {
+      throw new IllegalArgumentException("No Op handler has no output");
+    }
+
+    @Override
+    public void close() {
+      throw new IllegalArgumentException("No Op handler should not be closed");
+    }
+  }
+}
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java
new file mode 100644
index 00000000000..d6d8d1bd687
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.beam.sdk.errorhandling; + +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.errorhandling.ErrorHandler.NoOpErrorHandler; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionTuple; +import org.apache.beam.sdk.values.TupleTag; +import org.apache.beam.sdk.values.TupleTagList; + +/** + * Dummy PTransform that is configurable with a DLQ + */ +public class DLQEnabledPTransform extends PTransform, PCollection> { + + private ErrorHandler errorHandler = new NoOpErrorHandler<>(); + + private DeadLetterHandler deadLetterHandler = DeadLetterHandler.throwingHandler; + + private static final TupleTag RECORDS = new TupleTag<>(); + + public DLQEnabledPTransform() { + } + + public DLQEnabledPTransform withDeadLetterQueue(ErrorHandler errorHandler) { + this.errorHandler = errorHandler; + this.deadLetterHandler = DeadLetterHandler.recordingHandler; + return this; + } + + @Override + public PCollection expand(PCollection input) { + PCollectionTuple pCollectionTuple = + input.apply("NoOpDoFn", ParDo.of(new NoOpDoFn(deadLetterHandler)) + .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.deadLetterTag))); + + errorHandler.addErrorCollection(pCollectionTuple.get(DeadLetterHandler.deadLetterTag)); + + return pCollectionTuple.get(RECORDS); + } + + + public static class NoOpDoFn extends DoFn { + + private DeadLetterHandler deadLetterHandler; + + public NoOpDoFn(DeadLetterHandler deadLetterHandler){ + this.deadLetterHandler = deadLetterHandler; + } + + @ProcessElement + public void processElement(ProcessContext context) throws Exception { + Integer element = context.element(); + if (element % 2 == 0) { + context.output(element); + } else { + deadLetterHandler.handle(context,element, BigEndianIntegerCoder.of(), new RuntimeException(), "Integer was odd", "NoOpDoFn"); + } + + } + } +} diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java new file mode 100644 index 00000000000..31157f0ea21 --- /dev/null +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.errorhandling; + +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PCollection; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class ErrorHandlerTest { + @Rule + public final TestPipeline pipeline = TestPipeline.create(); + @Rule + public ExpectedException thrown = ExpectedException.none(); + + + @Test + public void testGoodErrorHandlerUsage() throws Exception { + try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){ + + } + + pipeline.run(); + } + + @Test + public void testBadErrorHandlerUsage(){ + + pipeline.registerErrorHandler(new DummySinkTransform>()); + + thrown.expect(IllegalStateException.class); + + pipeline.run(); + } + + @Test + public void testDLQEnabledPTransform(){ + PCollection record = pipeline.apply(Create.of(1,2,3,4)); + record.apply(new DLQEnabledPTransform()); + + pipeline.run(); + } + @Test + public void testErrorHandlerWithDLQTransform() throws Exception { + PCollection record = pipeline.apply(Create.of(1,2,3,4)); + try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){ + record.apply(new DLQEnabledPTransform().withDeadLetterQueue(eh)); + } + + pipeline.run(); + } + public static class DummySinkTransform> extends PTransform { + + @Override + public T expand(T input) { + return input; + } + } +} From 8c9dd945ea5944c33489aa731324fa4cbc3cf745 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 27 Oct 2023 14:05:52 -0400 Subject: [PATCH 22/80] Add test cases --- .../beam/sdk/errorhandling/DeadLetter.java | 3 +- .../sdk/errorhandling/DeadLetterHandler.java | 4 +- .../beam/sdk/errorhandling/ErrorHandler.java | 2 +- .../errorhandling/DLQEnabledPTransform.java | 24 ++- .../errorhandling/DeadLetterHandlerTest.java | 140 ++++++++++++++++++ .../sdk/errorhandling/ErrorHandlerTest.java | 5 + 6 files changed, 172 insertions(+), 6 deletions(-) create mode 100644 sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java index 74e861b2798..8d33abbc577 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java @@ -44,7 +44,6 @@ public abstract class DeadLetter implements Serializable { @Nullable public abstract String getException(); - /**The description of what was being attempted when the failure occurred*/ public abstract String getDescription(); @@ -59,6 +58,7 @@ public static Builder builder() { public abstract static class Builder { public abstract Builder setHumanReadableRecord(String humanReadableRecord); + @SuppressWarnings("mutable") public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord); public abstract Builder setCoder(@Nullable String coder); @@ -68,6 +68,7 @@ public abstract static class Builder { public abstract Builder setDescription(String description); public abstract Builder setFailingTransform(String failingTransform); + public abstract DeadLetter build(); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java index c4a9c742612..71b32045ed8 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java @@ -37,12 +37,12 @@ public interface DeadLetterHandler { TupleTag deadLetterTag = new TupleTag<>(); - void handle(DoFn.ProcessContext c, T record,@Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) throws Exception; + void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) throws Exception; class ThrowingDeadLetterHandler implements DeadLetterHandler { @Override - public void handle(DoFn.ProcessContext c, T record,@Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) + public void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) throws Exception { if (exception != null) throw exception; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index 042b84c2333..f558bc946f1 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -111,6 +111,7 @@ public T getOutput() { @Override public void close() { + closed = true; if (errorCollections.isEmpty()) { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); return; @@ -118,7 +119,6 @@ public void close() { sinkOutput = PCollectionList.of(errorCollections) .apply(Flatten.pCollections()) .apply(sinkTransform); - closed = true; } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java index d6d8d1bd687..82097b3a2e2 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java @@ -18,7 +18,11 @@ package org.apache.beam.sdk.errorhandling; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.errorhandling.ErrorHandler.NoOpErrorHandler; +import org.apache.beam.sdk.schemas.NoSuchSchemaException; +import org.apache.beam.sdk.schemas.SchemaCoder; +import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; @@ -26,6 +30,7 @@ import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; +import org.apache.beam.sdk.values.TypeDescriptor; /** * Dummy PTransform that is configurable with a DLQ @@ -53,9 +58,24 @@ public PCollection expand(PCollection input) { input.apply("NoOpDoFn", ParDo.of(new NoOpDoFn(deadLetterHandler)) .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.deadLetterTag))); - errorHandler.addErrorCollection(pCollectionTuple.get(DeadLetterHandler.deadLetterTag)); + Coder deadLetterCoder; + + try { + SchemaRegistry schemaRegistry = 
input.getPipeline().getSchemaRegistry(); + deadLetterCoder = + SchemaCoder.of( + schemaRegistry.getSchema(DeadLetter.class), + TypeDescriptor.of(DeadLetter.class), + schemaRegistry.getToRowFunction(DeadLetter.class), + schemaRegistry.getFromRowFunction(DeadLetter.class)); + } catch (NoSuchSchemaException e) { + throw new RuntimeException(e); + } + + errorHandler.addErrorCollection( + pCollectionTuple.get(DeadLetterHandler.deadLetterTag).setCoder(deadLetterCoder)); - return pCollectionTuple.get(RECORDS); + return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java new file mode 100644 index 00000000000..919d4a62900 --- /dev/null +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.errorhandling; + +import static org.apache.beam.sdk.errorhandling.DeadLetterHandler.deadLetterTag; +import static org.mockito.Mockito.verify; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.transforms.DoFn.ProcessContext; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; + +@RunWith(JUnit4.class) +public class DeadLetterHandlerTest { + + @Rule + public final MockitoRule mockito = MockitoJUnit.rule(); + + @Rule + public ExpectedException thrown = ExpectedException.none(); + + @Mock + private ProcessContext processContext; + + @Test + public void testThrowingHandlerWithException() throws Exception { + DeadLetterHandler handler = DeadLetterHandler.throwingHandler; + + thrown.expect(RuntimeException.class); + + handler.handle(processContext,new Object(),null, new RuntimeException(), "desc", "transform"); + } + + @Test + public void testThrowingHandlerWithNoException() throws Exception { + DeadLetterHandler handler = DeadLetterHandler.throwingHandler; + + handler.handle(processContext,new Object(),null, null, "desc", "transform"); + } + + @Test + public void testRecordingHandler() throws Exception { + DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + + handler.handle(processContext,5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", 
"transform"); + + DeadLetter expected = DeadLetter.builder() + .setHumanReadableRecord("5") + .setEncodedRecord(new byte[]{0, 0, 0, 5}) + .setCoder("BigEndianIntegerCoder") + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build(); + + verify(processContext).output(deadLetterTag, expected); + } + + @Test + public void testNoCoder() throws Exception { + DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + + handler.handle(processContext,5, null, new RuntimeException(), "desc", "transform"); + + DeadLetter expected = DeadLetter.builder() + .setHumanReadableRecord("5") + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build(); + + verify(processContext).output(deadLetterTag, expected); + } + + @Test + public void testFailingCoder() throws Exception { + DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + + Coder failingCoder = new Coder() { + @Override + public void encode(Integer value, OutputStream outStream) throws CoderException, IOException { + throw new IOException(); + } + + @Override + public Integer decode(InputStream inStream) throws CoderException, IOException { + return null; + } + + @Override + public List> getCoderArguments() { + return null; + } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + + } + }; + + handler.handle(processContext,5, failingCoder, new RuntimeException(), "desc", "transform"); + + DeadLetter expected = DeadLetter.builder() + .setHumanReadableRecord("5") + .setCoder(failingCoder.toString()) + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build(); + + verify(processContext).output(deadLetterTag, expected); + } +} diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java index 31157f0ea21..32afee45a15 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java @@ -17,12 +17,14 @@ */ package org.apache.beam.sdk.errorhandling; +import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.values.PCollection; import org.junit.Rule; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -36,6 +38,7 @@ public class ErrorHandlerTest { @Test + @Category(NeedsRunner.class) public void testGoodErrorHandlerUsage() throws Exception { try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){ @@ -55,6 +58,7 @@ public void testBadErrorHandlerUsage(){ } @Test + @Category(NeedsRunner.class) public void testDLQEnabledPTransform(){ PCollection record = pipeline.apply(Create.of(1,2,3,4)); record.apply(new DLQEnabledPTransform()); @@ -62,6 +66,7 @@ public void testDLQEnabledPTransform(){ pipeline.run(); } @Test + @Category(NeedsRunner.class) public void testErrorHandlerWithDLQTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1,2,3,4)); try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){ From 
4d23fe84a47e73fb92641ea06875828ec2064568 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 27 Oct 2023 16:31:36 -0400 Subject: [PATCH 23/80] apply spotless --- .../java/org/apache/beam/sdk/Pipeline.java | 18 +-- .../beam/sdk/errorhandling/DeadLetter.java | 14 ++- .../sdk/errorhandling/DeadLetterHandler.java | 56 ++++++--- .../beam/sdk/errorhandling/ErrorHandler.java | 35 +++--- .../errorhandling/DLQEnabledPTransform.java | 25 ++-- .../errorhandling/DeadLetterHandlerTest.java | 111 +++++++++--------- .../sdk/errorhandling/ErrorHandlerTest.java | 27 ++--- 7 files changed, 155 insertions(+), 131 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index 700c8521346..19b7d9ded5a 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -347,8 +347,9 @@ public SchemaRegistry getSchemaRegistry() { return schemaRegistry; } - public ErrorHandler registerErrorHandler(PTransform,T> sinkTransform){ - ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform); + public ErrorHandler registerErrorHandler( + PTransform, T> sinkTransform) { + ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform); errorHandlers.add(errorHandler); return errorHandler; } @@ -521,7 +522,7 @@ public static OutputT applyTran private final Multimap> instancePerName = ArrayListMultimap.create(); private final PipelineOptions defaultOptions; - private final List> errorHandlers = new ArrayList<>(); + private final List> errorHandlers = new ArrayList<>(); private Pipeline(TransformHierarchy transforms, PipelineOptions options) { this.transforms = transforms; @@ -728,11 +729,12 @@ public boolean apply(@Nonnull final Map.Entry> input) { } } - private void validateErrorHandlers(){ - for (ErrorHandler errorHandler : errorHandlers){ - if (!errorHandler.isClosed()){ - throw new IllegalStateException("One or more ErrorHandlers aren't closed, and this pipeline" - + "cannot be run. See the ErrorHandler documentation for expected usage"); + private void validateErrorHandlers() { + for (ErrorHandler errorHandler : errorHandlers) { + if (!errorHandler.isClosed()) { + throw new IllegalStateException( + "One or more ErrorHandlers aren't closed, and this pipeline" + + "cannot be run. See the ErrorHandler documentation for expected usage"); } } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java index 8d33abbc577..f09f0be6355 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java @@ -30,24 +30,26 @@ public abstract class DeadLetter implements Serializable { /** The failing record, encoded as JSON */ public abstract String getHumanReadableRecord(); - /**Nullable to account for failing to encode, or if there is no coder for the record - at the time of failure */ + /** + * Nullable to account for failing to encode, or if there is no coder for the record at the time + * of failure + */ @Nullable @SuppressWarnings("mutable") public abstract byte[] getEncodedRecord(); - /**The coder for the record, or null if there is no coder */ + /** The coder for the record, or null if there is no coder */ @Nullable public abstract String getCoder(); - /**The exception itself, e.g. IOException. 
Null if there is a failure without an exception*/ + /** The exception itself, e.g. IOException. Null if there is a failure without an exception */ @Nullable public abstract String getException(); - /**The description of what was being attempted when the failure occurred*/ + /** The description of what was being attempted when the failure occurred */ public abstract String getDescription(); - /**The particular sub-transform that failed.*/ + /** The particular sub-transform that failed. */ public abstract String getFailingTransform(); public static Builder builder() { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java index 71b32045ed8..51625f50fd5 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java @@ -37,12 +37,25 @@ public interface DeadLetterHandler { TupleTag deadLetterTag = new TupleTag<>(); - void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) throws Exception; + void handle( + DoFn.ProcessContext c, + T record, + @Nullable Coder coder, + @Nullable Exception exception, + String description, + String failingTransform) + throws Exception; class ThrowingDeadLetterHandler implements DeadLetterHandler { @Override - public void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) + public void handle( + DoFn.ProcessContext c, + T record, + @Nullable Coder coder, + @Nullable Exception exception, + String description, + String failingTransform) throws Exception { if (exception != null) throw exception; } @@ -53,24 +66,32 @@ class RecordingDeadLetterHandler implements DeadLetterHandler { private static final Logger LOG = LoggerFactory.getLogger(RecordingDeadLetterHandler.class); @Override - public void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) + public void handle( + DoFn.ProcessContext c, + T record, + @Nullable Coder coder, + @Nullable Exception exception, + String description, + String failingTransform) throws Exception { Preconditions.checkArgumentNotNull(record); ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); - DeadLetter.Builder deadLetterBuilder = DeadLetter.builder() - .setHumanReadableRecord(objectWriter.writeValueAsString(record)) - .setDescription(description) - .setFailingTransform(failingTransform); + DeadLetter.Builder deadLetterBuilder = + DeadLetter.builder() + .setHumanReadableRecord(objectWriter.writeValueAsString(record)) + .setDescription(description) + .setFailingTransform(failingTransform); - //Its possible for us to want to handle an error scenario where no actual exception objet exists - if (exception != null){ + // Its possible for us to want to handle an error scenario where no actual exception objet + // exists + if (exception != null) { deadLetterBuilder.setException(exception.toString()); } - //We will sometimes not have a coder for a failing record, for example if it has already been - //modified within the dofn. - if (coder != null){ + // We will sometimes not have a coder for a failing record, for example if it has already been + // modified within the dofn. 
+ if (coder != null) { deadLetterBuilder.setCoder(coder.toString()); try { @@ -78,14 +99,15 @@ public void handle(DoFn.ProcessContext c, T record, @Nullable Coder coder.encode(record, stream); byte[] bytes = stream.toByteArray(); deadLetterBuilder.setEncodedRecord(bytes); - } catch (IOException e){ - LOG.error("Unable to encode failing record using provided coder." - + " DeadLetter will be published without encoded bytes", e); + } catch (IOException e) { + LOG.error( + "Unable to encode failing record using provided coder." + + " DeadLetter will be published without encoded bytes", + e); } } DeadLetter deadLetter = deadLetterBuilder.build(); - c.output(deadLetterTag,deadLetter); + c.output(deadLetterTag, deadLetter); } } - } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index f558bc946f1..4d00b9a2f4d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -34,23 +34,21 @@ * An Error Handler is a utility object used for plumbing error PCollections to a configured sink * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the * sink, and the pipeline will be rejected if any handlers aren't closed. - * @param The type of the error object. This will usually be a {@link DeadLetter}, but can - * be any type + * + * @param The type of the error object. This will usually be a {@link DeadLetter}, but can be + * any type * @param The return type of the sink PTransform. - *

- * Usage of Error Handlers:
- *
- * Simple usage with one DLQ
- *
- * <pre>{@code
+ *     <p>Usage of Error Handlers:
+ *     <p>Simple usage with one DLQ
+ *     <pre>{@code
  * PCollection<?> records = ...;
  * try (ErrorHandler<E, T> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
  *  PCollection<?> results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler));
  * }
  * results.apply(SomeOtherTransform);
  * }
- *
- * Usage with multiple DLQ stages
- *
- * <pre>{@code
+ *     Usage with multiple DLQ stages
+ *     <pre>{@code
  * PCollection<?> records = ...;
  * try (ErrorHandler<E, T> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
  *  PCollection<?> results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler))
@@ -67,15 +65,14 @@ public interface ErrorHandler<E, T> extends AutoCloseable {
 
   T getOutput();
 
-  class PTransformErrorHandler<E,T> implements ErrorHandler<E,T> {
+  class PTransformErrorHandler<E, T> implements ErrorHandler<E, T> {
 
     private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class);
     private final PTransform<PCollection<E>, T> sinkTransform;
 
     private final List<PCollection<E>> errorCollections = new ArrayList<>();
 
-    @Nullable
-    private T sinkOutput = null;
+    @Nullable private T sinkOutput = null;
 
     private boolean closed = false;
 
@@ -104,7 +101,7 @@ public T getOutput() {
         throw new IllegalStateException(
             "ErrorHandler must be finalized before the output can be returned");
       }
-      //make the static analysis checker happy
+      // make the static analysis checker happy
       Preconditions.checkArgumentNotNull(sinkOutput);
       return sinkOutput;
     }
@@ -116,18 +113,16 @@ public void close() {
         LOG.warn("Empty list of error pcollections passed to ErrorHandler.");
         return;
       }
-      sinkOutput = PCollectionList.of(errorCollections)
-          .apply(Flatten.pCollections())
-          .apply(sinkTransform);
+      sinkOutput =
+          PCollectionList.of(errorCollections).apply(Flatten.pCollections()).apply(sinkTransform);
     }
   }
 
   @Internal
-  class NoOpErrorHandler<E,T> implements ErrorHandler<E,T> {
+  class NoOpErrorHandler<E, T> implements ErrorHandler<E, T> {
 
     @Override
-    public void addErrorCollection(PCollection<E> errorCollection) {
-    }
+    public void addErrorCollection(PCollection<E> errorCollection) {}
 
     @Override
     public boolean isClosed() {
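
Taken together, registerErrorHandler, close(), and getOutput() define a strict lifecycle.
The following is a minimal sketch of that lifecycle, reusing the DLQEnabledPTransform and
DummySinkTransform helpers from the test files in this series; the wiring below is
illustrative only and is not part of the patch:

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.errorhandling.DeadLetter;
    import org.apache.beam.sdk.errorhandling.ErrorHandler;
    import org.apache.beam.sdk.errorhandling.ErrorHandlerTest.DummySinkTransform;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;

    public class ErrorHandlerLifecycleExample {
      public static void main(String[] args) throws Exception {
        Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
        PCollection<Integer> records = pipeline.apply(Create.of(1, 2, 3, 4));

        // try-with-resources closes the handler before run(); close() flattens every
        // registered error collection and applies the sink transform exactly once.
        try (ErrorHandler<DeadLetter, PCollection<DeadLetter>> handler =
            pipeline.registerErrorHandler(new DummySinkTransform<>())) {
          records.apply(new DLQEnabledPTransform().withDeadLetterQueue(handler));
        }

        // Pipeline.run() validates that every registered handler is closed.
        pipeline.run().waitUntilFinish();
      }
    }
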
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java
index 82097b3a2e2..36606dc6631 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java
@@ -32,9 +32,7 @@
 import org.apache.beam.sdk.values.TupleTagList;
 import org.apache.beam.sdk.values.TypeDescriptor;
 
-/**
- * Dummy PTransform that is configurable with a DLQ
- */
+/** Dummy PTransform that is configurable with a DLQ */
 public class DLQEnabledPTransform extends PTransform<PCollection<Integer>, PCollection<Integer>> {
 
   private ErrorHandler<DeadLetter, ?> errorHandler = new NoOpErrorHandler<>();
@@ -43,8 +41,7 @@ public class DLQEnabledPTransform extends PTransform<PCollection<Integer>, PColl
 
   private static final TupleTag<Integer> RECORDS = new TupleTag<>();
 
-  public DLQEnabledPTransform() {
-  }
+  public DLQEnabledPTransform() {}
 
   public DLQEnabledPTransform withDeadLetterQueue(ErrorHandler<DeadLetter, ?> errorHandler) {
     this.errorHandler = errorHandler;
@@ -55,8 +52,10 @@ public DLQEnabledPTransform withDeadLetterQueue(ErrorHandler<DeadLetter, ?> erro
   @Override
   public PCollection<Integer> expand(PCollection<Integer> input) {
     PCollectionTuple pCollectionTuple =
-        input.apply("NoOpDoFn", ParDo.of(new NoOpDoFn(deadLetterHandler))
-            .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.deadLetterTag)));
+        input.apply(
+            "NoOpDoFn",
+            ParDo.of(new NoOpDoFn(deadLetterHandler))
+                .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.deadLetterTag)));
 
     Coder<DeadLetter> deadLetterCoder;
 
@@ -78,12 +77,11 @@ public PCollection<Integer> expand(PCollection<Integer> input) {
     return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of());
   }
 
-
   public static class NoOpDoFn extends DoFn<Integer, Integer> {
 
     private DeadLetterHandler deadLetterHandler;
 
-    public NoOpDoFn(DeadLetterHandler deadLetterHandler){
+    public NoOpDoFn(DeadLetterHandler deadLetterHandler) {
       this.deadLetterHandler = deadLetterHandler;
     }
 
@@ -93,9 +91,14 @@ public void processElement(ProcessContext context) throws Exception {
       if (element % 2 == 0) {
         context.output(element);
       } else {
-        deadLetterHandler.handle(context,element, BigEndianIntegerCoder.of(), new RuntimeException(), "Integer was odd", "NoOpDoFn");
+        deadLetterHandler.handle(
+            context,
+            element,
+            BigEndianIntegerCoder.of(),
+            new RuntimeException(),
+            "Integer was odd",
+            "NoOpDoFn");
       }
-
     }
   }
 }
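
The expand() above is the standard multi-output DoFn pattern: successes go to the main
tag, failures are packaged as DeadLetters on a side tag. A compact standalone sketch of
the same routing follows; input is assumed to be a PCollection<Integer>, and the
odd/even rule mirrors NoOpDoFn:

    final TupleTag<Integer> mainTag = new TupleTag<Integer>() {};
    final TupleTag<DeadLetter> failureTag = new TupleTag<DeadLetter>() {};

    PCollectionTuple outputs =
        input.apply(
            "RouteOddToDlq",
            ParDo.of(
                    new DoFn<Integer, Integer>() {
                      @ProcessElement
                      public void process(ProcessContext c) {
                        if (c.element() % 2 == 0) {
                          c.output(c.element()); // even elements succeed
                        } else {
                          // odd elements become structured failure records
                          c.output(
                              failureTag,
                              DeadLetter.builder()
                                  .setHumanReadableRecord(c.element().toString())
                                  .setDescription("Integer was odd")
                                  .setFailingTransform("RouteOddToDlq")
                                  .build());
                        }
                      }
                    })
                .withOutputTags(mainTag, TupleTagList.of(failureTag)));

    PCollection<Integer> successes = outputs.get(mainTag);
    PCollection<DeadLetter> failures = outputs.get(failureTag); // hand this to the ErrorHandler
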
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java
index 919d4a62900..e8207a98515 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java
@@ -40,14 +40,11 @@
 @RunWith(JUnit4.class)
 public class DeadLetterHandlerTest {
 
-  @Rule
-  public final MockitoRule mockito = MockitoJUnit.rule();
+  @Rule public final MockitoRule mockito = MockitoJUnit.rule();
 
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
+  @Rule public ExpectedException thrown = ExpectedException.none();
 
-  @Mock
-  private ProcessContext processContext;
+  @Mock private ProcessContext processContext;
 
   @Test
   public void testThrowingHandlerWithException() throws Exception {
@@ -55,30 +52,32 @@ public void testThrowingHandlerWithException() throws Exception {
 
     thrown.expect(RuntimeException.class);
 
-    handler.handle(processContext,new Object(),null, new RuntimeException(), "desc", "transform");
+    handler.handle(processContext, new Object(), null, new RuntimeException(), "desc", "transform");
   }
 
   @Test
   public void testThrowingHandlerWithNoException() throws Exception {
     DeadLetterHandler handler = DeadLetterHandler.throwingHandler;
 
-    handler.handle(processContext,new Object(),null, null, "desc", "transform");
+    handler.handle(processContext, new Object(), null, null, "desc", "transform");
   }
 
   @Test
   public void testRecordingHandler() throws Exception {
     DeadLetterHandler handler = DeadLetterHandler.recordingHandler;
 
-    handler.handle(processContext,5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform");
+    handler.handle(
+        processContext, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform");
 
-    DeadLetter expected = DeadLetter.builder()
-        .setHumanReadableRecord("5")
-        .setEncodedRecord(new byte[]{0, 0, 0, 5})
-        .setCoder("BigEndianIntegerCoder")
-        .setException("java.lang.RuntimeException")
-        .setDescription("desc")
-        .setFailingTransform("transform")
-        .build();
+    DeadLetter expected =
+        DeadLetter.builder()
+            .setHumanReadableRecord("5")
+            .setEncodedRecord(new byte[] {0, 0, 0, 5})
+            .setCoder("BigEndianIntegerCoder")
+            .setException("java.lang.RuntimeException")
+            .setDescription("desc")
+            .setFailingTransform("transform")
+            .build();
 
     verify(processContext).output(deadLetterTag, expected);
   }
@@ -87,14 +86,15 @@ public void testRecordingHandler() throws Exception {
   public void testNoCoder() throws Exception {
     DeadLetterHandler handler = DeadLetterHandler.recordingHandler;
 
-    handler.handle(processContext,5, null, new RuntimeException(), "desc", "transform");
+    handler.handle(processContext, 5, null, new RuntimeException(), "desc", "transform");
 
-    DeadLetter expected = DeadLetter.builder()
-        .setHumanReadableRecord("5")
-        .setException("java.lang.RuntimeException")
-        .setDescription("desc")
-        .setFailingTransform("transform")
-        .build();
+    DeadLetter expected =
+        DeadLetter.builder()
+            .setHumanReadableRecord("5")
+            .setException("java.lang.RuntimeException")
+            .setDescription("desc")
+            .setFailingTransform("transform")
+            .build();
 
     verify(processContext).output(deadLetterTag, expected);
   }
@@ -103,37 +103,38 @@ public void testNoCoder() throws Exception {
   public void testFailingCoder() throws Exception {
     DeadLetterHandler handler = DeadLetterHandler.recordingHandler;
 
-    Coder<Integer> failingCoder = new Coder<Integer>() {
-      @Override
-      public void encode(Integer value, OutputStream outStream) throws CoderException, IOException {
-        throw new IOException();
-      }
-
-      @Override
-      public Integer decode(InputStream inStream) throws CoderException, IOException {
-        return null;
-      }
-
-      @Override
-      public List<? extends Coder<?>> getCoderArguments() {
-        return null;
-      }
-
-      @Override
-      public void verifyDeterministic() throws NonDeterministicException {
-
-      }
-    };
-
-    handler.handle(processContext,5, failingCoder, new RuntimeException(), "desc", "transform");
-
-    DeadLetter expected = DeadLetter.builder()
-        .setHumanReadableRecord("5")
-        .setCoder(failingCoder.toString())
-        .setException("java.lang.RuntimeException")
-        .setDescription("desc")
-        .setFailingTransform("transform")
-        .build();
+    Coder<Integer> failingCoder =
+        new Coder<Integer>() {
+          @Override
+          public void encode(Integer value, OutputStream outStream)
+              throws CoderException, IOException {
+            throw new IOException();
+          }
+
+          @Override
+          public Integer decode(InputStream inStream) throws CoderException, IOException {
+            return null;
+          }
+
+          @Override
+          public List<? extends Coder<?>> getCoderArguments() {
+            return null;
+          }
+
+          @Override
+          public void verifyDeterministic() throws NonDeterministicException {}
+        };
+
+    handler.handle(processContext, 5, failingCoder, new RuntimeException(), "desc", "transform");
+
+    DeadLetter expected =
+        DeadLetter.builder()
+            .setHumanReadableRecord("5")
+            .setCoder(failingCoder.toString())
+            .setException("java.lang.RuntimeException")
+            .setDescription("desc")
+            .setFailingTransform("transform")
+            .build();
 
     verify(processContext).output(deadLetterTag, expected);
   }
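
The expected bytes in these assertions come directly from the coder; a short check of
that encoding assumption (values match testRecordingHandler, imports as in the test):

    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    BigEndianIntegerCoder.of().encode(5, stream); // throws IOException, declared by the caller
    byte[] encoded = stream.toByteArray();        // {0, 0, 0, 5}, as asserted via setEncodedRecord
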
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java
index 32afee45a15..2c9763191c4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java
@@ -31,24 +31,20 @@
 
 @RunWith(JUnit4.class)
 public class ErrorHandlerTest {
-  @Rule
-  public final TestPipeline pipeline = TestPipeline.create();
-  @Rule
-  public ExpectedException thrown = ExpectedException.none();
-
+  @Rule public final TestPipeline pipeline = TestPipeline.create();
+  @Rule public ExpectedException thrown = ExpectedException.none();
 
   @Test
   @Category(NeedsRunner.class)
   public void testGoodErrorHandlerUsage() throws Exception {
-    try (ErrorHandler<DeadLetter, PCollection<DeadLetter>> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){
-
-    }
+    try (ErrorHandler<DeadLetter, PCollection<DeadLetter>> eh =
+        pipeline.registerErrorHandler(new DummySinkTransform<>())) {}
 
     pipeline.run();
   }
 
   @Test
-  public void testBadErrorHandlerUsage(){
+  public void testBadErrorHandlerUsage() {
 
     pipeline.registerErrorHandler(new DummySinkTransform<PCollection<DeadLetter>>());
 
@@ -59,23 +55,26 @@ public void testBadErrorHandlerUsage(){
 
   @Test
   @Category(NeedsRunner.class)
-  public void testDLQEnabledPTransform(){
-    PCollection<Integer> record = pipeline.apply(Create.of(1,2,3,4));
+  public void testDLQEnabledPTransform() {
+    PCollection<Integer> record = pipeline.apply(Create.of(1, 2, 3, 4));
     record.apply(new DLQEnabledPTransform());
 
     pipeline.run();
   }
+
   @Test
   @Category(NeedsRunner.class)
   public void testErrorHandlerWithDLQTransform() throws Exception {
-    PCollection<Integer> record = pipeline.apply(Create.of(1,2,3,4));
-    try (ErrorHandler<DeadLetter, PCollection<DeadLetter>> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())){
+    PCollection<Integer> record = pipeline.apply(Create.of(1, 2, 3, 4));
+    try (ErrorHandler<DeadLetter, PCollection<DeadLetter>> eh =
+        pipeline.registerErrorHandler(new DummySinkTransform<>())) {
       record.apply(new DLQEnabledPTransform().withDeadLetterQueue(eh));
     }
 
     pipeline.run();
   }
-  public static class DummySinkTransform<T extends PCollection<?>> extends PTransform<T,T> {
+
+  public static class DummySinkTransform<T extends PCollection<?>> extends PTransform<T, T> {
 
     @Override
     public T expand(T input) {

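DummySinkTransform simply echoes its input; any PTransform over the flattened error
collection works as a sink. A hypothetical alternative that counts dead letters instead
(CountingSink is not part of this patch; Count.globally is standard Beam):

    public static class CountingSink
        extends PTransform<PCollection<DeadLetter>, PCollection<Long>> {
      @Override
      public PCollection<Long> expand(PCollection<DeadLetter> input) {
        // Collapse the error stream to a single (per-window) failure count.
        return input.apply(Count.globally());
      }
    }

    // Usage mirrors testErrorHandlerWithDLQTransform, with a count as the handler output:
    try (ErrorHandler<DeadLetter, PCollection<Long>> handler =
        pipeline.registerErrorHandler(new CountingSink())) {
      records.apply(new DLQEnabledPTransform().withDeadLetterQueue(handler));
    }
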
From 31432b71643ed9f54fc5b6a698fb39aae53d5a92 Mon Sep 17 00:00:00 2001
From: johnjcasey 
Date: Mon, 30 Oct 2023 10:48:32 -0400
Subject: [PATCH 24/80] Fix Checkstyles

---
 .../java/org/apache/beam/sdk/Pipeline.java    |  6 ++---
 .../beam/sdk/errorhandling/DeadLetter.java    | 10 ++++----
 .../sdk/errorhandling/DeadLetterHandler.java  | 12 ++++++----
 .../beam/sdk/errorhandling/ErrorHandler.java  | 20 ++++++++--------
 .../beam/sdk/errorhandling/package-info.java  | 23 +++++++++++++++++++
 .../errorhandling/DLQEnabledPTransform.java   |  8 +++----
 .../errorhandling/DeadLetterHandlerTest.java  | 18 +++++++--------
 7 files changed, 61 insertions(+), 36 deletions(-)
 create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
index 19b7d9ded5a..b37289c893f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java
@@ -347,9 +347,9 @@ public SchemaRegistry getSchemaRegistry() {
     return schemaRegistry;
   }
 
-  public <E, T extends POutput> ErrorHandler<E, T> registerErrorHandler(
-      PTransform<PCollection<E>, T> sinkTransform) {
-    ErrorHandler<E, T> errorHandler = new PTransformErrorHandler<>(sinkTransform);
+  public <ErrorT, T extends POutput> ErrorHandler<ErrorT, T> registerErrorHandler(
+      PTransform<PCollection<ErrorT>, T> sinkTransform) {
+    ErrorHandler<ErrorT, T> errorHandler = new PTransformErrorHandler<>(sinkTransform);
     errorHandlers.add(errorHandler);
     return errorHandler;
   }
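
This registration path pairs with validateErrorHandlers() from the spotless patch above:
a handler that is registered but never closed fails the run. A sketch of that failure
mode, mirroring testBadErrorHandlerUsage:

    // Register a handler but never close it (no try-with-resources).
    ErrorHandler<DeadLetter, PCollection<DeadLetter>> leaked =
        pipeline.registerErrorHandler(new DummySinkTransform<PCollection<DeadLetter>>());

    // validateErrorHandlers() sees leaked.isClosed() == false and rejects the run with
    // IllegalStateException("One or more ErrorHandlers aren't closed...").
    pipeline.run();
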
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java
index f09f0be6355..aa4fb4dab31 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java
@@ -27,26 +27,26 @@
 @DefaultSchema(AutoValueSchema.class)
 public abstract class DeadLetter implements Serializable {
 
-  /** The failing record, encoded as JSON */
+  /** The failing record, encoded as JSON. */
   public abstract String getHumanReadableRecord();
 
   /**
    * Nullable to account for failing to encode, or if there is no coder for the record at the time
-   * of failure
+   * of failure.
    */
   @Nullable
   @SuppressWarnings("mutable")
   public abstract byte[] getEncodedRecord();
 
-  /** The coder for the record, or null if there is no coder */
+  /** The coder for the record, or null if there is no coder. */
   @Nullable
   public abstract String getCoder();
 
-  /** The exception itself, e.g. IOException. Null if there is a failure without an exception */
+  /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */
   @Nullable
   public abstract String getException();
 
-  /** The description of what was being attempted when the failure occurred */
+  /** The description of what was being attempted when the failure occurred. */
   public abstract String getDescription();
 
   /** The particular sub-transform that failed. */
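
Assembled by hand, a fully populated record looks like the following; field values are
borrowed from the tests later in this series, and exception and coder are kept as plain
strings so the AutoValue stays schema-friendly and serializable:

    DeadLetter letter =
        DeadLetter.builder()
            .setHumanReadableRecord("5")                // JSON rendering of the failing record
            .setEncodedRecord(new byte[] {0, 0, 0, 5})  // nullable: bytes from the record's coder
            .setCoder("BigEndianIntegerCoder")          // nullable: string form of the coder
            .setException("java.lang.RuntimeException") // nullable: failures may lack an exception
            .setDescription("Integer was odd")
            .setFailingTransform("NoOpDoFn")
            .build();
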
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java
index 51625f50fd5..6d8463de587 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java
@@ -31,11 +31,11 @@
 
 public interface DeadLetterHandler {
 
-  DeadLetterHandler throwingHandler = new ThrowingDeadLetterHandler();
+  DeadLetterHandler THROWING_HANDLER = new ThrowingDeadLetterHandler();
 
-  DeadLetterHandler recordingHandler = new RecordingDeadLetterHandler();
+  DeadLetterHandler RECORDING_HANDLER = new RecordingDeadLetterHandler();
 
-  TupleTag<DeadLetter> deadLetterTag = new TupleTag<>();
+  TupleTag<DeadLetter> DEAD_LETTER_TAG = new TupleTag<>();
 
  <T> void handle(
      DoFn.ProcessContext c,
@@ -57,7 +57,9 @@ public <T> void handle(
         String description,
         String failingTransform)
         throws Exception {
-      if (exception != null) throw exception;
+      if (exception != null) {
+        throw exception;
+      }
     }
   }
 
@@ -107,7 +109,7 @@ public <T> void handle(
         }
       }
       DeadLetter deadLetter = deadLetterBuilder.build();
-      c.output(deadLetterTag, deadLetter);
+      c.output(DEAD_LETTER_TAG, deadLetter);
     }
   }
 }
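
The renamed constants give a transform its failure policy: THROWING_HANDLER rethrows,
RECORDING_HANDLER emits a DeadLetter on DEAD_LETTER_TAG. A sketch of how a DoFn might
select between them; dlqConfigured, transform, element, coder, and context stand in for
the transform's own state and are not part of the patch:

    DeadLetterHandler handler =
        dlqConfigured ? DeadLetterHandler.RECORDING_HANDLER : DeadLetterHandler.THROWING_HANDLER;
    try {
      context.output(transform(element));
    } catch (Exception e) {
      // THROWING_HANDLER rethrows e; RECORDING_HANDLER JSON-encodes the element,
      // attaches coder bytes when possible, and outputs on DEAD_LETTER_TAG.
      handler.handle(context, element, coder, e, "transform failed", "MyTransform");
    }
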
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
index 4d00b9a2f4d..2ece941978c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
@@ -35,7 +35,7 @@
  * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the
  * sink, and the pipeline will be rejected if any handlers aren't closed.
  *
- * @param <E> The type of the error object. This will usually be a {@link DeadLetter}, but can be
+ * @param <ErrorT> The type of the error object. This will usually be a {@link DeadLetter}, but can be
  *     any type
  * @param <T> The return type of the sink PTransform.
  *     

Usage of Error Handlers: @@ -57,20 +57,20 @@ * results.apply(SomeOtherTransform); * }

*/ -public interface ErrorHandler extends AutoCloseable { +public interface ErrorHandler extends AutoCloseable { - void addErrorCollection(PCollection errorCollection); + void addErrorCollection(PCollection errorCollection); boolean isClosed(); T getOutput(); - class PTransformErrorHandler implements ErrorHandler { + class PTransformErrorHandler implements ErrorHandler { private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); - private final PTransform, T> sinkTransform; + private final PTransform, T> sinkTransform; - private final List> errorCollections = new ArrayList<>(); + private final List> errorCollections = new ArrayList<>(); @Nullable private T sinkOutput = null; @@ -81,12 +81,12 @@ class PTransformErrorHandler implements ErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler(PTransform, T> sinkTransform) { + public PTransformErrorHandler(PTransform, T> sinkTransform) { this.sinkTransform = sinkTransform; } @Override - public void addErrorCollection(PCollection errorCollection) { + public void addErrorCollection(PCollection errorCollection) { errorCollections.add(errorCollection); } @@ -119,10 +119,10 @@ public void close() { } @Internal - class NoOpErrorHandler implements ErrorHandler { + class NoOpErrorHandler implements ErrorHandler { @Override - public void addErrorCollection(PCollection errorCollection) {} + public void addErrorCollection(PCollection errorCollection) {} @Override public boolean isClosed() { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java new file mode 100644 index 00000000000..1cd399be565 --- /dev/null +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** Provides utilities for handling errors in Pipelines. 
*/ +@DefaultAnnotation(NonNull.class) +package org.apache.beam.sdk.errorhandling; + +import edu.umd.cs.findbugs.annotations.DefaultAnnotation; +import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java index 36606dc6631..183cc300dad 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java @@ -37,7 +37,7 @@ public class DLQEnabledPTransform extends PTransform, PColl private ErrorHandler errorHandler = new NoOpErrorHandler<>(); - private DeadLetterHandler deadLetterHandler = DeadLetterHandler.throwingHandler; + private DeadLetterHandler deadLetterHandler = DeadLetterHandler.THROWING_HANDLER; private static final TupleTag RECORDS = new TupleTag<>(); @@ -45,7 +45,7 @@ public DLQEnabledPTransform() {} public DLQEnabledPTransform withDeadLetterQueue(ErrorHandler errorHandler) { this.errorHandler = errorHandler; - this.deadLetterHandler = DeadLetterHandler.recordingHandler; + this.deadLetterHandler = DeadLetterHandler.RECORDING_HANDLER; return this; } @@ -55,7 +55,7 @@ public PCollection expand(PCollection input) { input.apply( "NoOpDoFn", ParDo.of(new NoOpDoFn(deadLetterHandler)) - .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.deadLetterTag))); + .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.DEAD_LETTER_TAG))); Coder deadLetterCoder; @@ -72,7 +72,7 @@ public PCollection expand(PCollection input) { } errorHandler.addErrorCollection( - pCollectionTuple.get(DeadLetterHandler.deadLetterTag).setCoder(deadLetterCoder)); + pCollectionTuple.get(DeadLetterHandler.DEAD_LETTER_TAG).setCoder(deadLetterCoder)); return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java index e8207a98515..f918152f9fa 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java @@ -17,7 +17,7 @@ */ package org.apache.beam.sdk.errorhandling; -import static org.apache.beam.sdk.errorhandling.DeadLetterHandler.deadLetterTag; +import static org.apache.beam.sdk.errorhandling.DeadLetterHandler.DEAD_LETTER_TAG; import static org.mockito.Mockito.verify; import java.io.IOException; @@ -48,7 +48,7 @@ public class DeadLetterHandlerTest { @Test public void testThrowingHandlerWithException() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.throwingHandler; + DeadLetterHandler handler = DeadLetterHandler.THROWING_HANDLER; thrown.expect(RuntimeException.class); @@ -57,14 +57,14 @@ public void testThrowingHandlerWithException() throws Exception { @Test public void testThrowingHandlerWithNoException() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.throwingHandler; + DeadLetterHandler handler = DeadLetterHandler.THROWING_HANDLER; handler.handle(processContext, new Object(), null, null, "desc", "transform"); } @Test public void testRecordingHandler() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; 
handler.handle( processContext, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); @@ -79,12 +79,12 @@ public void testRecordingHandler() throws Exception { .setFailingTransform("transform") .build(); - verify(processContext).output(deadLetterTag, expected); + verify(processContext).output(DEAD_LETTER_TAG, expected); } @Test public void testNoCoder() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; handler.handle(processContext, 5, null, new RuntimeException(), "desc", "transform"); @@ -96,12 +96,12 @@ public void testNoCoder() throws Exception { .setFailingTransform("transform") .build(); - verify(processContext).output(deadLetterTag, expected); + verify(processContext).output(DEAD_LETTER_TAG, expected); } @Test public void testFailingCoder() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.recordingHandler; + DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; Coder failingCoder = new Coder() { @@ -136,6 +136,6 @@ public void verifyDeterministic() throws NonDeterministicException {} .setFailingTransform("transform") .build(); - verify(processContext).output(deadLetterTag, expected); + verify(processContext).output(DEAD_LETTER_TAG, expected); } } From a9dae916aa52a757afdb1a606a587e94218a5b27 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 30 Oct 2023 13:09:44 -0400 Subject: [PATCH 25/80] Fix Checkstyles --- .../org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java index 183cc300dad..093008e282d 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java @@ -32,7 +32,7 @@ import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; -/** Dummy PTransform that is configurable with a DLQ */ +/** Dummy PTransform that is configurable with a DLQ. 
*/ public class DLQEnabledPTransform extends PTransform, PCollection> { private ErrorHandler errorHandler = new NoOpErrorHandler<>(); From 64dde49fd87151960bcd4b3ada56b4a74c7061ff Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 30 Oct 2023 13:13:52 -0400 Subject: [PATCH 26/80] make DLH serializable --- .../org/apache/beam/sdk/errorhandling/DeadLetterHandler.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java index 6d8463de587..f9d9dfe72ba 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectWriter; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.Serializable; import javax.annotation.Nullable; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn; @@ -29,7 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public interface DeadLetterHandler { +public interface DeadLetterHandler extends Serializable { DeadLetterHandler THROWING_HANDLER = new ThrowingDeadLetterHandler(); From c82185d4cfd80e0b16d9183d8da8b89a6326c5d1 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 30 Oct 2023 13:43:46 -0400 Subject: [PATCH 27/80] rename dead letter to bad record --- .../{DeadLetter.java => BadRecord.java} | 6 +-- ...tterHandler.java => BadRecordHandler.java} | 22 +++++----- .../beam/sdk/errorhandling/ErrorHandler.java | 2 +- ...ansform.java => BRHEnabledPTransform.java} | 40 +++++++++---------- ...lerTest.java => BadRecordHandlerTest.java} | 33 +++++++-------- .../sdk/errorhandling/ErrorHandlerTest.java | 10 ++--- 6 files changed, 57 insertions(+), 56 deletions(-) rename sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/{DeadLetter.java => BadRecord.java} (94%) rename sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/{DeadLetterHandler.java => BadRecordHandler.java} (85%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/{DLQEnabledPTransform.java => BRHEnabledPTransform.java} (68%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/{DeadLetterHandlerTest.java => BadRecordHandlerTest.java} (81%) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java similarity index 94% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java index aa4fb4dab31..b76c4160eb2 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java @@ -25,7 +25,7 @@ @AutoValue @DefaultSchema(AutoValueSchema.class) -public abstract class DeadLetter implements Serializable { +public abstract class BadRecord implements Serializable { /** The failing record, encoded as JSON. 
*/ public abstract String getHumanReadableRecord(); @@ -53,7 +53,7 @@ public abstract class DeadLetter implements Serializable { public abstract String getFailingTransform(); public static Builder builder() { - return new AutoValue_DeadLetter.Builder(); + return new AutoValue_BadRecord.Builder(); } @AutoValue.Builder @@ -71,6 +71,6 @@ public abstract static class Builder { public abstract Builder setFailingTransform(String failingTransform); - public abstract DeadLetter build(); + public abstract BadRecord build(); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java similarity index 85% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java index f9d9dfe72ba..1be6528eb06 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/DeadLetterHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java @@ -30,13 +30,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public interface DeadLetterHandler extends Serializable { +public interface BadRecordHandler extends Serializable { - DeadLetterHandler THROWING_HANDLER = new ThrowingDeadLetterHandler(); + BadRecordHandler THROWING_HANDLER = new ThrowingBadRecordHandler(); - DeadLetterHandler RECORDING_HANDLER = new RecordingDeadLetterHandler(); + BadRecordHandler RECORDING_HANDLER = new RecordingBadRecordHandler(); - TupleTag DEAD_LETTER_TAG = new TupleTag<>(); + TupleTag BAD_RECORD_TAG = new TupleTag<>(); void handle( DoFn.ProcessContext c, @@ -47,7 +47,7 @@ void handle( String failingTransform) throws Exception; - class ThrowingDeadLetterHandler implements DeadLetterHandler { + class ThrowingBadRecordHandler implements BadRecordHandler { @Override public void handle( @@ -64,9 +64,9 @@ public void handle( } } - class RecordingDeadLetterHandler implements DeadLetterHandler { + class RecordingBadRecordHandler implements BadRecordHandler { - private static final Logger LOG = LoggerFactory.getLogger(RecordingDeadLetterHandler.class); + private static final Logger LOG = LoggerFactory.getLogger(RecordingBadRecordHandler.class); @Override public void handle( @@ -80,8 +80,8 @@ public void handle( Preconditions.checkArgumentNotNull(record); ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); - DeadLetter.Builder deadLetterBuilder = - DeadLetter.builder() + BadRecord.Builder deadLetterBuilder = + BadRecord.builder() .setHumanReadableRecord(objectWriter.writeValueAsString(record)) .setDescription(description) .setFailingTransform(failingTransform); @@ -109,8 +109,8 @@ public void handle( e); } } - DeadLetter deadLetter = deadLetterBuilder.build(); - c.output(DEAD_LETTER_TAG, deadLetter); + BadRecord badRecord = deadLetterBuilder.build(); + c.output(BAD_RECORD_TAG, badRecord); } } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index 2ece941978c..82715aeea19 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -35,7 +35,7 @@ * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the * 
sink, and the pipeline will be rejected if any handlers aren't closed. * - * @param The type of the error object. This will usually be a {@link DeadLetter}, but can be + * @param The type of the error object. This will usually be a {@link BadRecord}, but can be * any type * @param The return type of the sink PTransform. *

Usage of Error Handlers: diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java similarity index 68% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java index 093008e282d..61a5e0843ae 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DLQEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java @@ -32,20 +32,20 @@ import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; -/** Dummy PTransform that is configurable with a DLQ. */ -public class DLQEnabledPTransform extends PTransform, PCollection> { +/** Dummy PTransform that is configurable with a Bad Record Handler. */ +public class BRHEnabledPTransform extends PTransform, PCollection> { - private ErrorHandler errorHandler = new NoOpErrorHandler<>(); + private ErrorHandler errorHandler = new NoOpErrorHandler<>(); - private DeadLetterHandler deadLetterHandler = DeadLetterHandler.THROWING_HANDLER; + private BadRecordHandler badRecordHandler = BadRecordHandler.THROWING_HANDLER; private static final TupleTag RECORDS = new TupleTag<>(); - public DLQEnabledPTransform() {} + public BRHEnabledPTransform() {} - public DLQEnabledPTransform withDeadLetterQueue(ErrorHandler errorHandler) { + public BRHEnabledPTransform withBadRecordHandler(ErrorHandler errorHandler) { this.errorHandler = errorHandler; - this.deadLetterHandler = DeadLetterHandler.RECORDING_HANDLER; + this.badRecordHandler = BadRecordHandler.RECORDING_HANDLER; return this; } @@ -54,35 +54,35 @@ public PCollection expand(PCollection input) { PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", - ParDo.of(new NoOpDoFn(deadLetterHandler)) - .withOutputTags(RECORDS, TupleTagList.of(DeadLetterHandler.DEAD_LETTER_TAG))); + ParDo.of(new NoOpDoFn(badRecordHandler)) + .withOutputTags(RECORDS, TupleTagList.of(BadRecordHandler.BAD_RECORD_TAG))); - Coder deadLetterCoder; + Coder badRecordCoder; try { SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry(); - deadLetterCoder = + badRecordCoder = SchemaCoder.of( - schemaRegistry.getSchema(DeadLetter.class), - TypeDescriptor.of(DeadLetter.class), - schemaRegistry.getToRowFunction(DeadLetter.class), - schemaRegistry.getFromRowFunction(DeadLetter.class)); + schemaRegistry.getSchema(BadRecord.class), + TypeDescriptor.of(BadRecord.class), + schemaRegistry.getToRowFunction(BadRecord.class), + schemaRegistry.getFromRowFunction(BadRecord.class)); } catch (NoSuchSchemaException e) { throw new RuntimeException(e); } errorHandler.addErrorCollection( - pCollectionTuple.get(DeadLetterHandler.DEAD_LETTER_TAG).setCoder(deadLetterCoder)); + pCollectionTuple.get(BadRecordHandler.BAD_RECORD_TAG).setCoder(badRecordCoder)); return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } public static class NoOpDoFn extends DoFn { - private DeadLetterHandler deadLetterHandler; + private BadRecordHandler badRecordHandler; - public NoOpDoFn(DeadLetterHandler deadLetterHandler) { - this.deadLetterHandler = deadLetterHandler; + public NoOpDoFn(BadRecordHandler badRecordHandler) { + this.badRecordHandler = badRecordHandler; } @ProcessElement @@ -91,7 +91,7 @@ public void processElement(ProcessContext context) throws Exception { if 
(element % 2 == 0) { context.output(element); } else { - deadLetterHandler.handle( + badRecordHandler.handle( context, element, BigEndianIntegerCoder.of(), diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java similarity index 81% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java index f918152f9fa..6edbe73ac4b 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/DeadLetterHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java @@ -17,7 +17,8 @@ */ package org.apache.beam.sdk.errorhandling; -import static org.apache.beam.sdk.errorhandling.DeadLetterHandler.DEAD_LETTER_TAG; +import static org.apache.beam.sdk.errorhandling.BadRecordHandler.BAD_RECORD_TAG; +import static org.apache.beam.sdk.errorhandling.BadRecordHandler.DEAD_LETTER_TAG; import static org.mockito.Mockito.verify; import java.io.IOException; @@ -38,7 +39,7 @@ import org.mockito.junit.MockitoRule; @RunWith(JUnit4.class) -public class DeadLetterHandlerTest { +public class BadRecordHandlerTest { @Rule public final MockitoRule mockito = MockitoJUnit.rule(); @@ -48,7 +49,7 @@ public class DeadLetterHandlerTest { @Test public void testThrowingHandlerWithException() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.THROWING_HANDLER; + BadRecordHandler handler = BadRecordHandler.THROWING_HANDLER; thrown.expect(RuntimeException.class); @@ -57,20 +58,20 @@ public void testThrowingHandlerWithException() throws Exception { @Test public void testThrowingHandlerWithNoException() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.THROWING_HANDLER; + BadRecordHandler handler = BadRecordHandler.THROWING_HANDLER; handler.handle(processContext, new Object(), null, null, "desc", "transform"); } @Test public void testRecordingHandler() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; + BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; handler.handle( processContext, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); - DeadLetter expected = - DeadLetter.builder() + BadRecord expected = + BadRecord.builder() .setHumanReadableRecord("5") .setEncodedRecord(new byte[] {0, 0, 0, 5}) .setCoder("BigEndianIntegerCoder") @@ -79,29 +80,29 @@ public void testRecordingHandler() throws Exception { .setFailingTransform("transform") .build(); - verify(processContext).output(DEAD_LETTER_TAG, expected); + verify(processContext).output(BAD_RECORD_TAG, expected); } @Test public void testNoCoder() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; + BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; handler.handle(processContext, 5, null, new RuntimeException(), "desc", "transform"); - DeadLetter expected = - DeadLetter.builder() + BadRecord expected = + BadRecord.builder() .setHumanReadableRecord("5") .setException("java.lang.RuntimeException") .setDescription("desc") .setFailingTransform("transform") .build(); - verify(processContext).output(DEAD_LETTER_TAG, expected); + verify(processContext).output(BAD_RECORD_TAG, expected); } @Test public void testFailingCoder() throws Exception { - DeadLetterHandler handler = DeadLetterHandler.RECORDING_HANDLER; + 
BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; Coder failingCoder = new Coder() { @@ -127,8 +128,8 @@ public void verifyDeterministic() throws NonDeterministicException {} handler.handle(processContext, 5, failingCoder, new RuntimeException(), "desc", "transform"); - DeadLetter expected = - DeadLetter.builder() + BadRecord expected = + BadRecord.builder() .setHumanReadableRecord("5") .setCoder(failingCoder.toString()) .setException("java.lang.RuntimeException") @@ -136,6 +137,6 @@ public void verifyDeterministic() throws NonDeterministicException {} .setFailingTransform("transform") .build(); - verify(processContext).output(DEAD_LETTER_TAG, expected); + verify(processContext).output(BAD_RECORD_TAG, expected); } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java index 2c9763191c4..7394a910d56 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java @@ -55,20 +55,20 @@ public void testBadErrorHandlerUsage() { @Test @Category(NeedsRunner.class) - public void testDLQEnabledPTransform() { + public void testBRHEnabledPTransform() { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); - record.apply(new DLQEnabledPTransform()); + record.apply(new BRHEnabledPTransform()); pipeline.run(); } @Test @Category(NeedsRunner.class) - public void testErrorHandlerWithDLQTransform() throws Exception { + public void testErrorHandlerWithBRHTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); - try (ErrorHandler> eh = + try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform<>())) { - record.apply(new DLQEnabledPTransform().withDeadLetterQueue(eh)); + record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); } pipeline.run(); From 78d45a80d239261ec276eadba6bdcb6ae8892ef8 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 31 Oct 2023 08:20:03 -0500 Subject: [PATCH 28/80] make DLH serializable --- .../org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java index 6edbe73ac4b..a98e1d8930e 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java @@ -18,7 +18,6 @@ package org.apache.beam.sdk.errorhandling; import static org.apache.beam.sdk.errorhandling.BadRecordHandler.BAD_RECORD_TAG; -import static org.apache.beam.sdk.errorhandling.BadRecordHandler.DEAD_LETTER_TAG; import static org.mockito.Mockito.verify; import java.io.IOException; From 6c365497f19bd7ebdd0c363f003a303f6a4c9e27 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 31 Oct 2023 11:34:00 -0500 Subject: [PATCH 29/80] Change bad record router name, and use multioutputreceiver instead of process context --- ...ecordHandler.java => BadRecordRouter.java} | 40 ++++++++-------- .../errorhandling/BRHEnabledPTransform.java | 25 +++++----- ...dlerTest.java => BadRecordRouterTest.java} | 46 +++++++++++-------- 3 files changed, 60 insertions(+), 51 deletions(-) rename 
sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/{BadRecordHandler.java => BadRecordRouter.java} (75%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/{BadRecordHandlerTest.java => BadRecordRouterTest.java} (69%) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java similarity index 75% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java index 1be6528eb06..84218e29db8 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java @@ -24,22 +24,22 @@ import java.io.Serializable; import javax.annotation.Nullable; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public interface BadRecordHandler extends Serializable { +public interface BadRecordRouter extends Serializable { - BadRecordHandler THROWING_HANDLER = new ThrowingBadRecordHandler(); + BadRecordRouter THROWING_ROUTER = new ThrowingBadRecordRouter(); - BadRecordHandler RECORDING_HANDLER = new RecordingBadRecordHandler(); + BadRecordRouter RECORDING_ROUTER = new RecordingBadRecordRouter(); TupleTag BAD_RECORD_TAG = new TupleTag<>(); - void handle( - DoFn.ProcessContext c, + void route( + MultiOutputReceiver outputReceiver, T record, @Nullable Coder coder, @Nullable Exception exception, @@ -47,11 +47,11 @@ void handle( String failingTransform) throws Exception; - class ThrowingBadRecordHandler implements BadRecordHandler { + class ThrowingBadRecordRouter implements BadRecordRouter { @Override - public void handle( - DoFn.ProcessContext c, + public void route( + MultiOutputReceiver outputReceiver, T record, @Nullable Coder coder, @Nullable Exception exception, @@ -64,13 +64,13 @@ public void handle( } } - class RecordingBadRecordHandler implements BadRecordHandler { + class RecordingBadRecordRouter implements BadRecordRouter { - private static final Logger LOG = LoggerFactory.getLogger(RecordingBadRecordHandler.class); + private static final Logger LOG = LoggerFactory.getLogger(RecordingBadRecordRouter.class); @Override - public void handle( - DoFn.ProcessContext c, + public void route( + MultiOutputReceiver outputReceiver, T record, @Nullable Coder coder, @Nullable Exception exception, @@ -80,7 +80,7 @@ public void handle( Preconditions.checkArgumentNotNull(record); ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); - BadRecord.Builder deadLetterBuilder = + BadRecord.Builder badRecordBuilder = BadRecord.builder() .setHumanReadableRecord(objectWriter.writeValueAsString(record)) .setDescription(description) @@ -89,28 +89,28 @@ public void handle( // Its possible for us to want to handle an error scenario where no actual exception objet // exists if (exception != null) { - deadLetterBuilder.setException(exception.toString()); + badRecordBuilder.setException(exception.toString()); } // We will sometimes not have a coder for a failing record, for example if it has already been // modified within the dofn. 
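As an aside, the encoding branch that follows uses the standard Beam Coder pattern: write the element into a byte stream and keep the bytes. A standalone sketch with the same integer coder the tests use; the class and variable names here are illustrative:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import org.apache.beam.sdk.coders.BigEndianIntegerCoder;

    public class CoderEncodeSketch {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        // Encode the element the same way the recording handler does below.
        BigEndianIntegerCoder.of().encode(5, stream);
        byte[] encodedRecord = stream.toByteArray(); // {0, 0, 0, 5}, the bytes the tests assert
      }
    }
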
if (coder != null) { - deadLetterBuilder.setCoder(coder.toString()); + badRecordBuilder.setCoder(coder.toString()); try { ByteArrayOutputStream stream = new ByteArrayOutputStream(); coder.encode(record, stream); byte[] bytes = stream.toByteArray(); - deadLetterBuilder.setEncodedRecord(bytes); + badRecordBuilder.setEncodedRecord(bytes); } catch (IOException e) { LOG.error( "Unable to encode failing record using provided coder." - + " DeadLetter will be published without encoded bytes", + + " BadRecord will be published without encoded bytes", e); } } - BadRecord badRecord = deadLetterBuilder.build(); - c.output(BAD_RECORD_TAG, badRecord); + BadRecord badRecord = badRecordBuilder.build(); + outputReceiver.get(BAD_RECORD_TAG).output(badRecord); } } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java index 61a5e0843ae..4b6932ed3f4 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java @@ -37,7 +37,7 @@ public class BRHEnabledPTransform extends PTransform, PColl private ErrorHandler errorHandler = new NoOpErrorHandler<>(); - private BadRecordHandler badRecordHandler = BadRecordHandler.THROWING_HANDLER; + private BadRecordRouter badRecordRouter = BadRecordRouter.THROWING_ROUTER; private static final TupleTag RECORDS = new TupleTag<>(); @@ -45,7 +45,7 @@ public BRHEnabledPTransform() {} public BRHEnabledPTransform withBadRecordHandler(ErrorHandler errorHandler) { this.errorHandler = errorHandler; - this.badRecordHandler = BadRecordHandler.RECORDING_HANDLER; + this.badRecordRouter = BadRecordRouter.RECORDING_ROUTER; return this; } @@ -54,8 +54,8 @@ public PCollection expand(PCollection input) { PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", - ParDo.of(new NoOpDoFn(badRecordHandler)) - .withOutputTags(RECORDS, TupleTagList.of(BadRecordHandler.BAD_RECORD_TAG))); + ParDo.of(new NoOpDoFn(badRecordRouter)) + .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); Coder badRecordCoder; @@ -72,27 +72,26 @@ public PCollection expand(PCollection input) { } errorHandler.addErrorCollection( - pCollectionTuple.get(BadRecordHandler.BAD_RECORD_TAG).setCoder(badRecordCoder)); + pCollectionTuple.get(BadRecordRouter.BAD_RECORD_TAG).setCoder(badRecordCoder)); return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } public static class NoOpDoFn extends DoFn { - private BadRecordHandler badRecordHandler; + private BadRecordRouter badRecordRouter; - public NoOpDoFn(BadRecordHandler badRecordHandler) { - this.badRecordHandler = badRecordHandler; + public NoOpDoFn(BadRecordRouter badRecordRouter) { + this.badRecordRouter = badRecordRouter; } @ProcessElement - public void processElement(ProcessContext context) throws Exception { - Integer element = context.element(); + public void processElement(@Element Integer element, MultiOutputReceiver receiver) throws Exception { if (element % 2 == 0) { - context.output(element); + receiver.get(RECORDS).output(element); } else { - badRecordHandler.handle( - context, + badRecordRouter.route( + receiver, element, BigEndianIntegerCoder.of(), new RuntimeException(), diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java similarity index 69% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java index a98e1d8930e..093d593a044 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java @@ -17,8 +17,9 @@ */ package org.apache.beam.sdk.errorhandling; -import static org.apache.beam.sdk.errorhandling.BadRecordHandler.BAD_RECORD_TAG; +import static org.apache.beam.sdk.errorhandling.BadRecordRouter.BAD_RECORD_TAG; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.IOException; import java.io.InputStream; @@ -27,7 +28,8 @@ import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.transforms.DoFn.ProcessContext; +import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; +import org.apache.beam.sdk.transforms.DoFn.OutputReceiver; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -38,36 +40,40 @@ import org.mockito.junit.MockitoRule; @RunWith(JUnit4.class) -public class BadRecordHandlerTest { +public class BadRecordRouterTest { @Rule public final MockitoRule mockito = MockitoJUnit.rule(); @Rule public ExpectedException thrown = ExpectedException.none(); - @Mock private ProcessContext processContext; + @Mock private MultiOutputReceiver outputReceiver; + + @Mock private OutputReceiver badRecordOutputReceiver; @Test public void testThrowingHandlerWithException() throws Exception { - BadRecordHandler handler = BadRecordHandler.THROWING_HANDLER; + BadRecordRouter handler = BadRecordRouter.THROWING_ROUTER; thrown.expect(RuntimeException.class); - handler.handle(processContext, new Object(), null, new RuntimeException(), "desc", "transform"); + handler.route(outputReceiver, new Object(), null, new RuntimeException(), "desc", "transform"); } @Test public void testThrowingHandlerWithNoException() throws Exception { - BadRecordHandler handler = BadRecordHandler.THROWING_HANDLER; + BadRecordRouter handler = BadRecordRouter.THROWING_ROUTER; - handler.handle(processContext, new Object(), null, null, "desc", "transform"); + handler.route(outputReceiver, new Object(), null, null, "desc", "transform"); } @Test public void testRecordingHandler() throws Exception { - BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; + when(outputReceiver.get(BAD_RECORD_TAG)).thenReturn(badRecordOutputReceiver); + + BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; - handler.handle( - processContext, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); + handler.route( + outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); BadRecord expected = BadRecord.builder() @@ -79,14 +85,16 @@ public void testRecordingHandler() throws Exception { .setFailingTransform("transform") .build(); - verify(processContext).output(BAD_RECORD_TAG, expected); + verify(badRecordOutputReceiver).output(expected); } @Test public void testNoCoder() throws Exception { - BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; + when(outputReceiver.get(BAD_RECORD_TAG)).thenReturn(badRecordOutputReceiver); - 
handler.handle(processContext, 5, null, new RuntimeException(), "desc", "transform"); + BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; + + handler.route(outputReceiver, 5, null, new RuntimeException(), "desc", "transform"); BadRecord expected = BadRecord.builder() @@ -96,12 +104,14 @@ public void testNoCoder() throws Exception { .setFailingTransform("transform") .build(); - verify(processContext).output(BAD_RECORD_TAG, expected); + verify(badRecordOutputReceiver).output(expected); } @Test public void testFailingCoder() throws Exception { - BadRecordHandler handler = BadRecordHandler.RECORDING_HANDLER; + when(outputReceiver.get(BAD_RECORD_TAG)).thenReturn(badRecordOutputReceiver); + + BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; Coder failingCoder = new Coder() { @@ -125,7 +135,7 @@ public List> getCoderArguments() { public void verifyDeterministic() throws NonDeterministicException {} }; - handler.handle(processContext, 5, failingCoder, new RuntimeException(), "desc", "transform"); + handler.route(outputReceiver, 5, failingCoder, new RuntimeException(), "desc", "transform"); BadRecord expected = BadRecord.builder() @@ -136,6 +146,6 @@ public void verifyDeterministic() throws NonDeterministicException {} .setFailingTransform("transform") .build(); - verify(processContext).output(BAD_RECORD_TAG, expected); + verify(badRecordOutputReceiver).output(expected); } } From 44036be956729b7433be593206358e94887eaf8b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 31 Oct 2023 14:39:14 -0500 Subject: [PATCH 30/80] Refactor BadRecord to be nested --- .../beam/sdk/errorhandling/BadRecord.java | 101 ++++++++++++------ .../sdk/errorhandling/BadRecordRouter.java | 23 ++-- .../beam/sdk/errorhandling/ErrorHandler.java | 4 +- .../errorhandling/BRHEnabledPTransform.java | 3 +- .../errorhandling/BadRecordRouterTest.java | 47 +++++--- 5 files changed, 121 insertions(+), 57 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java index b76c4160eb2..fe19b19629c 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java @@ -27,30 +27,11 @@ @DefaultSchema(AutoValueSchema.class) public abstract class BadRecord implements Serializable { - /** The failing record, encoded as JSON. */ - public abstract String getHumanReadableRecord(); + /** Information about the record that failed */ + public abstract Record getRecord(); - /** - * Nullable to account for failing to encode, or if there is no coder for the record at the time - * of failure. - */ - @Nullable - @SuppressWarnings("mutable") - public abstract byte[] getEncodedRecord(); - - /** The coder for the record, or null if there is no coder. */ - @Nullable - public abstract String getCoder(); - - /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */ - @Nullable - public abstract String getException(); - - /** The description of what was being attempted when the failure occurred. */ - public abstract String getDescription(); - - /** The particular sub-transform that failed. 
*/ - public abstract String getFailingTransform(); + /** Information about why the record failed */ + public abstract Failure getFailure(); public static Builder builder() { return new AutoValue_BadRecord.Builder(); @@ -58,19 +39,79 @@ public static Builder builder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setHumanReadableRecord(String humanReadableRecord); + public abstract Builder setRecord(Record record); + + public abstract Builder setFailure(Failure error); + + public abstract BadRecord build(); + } + + @AutoValue + @DefaultSchema(AutoValueSchema.class) + public abstract static class Record implements Serializable { + + /** The failing record, encoded as JSON. */ + public abstract String getHumanReadableRecord(); + + /** + * Nullable to account for failing to encode, or if there is no coder for the record at the time + * of failure. + */ + @Nullable @SuppressWarnings("mutable") - public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord); + public abstract byte[] getEncodedRecord(); - public abstract Builder setCoder(@Nullable String coder); + /** The coder for the record, or null if there is no coder. */ + @Nullable + public abstract String getCoder(); - public abstract Builder setException(@Nullable String exception); + public static Builder builder() { + return new AutoValue_BadRecord_Record.Builder(); + } - public abstract Builder setDescription(String description); + @AutoValue.Builder + public abstract static class Builder { - public abstract Builder setFailingTransform(String failingTransform); + public abstract Builder setHumanReadableRecord(String humanReadableRecord); - public abstract BadRecord build(); + @SuppressWarnings("mutable") + public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord); + + public abstract Builder setCoder(@Nullable String coder); + + public abstract Record build(); + } + } + + @AutoValue + @DefaultSchema(AutoValueSchema.class) + public abstract static class Failure implements Serializable { + + /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */ + @Nullable + public abstract String getException(); + + /** The description of what was being attempted when the failure occurred. */ + public abstract String getDescription(); + + /** The particular sub-transform that failed. 
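With the nested shape introduced here, a BadRecord is assembled from two sub-builders. A sketch using the exact values the updated tests in this patch expect; NestedBadRecordSketch is an illustrative wrapper, not part of the patch:

    import org.apache.beam.sdk.errorhandling.BadRecord;
    import org.apache.beam.sdk.errorhandling.BadRecord.Failure;
    import org.apache.beam.sdk.errorhandling.BadRecord.Record;

    public class NestedBadRecordSketch {
      // Builds the same BadRecord the reworked BadRecordRouterTest verifies.
      public static BadRecord example() {
        return BadRecord.builder()
            .setRecord(Record.builder().setHumanReadableRecord("5").build())
            .setFailure(
                Failure.builder()
                    .setException("java.lang.RuntimeException")
                    .setDescription("desc")
                    .setFailingTransform("transform")
                    .build())
            .build();
      }
    }
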
*/ + public abstract String getFailingTransform(); + + public static Builder builder() { + return new AutoValue_BadRecord_Failure.Builder(); + } + + @AutoValue.Builder + public abstract static class Builder { + + public abstract Builder setException(@Nullable String exception); + + public abstract Builder setDescription(String description); + + public abstract Builder setFailingTransform(String failingTransform); + + public abstract Failure build(); + } } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java index 84218e29db8..3c178273012 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java @@ -24,6 +24,8 @@ import java.io.Serializable; import javax.annotation.Nullable; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.errorhandling.BadRecord.Failure; +import org.apache.beam.sdk.errorhandling.BadRecord.Record; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; @@ -80,28 +82,27 @@ public void route( Preconditions.checkArgumentNotNull(record); ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); - BadRecord.Builder badRecordBuilder = - BadRecord.builder() - .setHumanReadableRecord(objectWriter.writeValueAsString(record)) - .setDescription(description) - .setFailingTransform(failingTransform); + BadRecord.Record.Builder recordBuilder = + Record.builder().setHumanReadableRecord(objectWriter.writeValueAsString(record)); + BadRecord.Failure.Builder failureBuilder = + Failure.builder().setDescription(description).setFailingTransform(failingTransform); // Its possible for us to want to handle an error scenario where no actual exception objet // exists if (exception != null) { - badRecordBuilder.setException(exception.toString()); + failureBuilder.setException(exception.toString()); } // We will sometimes not have a coder for a failing record, for example if it has already been // modified within the dofn. if (coder != null) { - badRecordBuilder.setCoder(coder.toString()); + recordBuilder.setCoder(coder.toString()); try { ByteArrayOutputStream stream = new ByteArrayOutputStream(); coder.encode(record, stream); byte[] bytes = stream.toByteArray(); - badRecordBuilder.setEncodedRecord(bytes); + recordBuilder.setEncodedRecord(bytes); } catch (IOException e) { LOG.error( "Unable to encode failing record using provided coder." @@ -109,7 +110,11 @@ public void route( e); } } - BadRecord badRecord = badRecordBuilder.build(); + BadRecord badRecord = + BadRecord.builder() + .setRecord(recordBuilder.build()) + .setFailure(failureBuilder.build()) + .build(); outputReceiver.get(BAD_RECORD_TAG).output(badRecord); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index 82715aeea19..d5752672bf1 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -35,8 +35,8 @@ * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the * sink, and the pipeline will be rejected if any handlers aren't closed. 
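The close-before-run contract described in this javadoc is what the tests drive with try-with-resources. A condensed sketch of the intended call sequence, reusing the test doubles from this series (DummySinkTransform and BRHEnabledPTransform):

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.errorhandling.BadRecord;
    import org.apache.beam.sdk.errorhandling.ErrorHandler;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;

    public class ErrorHandlerLifecycleSketch {
      public static void run(Pipeline pipeline) throws Exception {
        PCollection<Integer> record = pipeline.apply(Create.of(1, 2, 3, 4));
        // try-with-resources closes the handler, wiring every registered
        // error collection into the sink before the pipeline is run.
        try (ErrorHandler<BadRecord, PCollection<BadRecord>> eh =
            pipeline.registerErrorHandler(new DummySinkTransform<>())) {
          record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh));
        }
        pipeline.run();
      }
    }
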
* - * @param The type of the error object. This will usually be a {@link BadRecord}, but can be - * any type + * @param The type of the error object. This will usually be a {@link BadRecord}, but can be any + * type * @param The return type of the sink PTransform. *

 * <p>Usage of Error Handlers:
 *

Simple usage with one DLQ diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java index 4b6932ed3f4..7c2fee4523b 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java @@ -86,7 +86,8 @@ public NoOpDoFn(BadRecordRouter badRecordRouter) { } @ProcessElement - public void processElement(@Element Integer element, MultiOutputReceiver receiver) throws Exception { + public void processElement(@Element Integer element, MultiOutputReceiver receiver) + throws Exception { if (element % 2 == 0) { receiver.get(RECORDS).output(element); } else { diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java index 093d593a044..e6a686ec8a0 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java @@ -28,6 +28,8 @@ import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.errorhandling.BadRecord.Failure; +import org.apache.beam.sdk.errorhandling.BadRecord.Record; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.DoFn.OutputReceiver; import org.junit.Rule; @@ -77,12 +79,18 @@ public void testRecordingHandler() throws Exception { BadRecord expected = BadRecord.builder() - .setHumanReadableRecord("5") - .setEncodedRecord(new byte[] {0, 0, 0, 5}) - .setCoder("BigEndianIntegerCoder") - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") + .setRecord( + Record.builder() + .setHumanReadableRecord("5") + .setEncodedRecord(new byte[] {0, 0, 0, 5}) + .setCoder("BigEndianIntegerCoder") + .build()) + .setFailure( + Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build()) .build(); verify(badRecordOutputReceiver).output(expected); @@ -98,10 +106,13 @@ public void testNoCoder() throws Exception { BadRecord expected = BadRecord.builder() - .setHumanReadableRecord("5") - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") + .setRecord(Record.builder().setHumanReadableRecord("5").build()) + .setFailure( + Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build()) .build(); verify(badRecordOutputReceiver).output(expected); @@ -139,11 +150,17 @@ public void verifyDeterministic() throws NonDeterministicException {} BadRecord expected = BadRecord.builder() - .setHumanReadableRecord("5") - .setCoder(failingCoder.toString()) - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") + .setRecord( + Record.builder() + .setHumanReadableRecord("5") + .setCoder(failingCoder.toString()) + .build()) + .setFailure( + Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform") + .build()) .build(); verify(badRecordOutputReceiver).output(expected); From 
06ca1665cfe74a7e176c926a7d51fd4be7f8ba15 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 31 Oct 2023 15:13:35 -0500 Subject: [PATCH 31/80] clean up checkstyle --- .../java/org/apache/beam/sdk/errorhandling/BadRecord.java | 4 ++-- .../apache/beam/sdk/errorhandling/BRHEnabledPTransform.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java index fe19b19629c..7ea1e59cc16 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java @@ -27,10 +27,10 @@ @DefaultSchema(AutoValueSchema.class) public abstract class BadRecord implements Serializable { - /** Information about the record that failed */ + /** Information about the record that failed. */ public abstract Record getRecord(); - /** Information about why the record failed */ + /** Information about why the record failed. */ public abstract Failure getFailure(); public static Builder builder() { diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java index 7c2fee4523b..65fa95eb6d4 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java @@ -54,7 +54,7 @@ public PCollection expand(PCollection input) { PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", - ParDo.of(new NoOpDoFn(badRecordRouter)) + ParDo.of(new OddIsBad(badRecordRouter)) .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); Coder badRecordCoder; @@ -77,11 +77,11 @@ public PCollection expand(PCollection input) { return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } - public static class NoOpDoFn extends DoFn { + public static class OddIsBad extends DoFn { private BadRecordRouter badRecordRouter; - public NoOpDoFn(BadRecordRouter badRecordRouter) { + public OddIsBad(BadRecordRouter badRecordRouter) { this.badRecordRouter = badRecordRouter; } From a26d605d8933351fa00e6367a89a26631873fdd1 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 31 Oct 2023 16:04:15 -0500 Subject: [PATCH 32/80] Update error handler test --- .../org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java index 7394a910d56..25d8d7524a4 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java @@ -59,6 +59,8 @@ public void testBRHEnabledPTransform() { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); record.apply(new BRHEnabledPTransform()); + thrown.expect(RuntimeException.class); + pipeline.run(); } From 5a6e8d0d7caec91661f914702e6176fede24430a Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 1 Nov 2023 11:38:03 -0500 Subject: [PATCH 33/80] Add metrics for counting error records, and for measuring feature usage --- .../construction/PTransformTranslation.java | 6 ++ .../beam/sdk/annotations/FeatureMetrics.java | 37 ++++++++++ 
.../sdk/errorhandling/BadRecordRouter.java | 18 ++--- .../beam/sdk/errorhandling/ErrorHandler.java | 67 ++++++++++++++----- 4 files changed, 103 insertions(+), 25 deletions(-) create mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java index 8f415e718e9..11deab1c1f2 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java @@ -21,6 +21,8 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; import java.io.IOException; +import java.lang.annotation.Annotation; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; import java.util.Comparator; @@ -502,6 +504,10 @@ public RunnerApi.PTransform translate( SchemaTranslation.schemaToProto(configRow.getSchema(), true).toByteArray())); } + for (Annotation annotation : appliedPTransform.getTransform().getClass().getDeclaredAnnotations()) { + transformBuilder.putAnnotations(annotation.annotationType().getName(),ByteString.copyFrom(annotation.toString(),StandardCharsets.UTF_8)); + } + return transformBuilder.build(); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java new file mode 100644 index 00000000000..fe3d1f3ffca --- /dev/null +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.annotations; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** Annotations used to mark a PTransform as using a specific feature. 
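The translator change above is plain java.lang reflection: each runtime-retained annotation on the transform's class is copied into the pipeline proto as annotation-type name mapped to the UTF-8 bytes of its toString(). A self-contained sketch of that walk; @Deprecated stands in for a RUNTIME-retained marker like the FeatureMetrics annotation below, and the class names are invented:

    import java.lang.annotation.Annotation;
    import java.nio.charset.StandardCharsets;
    import java.util.HashMap;
    import java.util.Map;

    public class AnnotationScanSketch {
      @Deprecated // stand-in for a RUNTIME-retained marker annotation
      static class MyTransform {}

      public static void main(String[] args) {
        Map<String, byte[]> annotations = new HashMap<>();
        // The same walk PTransformTranslation performs over the transform class.
        for (Annotation annotation : MyTransform.class.getDeclaredAnnotations()) {
          annotations.put(
              annotation.annotationType().getName(),
              annotation.toString().getBytes(StandardCharsets.UTF_8));
        }
        System.out.println(annotations.keySet()); // prints [java.lang.Deprecated]
      }
    }
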
*/ +public class FeatureMetrics { + + /** Signifies usage of the Error Handler */ + @Retention(RetentionPolicy.RUNTIME) + @Target({ + ElementType.TYPE + }) + @Documented + public @interface ErrorHandler {} + +} diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java index 3c178273012..fc78b1b21fc 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java @@ -40,10 +40,10 @@ public interface BadRecordRouter extends Serializable { TupleTag BAD_RECORD_TAG = new TupleTag<>(); - void route( + void route( MultiOutputReceiver outputReceiver, - T record, - @Nullable Coder coder, + RecordT record, + @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) @@ -52,10 +52,10 @@ void route( class ThrowingBadRecordRouter implements BadRecordRouter { @Override - public void route( + public void route( MultiOutputReceiver outputReceiver, - T record, - @Nullable Coder coder, + RecordT record, + @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) @@ -71,10 +71,10 @@ class RecordingBadRecordRouter implements BadRecordRouter { private static final Logger LOG = LoggerFactory.getLogger(RecordingBadRecordRouter.class); @Override - public void route( + public void route( MultiOutputReceiver outputReceiver, - T record, - @Nullable Coder coder, + RecordT record, + @Nullable Coder coder, @Nullable Exception exception, String description, String failingTransform) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index d5752672bf1..a79a6c28d3b 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -20,9 +20,14 @@ import java.util.ArrayList; import java.util.List; import javax.annotation.Nullable; +import org.apache.beam.sdk.annotations.FeatureMetrics; import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.metrics.Counter; +import org.apache.beam.sdk.metrics.Metrics; +import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; @@ -35,9 +40,9 @@ * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the * sink, and the pipeline will be rejected if any handlers aren't closed. * - * @param The type of the error object. This will usually be a {@link BadRecord}, but can be any + * @param The type of the error object. This will usually be a {@link BadRecord}, but can be any * type - * @param The return type of the sink PTransform. + * @param The return type of the sink PTransform. *

 * <p>Usage of Error Handlers:
 *
 * <p>Simple usage with one DLQ
 *

 * <pre>{@code
@@ -57,22 +62,22 @@
 * results.apply(SomeOtherTransform);
 * }</pre>
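The interface changes just below thread a Counter through a pass-through step in front of the sink. The metric mechanics are the standard Beam pattern, sketched standalone here; the "ErrorMetrics" namespace and "-input" suffix mirror the patch, while "mySink" stands in for the sink transform's name:

    import org.apache.beam.sdk.metrics.Counter;
    import org.apache.beam.sdk.metrics.Metrics;
    import org.apache.beam.sdk.transforms.DoFn;

    // Sketch: count every record headed to the error sink, then pass it through.
    class CountErrorsFn<ErrorT> extends DoFn<ErrorT, ErrorT> {
      private final Counter errorCounter = Metrics.counter("ErrorMetrics", "mySink-input");

      @ProcessElement
      public void processElement(@Element ErrorT error, OutputReceiver<ErrorT> receiver) {
        errorCounter.inc();     // one increment per bad record
        receiver.output(error); // emit unchanged
      }
    }
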
*/ -public interface ErrorHandler extends AutoCloseable { +public interface ErrorHandler extends AutoCloseable { - void addErrorCollection(PCollection errorCollection); + void addErrorCollection(PCollection errorCollection); boolean isClosed(); - T getOutput(); + OutputT getOutput(); - class PTransformErrorHandler implements ErrorHandler { + class PTransformErrorHandler implements ErrorHandler { private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); - private final PTransform, T> sinkTransform; + private final PTransform, OutputT> sinkTransform; - private final List> errorCollections = new ArrayList<>(); + private final List> errorCollections = new ArrayList<>(); - @Nullable private T sinkOutput = null; + @Nullable private OutputT sinkOutput = null; private boolean closed = false; @@ -81,12 +86,12 @@ class PTransformErrorHandler implements ErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler(PTransform, T> sinkTransform) { + public PTransformErrorHandler(PTransform, OutputT> sinkTransform) { this.sinkTransform = sinkTransform; } @Override - public void addErrorCollection(PCollection errorCollection) { + public void addErrorCollection(PCollection errorCollection) { errorCollections.add(errorCollection); } @@ -96,7 +101,7 @@ public boolean isClosed() { } @Override - public T getOutput() { + public OutputT getOutput() { if (!this.isClosed()) { throw new IllegalStateException( "ErrorHandler must be finalized before the output can be returned"); @@ -113,16 +118,46 @@ public void close() { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); return; } + LOG.debug("{} error collections are being sent to {}", errorCollections.size(), sinkTransform.getName()); sinkOutput = - PCollectionList.of(errorCollections).apply(Flatten.pCollections()).apply(sinkTransform); + PCollectionList.of(errorCollections).apply(Flatten.pCollections()).apply(new WriteErrorMetrics(sinkTransform)).apply(sinkTransform); + } + + @FeatureMetrics.ErrorHandler + public class WriteErrorMetrics extends PTransform,PCollection> { + + private final Counter errorCounter; + + public WriteErrorMetrics(PTransform sinkTransform){ + errorCounter = Metrics.counter("ErrorMetrics", sinkTransform.getName() + "-input"); + } + + @Override + public PCollection expand(PCollection input) { + return input.apply(ParDo.of(new CountErrors(errorCounter))); + } + + public class CountErrors extends DoFn { + + private final Counter errorCounter; + + public CountErrors(Counter errorCounter){ + this.errorCounter = errorCounter; + } + @ProcessElement + public void processElement(@Element ErrorT error, OutputReceiver receiver){ + errorCounter.inc(); + receiver.output(error); + } + } } } @Internal - class NoOpErrorHandler implements ErrorHandler { + class NoOpErrorHandler implements ErrorHandler { @Override - public void addErrorCollection(PCollection errorCollection) {} + public void addErrorCollection(PCollection errorCollection) {} @Override public boolean isClosed() { @@ -130,7 +165,7 @@ public boolean isClosed() { } @Override - public T getOutput() { + public OutputT getOutput() { throw new IllegalArgumentException("No Op handler has no output"); } From 70c89917de6acc290bf90b4b96b3eb1a875ce3bd Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 1 Nov 2023 11:39:41 -0500 Subject: [PATCH 34/80] apply spotless --- .../construction/PTransformTranslation.java | 7 +++-- .../beam/sdk/annotations/FeatureMetrics.java | 5 +--- 
.../beam/sdk/errorhandling/ErrorHandler.java | 26 ++++++++++++------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java index 11deab1c1f2..8061146c84d 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java @@ -504,8 +504,11 @@ public RunnerApi.PTransform translate( SchemaTranslation.schemaToProto(configRow.getSchema(), true).toByteArray())); } - for (Annotation annotation : appliedPTransform.getTransform().getClass().getDeclaredAnnotations()) { - transformBuilder.putAnnotations(annotation.annotationType().getName(),ByteString.copyFrom(annotation.toString(),StandardCharsets.UTF_8)); + for (Annotation annotation : + appliedPTransform.getTransform().getClass().getDeclaredAnnotations()) { + transformBuilder.putAnnotations( + annotation.annotationType().getName(), + ByteString.copyFrom(annotation.toString(), StandardCharsets.UTF_8)); } return transformBuilder.build(); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java index fe3d1f3ffca..85025b30bf8 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java @@ -28,10 +28,7 @@ public class FeatureMetrics { /** Signifies usage of the Error Handler */ @Retention(RetentionPolicy.RUNTIME) - @Target({ - ElementType.TYPE - }) + @Target({ElementType.TYPE}) @Documented public @interface ErrorHandler {} - } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index a79a6c28d3b..c1cb21fb05f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -40,8 +40,8 @@ * Error Handlers must be closed before a pipeline is run to properly pipe error collections to the * sink, and the pipeline will be rejected if any handlers aren't closed. * - * @param The type of the error object. This will usually be a {@link BadRecord}, but can be any - * type + * @param The type of the error object. This will usually be a {@link BadRecord}, but can + * be any type * @param The return type of the sink PTransform. *

 * <p>Usage of Error Handlers:
 *

Simple usage with one DLQ @@ -70,7 +70,8 @@ public interface ErrorHandler extends AutoClose OutputT getOutput(); - class PTransformErrorHandler implements ErrorHandler { + class PTransformErrorHandler + implements ErrorHandler { private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); private final PTransform, OutputT> sinkTransform; @@ -118,17 +119,23 @@ public void close() { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); return; } - LOG.debug("{} error collections are being sent to {}", errorCollections.size(), sinkTransform.getName()); + LOG.debug( + "{} error collections are being sent to {}", + errorCollections.size(), + sinkTransform.getName()); sinkOutput = - PCollectionList.of(errorCollections).apply(Flatten.pCollections()).apply(new WriteErrorMetrics(sinkTransform)).apply(sinkTransform); + PCollectionList.of(errorCollections) + .apply(Flatten.pCollections()) + .apply(new WriteErrorMetrics(sinkTransform)) + .apply(sinkTransform); } @FeatureMetrics.ErrorHandler - public class WriteErrorMetrics extends PTransform,PCollection> { + public class WriteErrorMetrics extends PTransform, PCollection> { private final Counter errorCounter; - public WriteErrorMetrics(PTransform sinkTransform){ + public WriteErrorMetrics(PTransform sinkTransform) { errorCounter = Metrics.counter("ErrorMetrics", sinkTransform.getName() + "-input"); } @@ -141,11 +148,12 @@ public class CountErrors extends DoFn { private final Counter errorCounter; - public CountErrors(Counter errorCounter){ + public CountErrors(Counter errorCounter) { this.errorCounter = errorCounter; } + @ProcessElement - public void processElement(@Element ErrorT error, OutputReceiver receiver){ + public void processElement(@Element ErrorT error, OutputReceiver receiver) { errorCounter.inc(); receiver.output(error); } From 36baf98fc6bffc4f6c95146c61fa72f77a338e5a Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 1 Nov 2023 12:24:22 -0500 Subject: [PATCH 35/80] fix checkstyle --- .../java/org/apache/beam/sdk/annotations/FeatureMetrics.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java index 85025b30bf8..10396142032 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java @@ -26,7 +26,7 @@ /** Annotations used to mark a PTransform as using a specific feature. */ public class FeatureMetrics { - /** Signifies usage of the Error Handler */ + /** Signifies usage of the Error Handler. 
*/ @Retention(RetentionPolicy.RUNTIME) @Target({ElementType.TYPE}) @Documented From 2119c7698b6210f9495fd12dc7b6a87bd3abf6c0 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 1 Nov 2023 13:48:19 -0500 Subject: [PATCH 36/80] make metric reporting static --- .../org/apache/beam/sdk/errorhandling/ErrorHandler.java | 8 ++++---- .../apache/beam/sdk/errorhandling/ErrorHandlerTest.java | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index c1cb21fb05f..acc071104c0 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -126,12 +126,12 @@ public void close() { sinkOutput = PCollectionList.of(errorCollections) .apply(Flatten.pCollections()) - .apply(new WriteErrorMetrics(sinkTransform)) + .apply(new WriteErrorMetrics(sinkTransform)) .apply(sinkTransform); } @FeatureMetrics.ErrorHandler - public class WriteErrorMetrics extends PTransform, PCollection> { + public static class WriteErrorMetrics extends PTransform, PCollection> { private final Counter errorCounter; @@ -141,10 +141,10 @@ public WriteErrorMetrics(PTransform sinkTransform) { @Override public PCollection expand(PCollection input) { - return input.apply(ParDo.of(new CountErrors(errorCounter))); + return input.apply(ParDo.of(new CountErrors(errorCounter))); } - public class CountErrors extends DoFn { + public static class CountErrors extends DoFn { private final Counter errorCounter; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java index 25d8d7524a4..efedb5c43f2 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java @@ -54,7 +54,6 @@ public void testBadErrorHandlerUsage() { } @Test - @Category(NeedsRunner.class) public void testBRHEnabledPTransform() { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); record.apply(new BRHEnabledPTransform()); From 798cfc36c3d79f7cc1d6382b57a8300055a09609 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 1 Nov 2023 14:41:07 -0500 Subject: [PATCH 37/80] spotless --- .../java/org/apache/beam/sdk/errorhandling/ErrorHandler.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java index acc071104c0..6ae4333edf3 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java @@ -131,7 +131,8 @@ public void close() { } @FeatureMetrics.ErrorHandler - public static class WriteErrorMetrics extends PTransform, PCollection> { + public static class WriteErrorMetrics + extends PTransform, PCollection> { private final Counter errorCounter; From c03bb2b9167bc2c3ae7b56ad44a885e6ce7f5f18 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 2 Nov 2023 09:06:10 -0500 Subject: [PATCH 38/80] Rework annotations to be an explicit label on a PTransform, instead of using java annotations --- .../construction/PTransformTranslation.java | 10 +++--- 
.../beam/sdk/annotations/FeatureMetrics.java | 34 ------------------- .../beam/sdk/errorhandling/BadRecord.java | 5 +-- .../sdk/errorhandling/BadRecordRouter.java | 26 +++++++++----- .../beam/sdk/errorhandling/ErrorHandler.java | 7 ++-- .../beam/sdk/transforms/PTransform.java | 14 ++++++++ 6 files changed, 42 insertions(+), 54 deletions(-) delete mode 100644 sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java index 8061146c84d..6829e0d6b23 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java @@ -21,14 +21,13 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; import java.io.IOException; -import java.lang.annotation.Annotation; -import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.ServiceLoader; import java.util.Set; import org.apache.beam.model.pipeline.v1.RunnerApi; @@ -504,11 +503,10 @@ public RunnerApi.PTransform translate( SchemaTranslation.schemaToProto(configRow.getSchema(), true).toByteArray())); } - for (Annotation annotation : - appliedPTransform.getTransform().getClass().getDeclaredAnnotations()) { + for (Entry annotation : + appliedPTransform.getTransform().getAnnotations().entrySet()) { transformBuilder.putAnnotations( - annotation.annotationType().getName(), - ByteString.copyFrom(annotation.toString(), StandardCharsets.UTF_8)); + annotation.getKey(), ByteString.copyFrom(annotation.getValue())); } return transformBuilder.build(); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java deleted file mode 100644 index 10396142032..00000000000 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/annotations/FeatureMetrics.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
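For the consuming side of the PTransformTranslation change above, a runner can enumerate the annotations from the transform proto. A sketch under two stated assumptions: getAnnotationsMap() is the standard accessor protobuf generates for a map<string, bytes> field, and the plain com.google.protobuf import stands in for Beam's vendored protobuf package:

    import com.google.protobuf.ByteString;
    import java.util.Map;
    import org.apache.beam.model.pipeline.v1.RunnerApi;

    public class TransformAnnotationReader {
      // Dumps every annotation attached to one transform in the pipeline proto,
      // i.e. the entries PTransformTranslation wrote via putAnnotations above.
      static void dump(RunnerApi.PTransform transformProto) {
        for (Map.Entry<String, ByteString> entry : transformProto.getAnnotationsMap().entrySet()) {
          System.out.println(entry.getKey() + " -> " + entry.getValue().toStringUtf8());
        }
      }
    }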
- */ -package org.apache.beam.sdk.annotations; - -import java.lang.annotation.Documented; -import java.lang.annotation.ElementType; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.lang.annotation.Target; - -/** Annotations used to mark a PTransform as using a specific feature. */ -public class FeatureMetrics { - - /** Signifies usage of the Error Handler. */ - @Retention(RetentionPolicy.RUNTIME) - @Target({ElementType.TYPE}) - @Documented - public @interface ErrorHandler {} -} diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java index 7ea1e59cc16..44fdf636ad2 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java @@ -51,7 +51,8 @@ public abstract static class Builder { @DefaultSchema(AutoValueSchema.class) public abstract static class Record implements Serializable { - /** The failing record, encoded as JSON. */ + /** The failing record, encoded as JSON. Will be null if serialization as JSON fails. */ + @Nullable public abstract String getHumanReadableRecord(); /** @@ -73,7 +74,7 @@ public static Builder builder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setHumanReadableRecord(String humanReadableRecord); + public abstract Builder setHumanReadableRecord(@Nullable String humanReadableRecord); @SuppressWarnings("mutable") public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java index fc78b1b21fc..7f71e1208aa 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java @@ -82,15 +82,12 @@ public void route( Preconditions.checkArgumentNotNull(record); ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); - BadRecord.Record.Builder recordBuilder = - Record.builder().setHumanReadableRecord(objectWriter.writeValueAsString(record)); - BadRecord.Failure.Builder failureBuilder = - Failure.builder().setDescription(description).setFailingTransform(failingTransform); - - // Its possible for us to want to handle an error scenario where no actual exception objet - // exists - if (exception != null) { - failureBuilder.setException(exception.toString()); + // Build up record information + BadRecord.Record.Builder recordBuilder = Record.builder(); + try { + recordBuilder.setHumanReadableRecord(objectWriter.writeValueAsString(record)); + } catch (Exception e) { + LOG.error("Unable to serialize record as JSON. 
Human readable record will be null", e);
+    }
 
     // We will sometimes not have a coder for a failing record, for example if it has already been
@@ -110,6 +107,17 @@ public void route(
             e);
       }
     }
+
+    // Build up failure information
+    BadRecord.Failure.Builder failureBuilder =
+        Failure.builder().setDescription(description).setFailingTransform(failingTransform);
+
+    // It's possible for us to want to handle an error scenario where no actual exception object
+    // exists
+    if (exception != null) {
+      failureBuilder.setException(exception.toString());
+    }
+
     BadRecord badRecord =
         BadRecord.builder()
             .setRecord(recordBuilder.build())
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
index 6ae4333edf3..21aad1cd7bf 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java
@@ -17,10 +17,10 @@
  */
 package org.apache.beam.sdk.errorhandling;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import javax.annotation.Nullable;
-import org.apache.beam.sdk.annotations.FeatureMetrics;
 import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.metrics.Counter;
 import org.apache.beam.sdk.metrics.Metrics;
@@ -127,10 +127,11 @@ public void close() {
     PCollectionList.of(errorCollections)
         .apply(Flatten.pCollections())
         .apply(new WriteErrorMetrics(sinkTransform))
-        .apply(sinkTransform);
+        .apply(
+            sinkTransform.addAnnotation(
+                "FeatureMetric", "ErrorHandler".getBytes(StandardCharsets.UTF_8)));
   }
 
-  @FeatureMetrics.ErrorHandler
   public static class WriteErrorMetrics
       extends PTransform<PCollection<ErrorT>, PCollection<ErrorT>> {
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
index 7b3dfb18803..ebb14b098f6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java
@@ -21,6 +21,7 @@
 import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.Map;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.CannotProvideCoderException;
@@ -216,6 +217,17 @@ public ResourceHints getResourceHints() {
     return resourceHints;
   }
 
+  /** Returns annotations map to provide additional hints to the runner */
+  public Map<String, byte[]> getAnnotations() {
+    return annotations;
+  }
+
+  public PTransform<InputT, OutputT> addAnnotation(
+      @NonNull String annotationType, byte @NonNull [] annotation) {
+    annotations.put(annotationType, annotation);
+    return this;
+  }
+
   /////////////////////////////////////////////////////////////////////////////
 
   // See the note about about PTransform's fake Serializability, to
@@ -229,6 +241,8 @@ public ResourceHints getResourceHints() {
 
   protected transient @NonNull ResourceHints resourceHints = ResourceHints.create();
 
+  protected transient @NonNull Map<String, byte[]> annotations = new HashMap<>();
+
   protected PTransform() {
     this.name = null;
   }

From bf99363f6e69a509c97b12f09c90c2252b60709e Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Thu, 2 Nov 2023 09:35:58 -0500
Subject: [PATCH 39/80] fix checkstyle

---
 .../main/java/org/apache/beam/sdk/transforms/PTransform.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git
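A hedged usage sketch for the addAnnotation/getAnnotations surface introduced above (MapElements and TypeDescriptors are existing Beam API; the annotation key and payload are invented for illustration):

    import java.nio.charset.StandardCharsets;
    import org.apache.beam.sdk.transforms.MapElements;
    import org.apache.beam.sdk.values.TypeDescriptors;

    public class AnnotationSketch {
      public static void main(String[] args) {
        MapElements<String, String> upper =
            MapElements.into(TypeDescriptors.strings()).via((String s) -> s.toUpperCase());

        // addAnnotation mutates the transform and returns it, so it can be chained
        // inline at the .apply() site, as the ErrorHandler change above does.
        upper.addAnnotation("MyOrg:lineage", "normalize-step".getBytes(StandardCharsets.UTF_8));

        // A translator reads the raw byte[] values back when building the runner proto.
        upper
            .getAnnotations()
            .forEach((k, v) -> System.out.println(k + " -> " + new String(v, StandardCharsets.UTF_8)));
      }
    }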
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java index ebb14b098f6..c0c3638b28d 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PTransform.java @@ -217,7 +217,7 @@ public ResourceHints getResourceHints() { return resourceHints; } - /** Returns annotations map to provide additional hints to the runner */ + /** Returns annotations map to provide additional hints to the runner. */ public Map getAnnotations() { return annotations; } From 881f9d8a5ed8719e712c1b5f0bae5a48e5e4945e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 8 Nov 2023 11:27:33 -0500 Subject: [PATCH 40/80] Address comments --- .../java/org/apache/beam/sdk/Pipeline.java | 4 ++-- .../errorhandling/BadRecord.java | 20 ++++++++----------- .../errorhandling/BadRecordRouter.java | 10 +++++----- .../errorhandling/ErrorHandler.java | 6 +++--- .../errorhandling/package-info.java | 2 +- .../errorhandling/BRHEnabledPTransform.java | 4 ++-- .../errorhandling/BadRecordRouterTest.java | 17 +++++++--------- .../errorhandling/ErrorHandlerTest.java | 2 +- 8 files changed, 29 insertions(+), 36 deletions(-) rename sdks/java/core/src/main/java/org/apache/beam/sdk/{ => transforms}/errorhandling/BadRecord.java (86%) rename sdks/java/core/src/main/java/org/apache/beam/sdk/{ => transforms}/errorhandling/BadRecordRouter.java (92%) rename sdks/java/core/src/main/java/org/apache/beam/sdk/{ => transforms}/errorhandling/ErrorHandler.java (97%) rename sdks/java/core/src/main/java/org/apache/beam/sdk/{ => transforms}/errorhandling/package-info.java (94%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/{ => transforms}/errorhandling/BRHEnabledPTransform.java (96%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/{ => transforms}/errorhandling/BadRecordRouterTest.java (90%) rename sdks/java/core/src/test/java/org/apache/beam/sdk/{ => transforms}/errorhandling/ErrorHandlerTest.java (98%) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index b37289c893f..c27d4420217 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -31,8 +31,6 @@ import javax.annotation.Nonnull; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.coders.CoderRegistry; -import org.apache.beam.sdk.errorhandling.ErrorHandler; -import org.apache.beam.sdk.errorhandling.ErrorHandler.PTransformErrorHandler; import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; @@ -46,6 +44,8 @@ import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.PTransformErrorHandler; import org.apache.beam.sdk.transforms.resourcehints.ResourceHints; import org.apache.beam.sdk.util.UserCodeException; import org.apache.beam.sdk.values.PBegin; diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java similarity index 86% rename from 
sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 44fdf636ad2..390b90a1469 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -15,13 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import com.google.auto.value.AutoValue; import java.io.Serializable; -import javax.annotation.Nullable; import org.apache.beam.sdk.schemas.AutoValueSchema; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.checkerframework.checker.nullness.qual.Nullable; @AutoValue @DefaultSchema(AutoValueSchema.class) @@ -52,20 +52,17 @@ public abstract static class Builder { public abstract static class Record implements Serializable { /** The failing record, encoded as JSON. Will be null if serialization as JSON fails. */ - @Nullable - public abstract String getHumanReadableRecord(); + public abstract @Nullable String getJsonRecord(); /** * Nullable to account for failing to encode, or if there is no coder for the record at the time * of failure. */ - @Nullable @SuppressWarnings("mutable") - public abstract byte[] getEncodedRecord(); + public abstract byte @Nullable [] getEncodedRecord(); /** The coder for the record, or null if there is no coder. */ - @Nullable - public abstract String getCoder(); + public abstract @Nullable String getCoder(); public static Builder builder() { return new AutoValue_BadRecord_Record.Builder(); @@ -74,10 +71,10 @@ public static Builder builder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setHumanReadableRecord(@Nullable String humanReadableRecord); + public abstract Builder setJsonRecord(@Nullable String jsonRecord); @SuppressWarnings("mutable") - public abstract Builder setEncodedRecord(@Nullable byte[] encodedRecord); + public abstract Builder setEncodedRecord(byte @Nullable [] encodedRecord); public abstract Builder setCoder(@Nullable String coder); @@ -90,8 +87,7 @@ public abstract static class Builder { public abstract static class Failure implements Serializable { /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */ - @Nullable - public abstract String getException(); + public abstract @Nullable String getException(); /** The description of what was being attempted when the failure occurred. */ public abstract String getDescription(); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java similarity index 92% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 7f71e1208aa..ff98d6804e0 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -15,20 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.Serializable; -import javax.annotation.Nullable; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.errorhandling.BadRecord.Failure; -import org.apache.beam.sdk.errorhandling.BadRecord.Record; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; +import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,7 +85,7 @@ public void route( // Build up record information BadRecord.Record.Builder recordBuilder = Record.builder(); try { - recordBuilder.setHumanReadableRecord(objectWriter.writeValueAsString(record)); + recordBuilder.setJsonRecord(objectWriter.writeValueAsString(record)); } catch (Exception e) { LOG.error("Unable to serialize record as JSON. Human readable record will be null", e); } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java similarity index 97% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 21aad1cd7bf..12704ed635f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -15,12 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import javax.annotation.Nullable; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; @@ -32,6 +31,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.POutput; +import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,7 +78,7 @@ class PTransformErrorHandler private final List> errorCollections = new ArrayList<>(); - @Nullable private OutputT sinkOutput = null; + private @Nullable OutputT sinkOutput = null; private boolean closed = false; diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/package-info.java similarity index 94% rename from sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java rename to sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/package-info.java index 1cd399be565..c41e10143c1 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/errorhandling/package-info.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/package-info.java @@ -17,7 +17,7 @@ */ /** Provides utilities for handling errors in Pipelines. */ @DefaultAnnotation(NonNull.class) -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import edu.umd.cs.findbugs.annotations.DefaultAnnotation; import org.checkerframework.checker.nullness.qual.NonNull; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java similarity index 96% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index 65fa95eb6d4..38864b07c7f 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -15,17 +15,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
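The test transform in the next file still builds its BadRecord coder by hand from the SchemaRegistry; patch 41 below later factors that into BadRecord.getCoder. The pattern in isolation, as a sketch with an invented helper name, works for any schema-registered type:

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.coders.Coder;
    import org.apache.beam.sdk.schemas.NoSuchSchemaException;
    import org.apache.beam.sdk.schemas.SchemaCoder;
    import org.apache.beam.sdk.schemas.SchemaRegistry;
    import org.apache.beam.sdk.values.TypeDescriptor;

    public class SchemaCoders {
      // Derives a Coder from the schema registered for clazz, e.g. a type
      // annotated with @DefaultSchema(AutoValueSchema.class) such as BadRecord.
      static <T> Coder<T> schemaCoderFor(Pipeline pipeline, Class<T> clazz) {
        SchemaRegistry registry = pipeline.getSchemaRegistry();
        try {
          return SchemaCoder.of(
              registry.getSchema(clazz),
              TypeDescriptor.of(clazz),
              registry.getToRowFunction(clazz),
              registry.getFromRowFunction(clazz));
        } catch (NoSuchSchemaException e) {
          throw new RuntimeException(e);
        }
      }
    }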
*/ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.errorhandling.ErrorHandler.NoOpErrorHandler; import org.apache.beam.sdk.schemas.NoSuchSchemaException; import org.apache.beam.sdk.schemas.SchemaCoder; import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.NoOpErrorHandler; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.TupleTag; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java similarity index 90% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index e6a686ec8a0..f060942cae4 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -15,9 +15,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; -import static org.apache.beam.sdk.errorhandling.BadRecordRouter.BAD_RECORD_TAG; +import static org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter.BAD_RECORD_TAG; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -28,10 +28,10 @@ import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.errorhandling.BadRecord.Failure; -import org.apache.beam.sdk.errorhandling.BadRecord.Record; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.DoFn.OutputReceiver; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -81,7 +81,7 @@ public void testRecordingHandler() throws Exception { BadRecord.builder() .setRecord( Record.builder() - .setHumanReadableRecord("5") + .setJsonRecord("5") .setEncodedRecord(new byte[] {0, 0, 0, 5}) .setCoder("BigEndianIntegerCoder") .build()) @@ -106,7 +106,7 @@ public void testNoCoder() throws Exception { BadRecord expected = BadRecord.builder() - .setRecord(Record.builder().setHumanReadableRecord("5").build()) + .setRecord(Record.builder().setJsonRecord("5").build()) .setFailure( Failure.builder() .setException("java.lang.RuntimeException") @@ -151,10 +151,7 @@ public void verifyDeterministic() throws NonDeterministicException {} BadRecord expected = BadRecord.builder() .setRecord( - Record.builder() - .setHumanReadableRecord("5") - .setCoder(failingCoder.toString()) - .build()) + Record.builder().setJsonRecord("5").setCoder(failingCoder.toString()).build()) .setFailure( Failure.builder() .setException("java.lang.RuntimeException") 
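One subtlety in the nullness annotations above: the Checker Framework's @Nullable is a type-use annotation, so its position against an array type matters, which is why the patch writes byte @Nullable [] rather than @Nullable byte[]. A minimal illustration:

    import org.checkerframework.checker.nullness.qual.Nullable;

    public class ArrayNullness {
      // The array reference itself may be null; this is what BadRecord's
      // getEncodedRecord() declares above.
      byte @Nullable [] maybeNoArray;

      // Here the annotation applies to the element type instead: an array
      // (non-null under a @NonNull default) whose elements may each be null.
      @Nullable Byte[] elementsMayBeNull;
    }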
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java similarity index 98% rename from sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java rename to sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index efedb5c43f2..8dcdd3489c1 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.beam.sdk.errorhandling; +package org.apache.beam.sdk.transforms.errorhandling; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.TestPipeline; From f8c6d8cb15c5717ca5f90be6017e4c1fa9878ff4 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 14 Nov 2023 15:02:15 -0500 Subject: [PATCH 41/80] Address comments --- .../java/org/apache/beam/sdk/Pipeline.java | 2 +- .../transforms/errorhandling/BadRecord.java | 24 ++++++ .../errorhandling/BadRecordRouter.java | 22 ++++-- .../errorhandling/ErrorHandler.java | 41 +++++++--- .../errorhandling/BRHEnabledPTransform.java | 21 +---- .../errorhandling/BadRecordRouterTest.java | 79 +++++++++++-------- .../errorhandling/ErrorHandlerTest.java | 40 +++++++++- 7 files changed, 157 insertions(+), 72 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index c27d4420217..409b8017aa3 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -349,7 +349,7 @@ public SchemaRegistry getSchemaRegistry() { public ErrorHandler registerErrorHandler( PTransform, T> sinkTransform) { - ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform); + ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform, this); errorHandlers.add(errorHandler); return errorHandler; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 390b90a1469..4d7153cef0a 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -19,8 +19,14 @@ import com.google.auto.value.AutoValue; import java.io.Serializable; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.schemas.AutoValueSchema; +import org.apache.beam.sdk.schemas.NoSuchSchemaException; +import org.apache.beam.sdk.schemas.SchemaCoder; +import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.values.TypeDescriptor; import org.checkerframework.checker.nullness.qual.Nullable; @AutoValue @@ -36,6 +42,19 @@ public abstract class BadRecord implements Serializable { public static Builder builder() { return new AutoValue_BadRecord.Builder(); } + public static Coder getCoder(Pipeline pipeline){ + try { + SchemaRegistry schemaRegistry = pipeline.getSchemaRegistry(); + return + SchemaCoder.of( + 
schemaRegistry.getSchema(BadRecord.class), + TypeDescriptor.of(BadRecord.class), + schemaRegistry.getToRowFunction(BadRecord.class), + schemaRegistry.getFromRowFunction(BadRecord.class)); + } catch (NoSuchSchemaException e) { + throw new RuntimeException(e); + } + } @AutoValue.Builder public abstract static class Builder { @@ -89,6 +108,9 @@ public abstract static class Failure implements Serializable { /** The exception itself, e.g. IOException. Null if there is a failure without an exception. */ public abstract @Nullable String getException(); + /** The full stacktrace. Null if there is a failure without an exception. */ + public abstract @Nullable String getExceptionStacktrace(); + /** The description of what was being attempted when the failure occurred. */ public abstract String getDescription(); @@ -104,6 +126,8 @@ public abstract static class Builder { public abstract Builder setException(@Nullable String exception); + public abstract Builder setExceptionStacktrace(@Nullable String stacktrace); + public abstract Builder setDescription(String description); public abstract Builder setFailingTransform(String failingTransform); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index ff98d6804e0..0e69c13242a 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -21,11 +21,13 @@ import com.fasterxml.jackson.databind.ObjectWriter; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.io.Serializable; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; +import org.apache.beam.sdk.util.CoderUtils; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; import org.checkerframework.checker.nullness.qual.Nullable; @@ -87,19 +89,22 @@ public void route( try { recordBuilder.setJsonRecord(objectWriter.writeValueAsString(record)); } catch (Exception e) { - LOG.error("Unable to serialize record as JSON. Human readable record will be null", e); + LOG.error( + "Unable to serialize record as JSON. Human readable record attempted via .toString", e); + try { + recordBuilder.setJsonRecord(record.toString()); + } catch (Exception e2) { + LOG.error( + "Unable to serialize record via .toString. Human readable record will be null", e2); + } } // We will sometimes not have a coder for a failing record, for example if it has already been // modified within the dofn. if (coder != null) { recordBuilder.setCoder(coder.toString()); - try { - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - coder.encode(record, stream); - byte[] bytes = stream.toByteArray(); - recordBuilder.setEncodedRecord(bytes); + recordBuilder.setEncodedRecord(CoderUtils.encodeToByteArray(coder, record)); } catch (IOException e) { LOG.error( "Unable to encode failing record using provided coder." 
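The hunk that follows stores the exception's stacktrace as a String on BadRecord.Failure. The same technique in isolation (a StringWriter/PrintWriter pair would work equally well; note plain toString() uses the platform charset, as the patch's code does):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    public class StacktraceToString {
      // printStackTrace writes the exception class, message, and full frame list.
      static String render(Throwable t) {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (PrintStream out = new PrintStream(bytes)) {
          t.printStackTrace(out);
        }
        return bytes.toString();
      }

      public static void main(String[] args) {
        System.out.println(render(new RuntimeException("boom")));
      }
    }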
@@ -116,6 +121,11 @@ public void route( // exists if (exception != null) { failureBuilder.setException(exception.toString()); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + PrintStream printStream = new PrintStream(stream); + exception.printStackTrace(printStream); + printStream.close(); + failureBuilder.setExceptionStacktrace(stream.toString()); } BadRecord badRecord = diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 12704ed635f..75997878a13 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -20,9 +20,11 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; +import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.PTransform; @@ -31,6 +33,7 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.POutput; +import org.apache.beam.sdk.values.TypeDescriptor; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,6 +79,8 @@ class PTransformErrorHandler private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); private final PTransform, OutputT> sinkTransform; + private final Pipeline pipeline; + private final List> errorCollections = new ArrayList<>(); private @Nullable OutputT sinkOutput = null; @@ -87,8 +92,10 @@ class PTransformErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler(PTransform, OutputT> sinkTransform) { + public PTransformErrorHandler( + PTransform, OutputT> sinkTransform, Pipeline pipeline) { this.sinkTransform = sinkTransform; + this.pipeline = pipeline; } @Override @@ -115,19 +122,25 @@ public OutputT getOutput() { @Override public void close() { closed = true; + PCollection flattened; if (errorCollections.isEmpty()) { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); - return; + flattened = pipeline.apply(Create.empty(new TypeDescriptor() {})); + } else { + flattened = PCollectionList.of(errorCollections).apply(Flatten.pCollections()); } LOG.debug( "{} error collections are being sent to {}", errorCollections.size(), sinkTransform.getName()); + String sinkTransformName = sinkTransform.getName(); sinkOutput = - PCollectionList.of(errorCollections) - .apply(Flatten.pCollections()) - .apply(new WriteErrorMetrics(sinkTransform)) + flattened + .apply( + "Record Error Metrics to " + sinkTransformName, + new WriteErrorMetrics(sinkTransformName)) .apply( + "Write to error Sink", sinkTransform.addAnnotation( "FeatureMetric", "ErrorHandler".getBytes(StandardCharsets.UTF_8))); } @@ -137,8 +150,8 @@ public static class WriteErrorMetrics private final Counter errorCounter; - public WriteErrorMetrics(PTransform sinkTransform) { - errorCounter = Metrics.counter("ErrorMetrics", sinkTransform.getName() + "-input"); + public WriteErrorMetrics(String sinkTransformName) { + 
errorCounter = Metrics.counter("ErrorMetrics", sinkTransformName + "-input"); } @Override @@ -163,6 +176,11 @@ public void processElement(@Element ErrorT error, OutputReceiver receive } } + /** + * A default, placeholder error handler that exists to allow usage of .addErrorCollection() + * without effects. This enables more simple codepaths without checking for whether the user + * configured an error handler or not. + */ @Internal class NoOpErrorHandler implements ErrorHandler { @@ -171,17 +189,20 @@ public void addErrorCollection(PCollection errorCollection) {} @Override public boolean isClosed() { - throw new IllegalArgumentException("No Op handler should not be closed"); + throw new IllegalArgumentException( + "No Op handler should not be closed. This implies this IO is misconfigured."); } @Override public OutputT getOutput() { - throw new IllegalArgumentException("No Op handler has no output"); + throw new IllegalArgumentException( + "No Op handler has no output. This implies this IO is misconfigured."); } @Override public void close() { - throw new IllegalArgumentException("No Op handler should not be closed"); + throw new IllegalArgumentException( + "No Op handler should not be closed. This implies this IO is misconfigured."); } } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index 38864b07c7f..cbf7c09b411 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -32,7 +32,8 @@ import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; -/** Dummy PTransform that is configurable with a Bad Record Handler. */ +/** Dummy PTransform that is configurable with a Bad Record Handler. 
+ * TODO(johncasey) look to factor some of this out for easy use in other IOs */ public class BRHEnabledPTransform extends PTransform, PCollection> { private ErrorHandler errorHandler = new NoOpErrorHandler<>(); @@ -57,22 +58,8 @@ public PCollection expand(PCollection input) { ParDo.of(new OddIsBad(badRecordRouter)) .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); - Coder badRecordCoder; - - try { - SchemaRegistry schemaRegistry = input.getPipeline().getSchemaRegistry(); - badRecordCoder = - SchemaCoder.of( - schemaRegistry.getSchema(BadRecord.class), - TypeDescriptor.of(BadRecord.class), - schemaRegistry.getToRowFunction(BadRecord.class), - schemaRegistry.getFromRowFunction(BadRecord.class)); - } catch (NoSuchSchemaException e) { - throw new RuntimeException(e); - } - errorHandler.addErrorCollection( - pCollectionTuple.get(BadRecordRouter.BAD_RECORD_TAG).setCoder(badRecordCoder)); + pCollectionTuple.get(BadRecordRouter.BAD_RECORD_TAG).setCoder(BadRecord.getCoder(input.getPipeline()))); return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } @@ -95,7 +82,7 @@ public void processElement(@Element Integer element, MultiOutputReceiver receive receiver, element, BigEndianIntegerCoder.of(), - new RuntimeException(), + new RuntimeException("Integer was odd"), "Integer was odd", "NoOpDoFn"); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index f060942cae4..fb38304c62d 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -18,6 +18,7 @@ package org.apache.beam.sdk.transforms.errorhandling; import static org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter.BAD_RECORD_TAG; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -25,6 +26,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.List; +import java.util.function.BiFunction; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.CoderException; @@ -32,11 +34,14 @@ import org.apache.beam.sdk.transforms.DoFn.OutputReceiver; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; +import org.mockito.ArgumentMatcher; +import org.mockito.ArgumentMatchers; import org.mockito.Mock; import org.mockito.junit.MockitoJUnit; import org.mockito.junit.MockitoRule; @@ -52,6 +57,16 @@ public class BadRecordRouterTest { @Mock private OutputReceiver badRecordOutputReceiver; + private static final BiFunction> ignoreStacktraceMatcher = (expectedBuilder, failure) -> + (ArgumentMatcher) argument -> { + //This complex matcher means we don't need to maintain an expected stacktrace + String stackTrace = argument.getFailure().getExceptionStacktrace(); + failure.setExceptionStacktrace(stackTrace); + BadRecord expected = expectedBuilder.setFailure(failure.build()).build(); + return expected.equals(argument); + }; + + @Test public void 
testThrowingHandlerWithException() throws Exception { BadRecordRouter handler = BadRecordRouter.THROWING_ROUTER; @@ -77,23 +92,21 @@ public void testRecordingHandler() throws Exception { handler.route( outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); - BadRecord expected = + BadRecord.Builder expectedBuilder = BadRecord.builder() .setRecord( Record.builder() .setJsonRecord("5") .setEncodedRecord(new byte[] {0, 0, 0, 5}) .setCoder("BigEndianIntegerCoder") - .build()) - .setFailure( - Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") - .build()) - .build(); - - verify(badRecordOutputReceiver).output(expected); + .build()); + + BadRecord.Failure.Builder failure = BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); + + verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); } @Test @@ -104,18 +117,17 @@ public void testNoCoder() throws Exception { handler.route(outputReceiver, 5, null, new RuntimeException(), "desc", "transform"); - BadRecord expected = + BadRecord.Builder expectedBuilder = BadRecord.builder() - .setRecord(Record.builder().setJsonRecord("5").build()) - .setFailure( - Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") - .build()) - .build(); - - verify(badRecordOutputReceiver).output(expected); + .setRecord(Record.builder().setJsonRecord("5").build()); + + BadRecord.Failure.Builder failure = BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); + + verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); + } @Test @@ -129,7 +141,7 @@ public void testFailingCoder() throws Exception { @Override public void encode(Integer value, OutputStream outStream) throws CoderException, IOException { - throw new IOException(); + throw new CoderException("Failing Coder"); } @Override @@ -148,18 +160,17 @@ public void verifyDeterministic() throws NonDeterministicException {} handler.route(outputReceiver, 5, failingCoder, new RuntimeException(), "desc", "transform"); - BadRecord expected = + BadRecord.Builder expectedBuilder = BadRecord.builder() .setRecord( - Record.builder().setJsonRecord("5").setCoder(failingCoder.toString()).build()) - .setFailure( - Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform") - .build()) - .build(); - - verify(badRecordOutputReceiver).output(expected); + Record.builder().setJsonRecord("5").setCoder(failingCoder.toString()).build()); + + BadRecord.Failure.Builder failure = BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); + + verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); + } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index 8dcdd3489c1..691bd30b8f3 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ 
b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -17,11 +17,18 @@ */ package org.apache.beam.sdk.transforms.errorhandling; +import java.util.ArrayList; +import java.util.List; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; import org.apache.beam.sdk.values.PCollection; +import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -48,6 +55,7 @@ public void testBadErrorHandlerUsage() { pipeline.registerErrorHandler(new DummySinkTransform>()); + //Expected to be thrown because the error handler isn't closed thrown.expect(IllegalStateException.class); pipeline.run(); @@ -58,6 +66,7 @@ public void testBRHEnabledPTransform() { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); record.apply(new BRHEnabledPTransform()); + //unhandled runtime exception thrown by the BRHEnabledPTransform thrown.expect(RuntimeException.class); pipeline.run(); @@ -67,18 +76,41 @@ public void testBRHEnabledPTransform() { @Category(NeedsRunner.class) public void testErrorHandlerWithBRHTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); + DummySinkTransform transform = new DummySinkTransform<>(); try (ErrorHandler> eh = - pipeline.registerErrorHandler(new DummySinkTransform<>())) { + pipeline.registerErrorHandler(transform)) { record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); } - pipeline.run(); + pipeline.run().waitUntilFinish(); + + Assert.assertEquals(2,transform.getValues().size()); + for (BadRecord badRecord : transform.getValues()){ + BadRecord expectedRecord = BadRecord.builder() + .setRecord(Record.builder() + .build()) + .setFailure(Failure.builder() + .build()).build(); + Assert.assertEquals(expectedRecord,badRecord); + } } - public static class DummySinkTransform> extends PTransform { + public static class DummySinkTransform extends PTransform, PCollection> { + + private final List values = new ArrayList<>(); + + public List getValues(){ + return values; + } @Override - public T expand(T input) { + public PCollection expand(PCollection input) { + input.apply(ParDo.of(new DoFn() { + @ProcessElement + public void processElement(@Element T element){ + values.add(element); + } + })); return input; } } From a1b112c733f2ff23653257dc466fbc90a1c7fa85 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 15 Nov 2023 14:16:38 -0500 Subject: [PATCH 42/80] Fix test cases, spotless --- .../transforms/errorhandling/BadRecord.java | 14 +-- .../errorhandling/ErrorHandler.java | 4 +- .../errorhandling/BRHEnabledPTransform.java | 17 ++-- .../errorhandling/BadRecordRouterTest.java | 65 +++++++------- .../errorhandling/ErrorHandlerTest.java | 86 +++++++++++-------- 5 files changed, 103 insertions(+), 83 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 4d7153cef0a..4902401a0e1 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -42,15 +42,15 @@ public abstract class BadRecord implements Serializable { public static Builder builder() { return new AutoValue_BadRecord.Builder(); } - public static Coder getCoder(Pipeline pipeline){ + + public static Coder getCoder(Pipeline pipeline) { try { SchemaRegistry schemaRegistry = pipeline.getSchemaRegistry(); - return - SchemaCoder.of( - schemaRegistry.getSchema(BadRecord.class), - TypeDescriptor.of(BadRecord.class), - schemaRegistry.getToRowFunction(BadRecord.class), - schemaRegistry.getFromRowFunction(BadRecord.class)); + return SchemaCoder.of( + schemaRegistry.getSchema(BadRecord.class), + TypeDescriptor.of(BadRecord.class), + schemaRegistry.getToRowFunction(BadRecord.class), + schemaRegistry.getFromRowFunction(BadRecord.class)); } catch (NoSuchSchemaException e) { throw new RuntimeException(e); } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 75997878a13..79770662c91 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -33,7 +33,6 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.POutput; -import org.apache.beam.sdk.values.TypeDescriptor; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -125,7 +124,8 @@ public void close() { PCollection flattened; if (errorCollections.isEmpty()) { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); - flattened = pipeline.apply(Create.empty(new TypeDescriptor() {})); + // We need to use Create.of() this way to infer the coder for ErrorT properly + flattened = pipeline.apply(Create.of(new ArrayList())); } else { flattened = PCollectionList.of(errorCollections).apply(Flatten.pCollections()); } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index cbf7c09b411..d159bb59cd5 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -18,10 +18,6 @@ package org.apache.beam.sdk.transforms.errorhandling; import org.apache.beam.sdk.coders.BigEndianIntegerCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.schemas.NoSuchSchemaException; -import org.apache.beam.sdk.schemas.SchemaCoder; -import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; @@ -30,10 +26,11 @@ import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.sdk.values.TypeDescriptor; -/** Dummy PTransform that is configurable with a Bad Record Handler. - * TODO(johncasey) look to factor some of this out for easy use in other IOs */ +/** + * Dummy PTransform that is configurable with a Bad Record Handler. 
TODO(johncasey) look to factor + * some of this out for easy use in other IOs + */ public class BRHEnabledPTransform extends PTransform, PCollection> { private ErrorHandler errorHandler = new NoOpErrorHandler<>(); @@ -59,14 +56,16 @@ public PCollection expand(PCollection input) { .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); errorHandler.addErrorCollection( - pCollectionTuple.get(BadRecordRouter.BAD_RECORD_TAG).setCoder(BadRecord.getCoder(input.getPipeline()))); + pCollectionTuple + .get(BadRecordRouter.BAD_RECORD_TAG) + .setCoder(BadRecord.getCoder(input.getPipeline()))); return pCollectionTuple.get(RECORDS).setCoder(BigEndianIntegerCoder.of()); } public static class OddIsBad extends DoFn { - private BadRecordRouter badRecordRouter; + private final BadRecordRouter badRecordRouter; public OddIsBad(BadRecordRouter badRecordRouter) { this.badRecordRouter = badRecordRouter; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index fb38304c62d..9b2819f707f 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -18,7 +18,6 @@ package org.apache.beam.sdk.transforms.errorhandling; import static org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter.BAD_RECORD_TAG; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -32,9 +31,7 @@ import org.apache.beam.sdk.coders.CoderException; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.DoFn.OutputReceiver; -import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; -import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -57,15 +54,18 @@ public class BadRecordRouterTest { @Mock private OutputReceiver badRecordOutputReceiver; - private static final BiFunction> ignoreStacktraceMatcher = (expectedBuilder, failure) -> - (ArgumentMatcher) argument -> { - //This complex matcher means we don't need to maintain an expected stacktrace - String stackTrace = argument.getFailure().getExceptionStacktrace(); - failure.setExceptionStacktrace(stackTrace); - BadRecord expected = expectedBuilder.setFailure(failure.build()).build(); - return expected.equals(argument); - }; - + private static final BiFunction< + BadRecord.Builder, BadRecord.Failure.Builder, ArgumentMatcher> + ignoreStacktraceMatcher = + (expectedBuilder, failure) -> + (ArgumentMatcher) + argument -> { + // This complex matcher means we don't need to maintain an expected stacktrace + String stackTrace = argument.getFailure().getExceptionStacktrace(); + failure.setExceptionStacktrace(stackTrace); + BadRecord expected = expectedBuilder.setFailure(failure.build()).build(); + return expected.equals(argument); + }; @Test public void testThrowingHandlerWithException() throws Exception { @@ -101,12 +101,14 @@ public void testRecordingHandler() throws Exception { .setCoder("BigEndianIntegerCoder") .build()); - BadRecord.Failure.Builder failure = BadRecord.Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); + 
BadRecord.Failure.Builder failure = + BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); - verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); + verify(badRecordOutputReceiver) + .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); } @Test @@ -118,16 +120,16 @@ public void testNoCoder() throws Exception { handler.route(outputReceiver, 5, null, new RuntimeException(), "desc", "transform"); BadRecord.Builder expectedBuilder = - BadRecord.builder() - .setRecord(Record.builder().setJsonRecord("5").build()); + BadRecord.builder().setRecord(Record.builder().setJsonRecord("5").build()); - BadRecord.Failure.Builder failure = BadRecord.Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); - - verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); + BadRecord.Failure.Builder failure = + BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); + verify(badRecordOutputReceiver) + .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); } @Test @@ -165,12 +167,13 @@ public void verifyDeterministic() throws NonDeterministicException {} .setRecord( Record.builder().setJsonRecord("5").setCoder(failingCoder.toString()).build()); - BadRecord.Failure.Builder failure = BadRecord.Failure.builder() - .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); - - verify(badRecordOutputReceiver).output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder,failure))); + BadRecord.Failure.Builder failure = + BadRecord.Failure.builder() + .setException("java.lang.RuntimeException") + .setDescription("desc") + .setFailingTransform("transform"); + verify(badRecordOutputReceiver) + .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index 691bd30b8f3..a1de3d2be89 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -17,15 +17,12 @@ */ package org.apache.beam.sdk.transforms.errorhandling; -import java.util.ArrayList; -import java.util.List; +import java.util.Objects; import org.apache.beam.sdk.testing.NeedsRunner; +import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; import org.apache.beam.sdk.values.PCollection; import org.junit.Assert; @@ -45,7 +42,7 @@ public class ErrorHandlerTest { @Category(NeedsRunner.class) public void testGoodErrorHandlerUsage() throws Exception { try (ErrorHandler> eh = - pipeline.registerErrorHandler(new 
DummySinkTransform<>())) {} + pipeline.registerErrorHandler(new DummySinkTransform())) {} pipeline.run(); } @@ -55,7 +52,7 @@ public void testBadErrorHandlerUsage() { pipeline.registerErrorHandler(new DummySinkTransform>()); - //Expected to be thrown because the error handler isn't closed + // Expected to be thrown because the error handler isn't closed thrown.expect(IllegalStateException.class); pipeline.run(); @@ -66,7 +63,7 @@ public void testBRHEnabledPTransform() { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); record.apply(new BRHEnabledPTransform()); - //unhandled runtime exception thrown by the BRHEnabledPTransform + // unhandled runtime exception thrown by the BRHEnabledPTransform thrown.expect(RuntimeException.class); pipeline.run(); @@ -77,40 +74,61 @@ public void testBRHEnabledPTransform() { public void testErrorHandlerWithBRHTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); DummySinkTransform transform = new DummySinkTransform<>(); - try (ErrorHandler> eh = - pipeline.registerErrorHandler(transform)) { - record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); - } + ErrorHandler> eh = pipeline.registerErrorHandler(transform); + record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); + eh.close(); + PCollection badRecords = eh.getOutput(); + + // We use a more complex satisfies statement to ensure we don't need to preserve stacktraces + // in test cases + PAssert.that(badRecords) + .satisfies( + (records) -> { + int count = 0; + for (BadRecord badRecord : records) { + count++; + + Record r = null; + + if (Objects.equals(badRecord.getRecord().getJsonRecord(), "1")) { + r = + Record.builder() + .setJsonRecord("1") + .setEncodedRecord(new byte[] {0, 0, 0, 1}) + .setCoder("BigEndianIntegerCoder") + .build(); + } else { + r = + Record.builder() + .setJsonRecord("3") + .setEncodedRecord(new byte[] {0, 0, 0, 3}) + .setCoder("BigEndianIntegerCoder") + .build(); + } + + BadRecord.Builder expectedBuilder = BadRecord.builder().setRecord(r); + + BadRecord.Failure.Builder failure = + BadRecord.Failure.builder() + .setException("java.lang.RuntimeException: Integer was odd") + .setDescription("Integer was odd") + .setFailingTransform("NoOpDoFn"); + + failure.setExceptionStacktrace(badRecord.getFailure().getExceptionStacktrace()); + expectedBuilder.setFailure(failure.build()); + Assert.assertEquals("Expect failure to match", expectedBuilder.build(), badRecord); + } + Assert.assertEquals("Expect 2 errors", 2, count); + return null; + }); pipeline.run().waitUntilFinish(); - - Assert.assertEquals(2,transform.getValues().size()); - for (BadRecord badRecord : transform.getValues()){ - BadRecord expectedRecord = BadRecord.builder() - .setRecord(Record.builder() - .build()) - .setFailure(Failure.builder() - .build()).build(); - Assert.assertEquals(expectedRecord,badRecord); - } } public static class DummySinkTransform extends PTransform, PCollection> { - private final List values = new ArrayList<>(); - - public List getValues(){ - return values; - } - @Override public PCollection expand(PCollection input) { - input.apply(ParDo.of(new DoFn() { - @ProcessElement - public void processElement(@Element T element){ - values.add(element); - } - })); return input; } } From ad1684a7e5b1660d85f72264050a95577046491a Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 15 Nov 2023 15:12:47 -0500 Subject: [PATCH 43/80] remove flatting without error collections --- .../main/java/org/apache/beam/sdk/Pipeline.java | 2 +- 
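// The reworked test above, reduced to its skeleton under the API at this point in the
// series (registerErrorHandler is renamed registerBadRecordErrorHandler in a later
// patch). DummySinkTransform and BRHEnabledPTransform are the test helpers defined in
// this file; element values are illustrative.
PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3, 4));
ErrorHandler<BadRecord, PCollection<BadRecord>> handler =
    pipeline.registerErrorHandler(new DummySinkTransform<>());
input.apply(new BRHEnabledPTransform().withBadRecordHandler(handler));
handler.close();

// getOutput() is only valid after close(); PAssert then checks the routed records.
PAssert.that(handler.getOutput())
    .satisfies(
        records -> {
          int count = 0;
          for (BadRecord ignored : records) {
            count++;
          }
          Assert.assertEquals("OddIsBad routes the two odd elements", 2, count);
          return null;
        });
pipeline.run().waitUntilFinish();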
.../transforms/errorhandling/ErrorHandler.java | 16 ++++++---------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index 409b8017aa3..c27d4420217 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -349,7 +349,7 @@ public SchemaRegistry getSchemaRegistry() { public ErrorHandler registerErrorHandler( PTransform, T> sinkTransform) { - ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform, this); + ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform); errorHandlers.add(errorHandler); return errorHandler; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 79770662c91..c1449977225 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -78,8 +78,6 @@ class PTransformErrorHandler private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); private final PTransform, OutputT> sinkTransform; - private final Pipeline pipeline; - private final List> errorCollections = new ArrayList<>(); private @Nullable OutputT sinkOutput = null; @@ -92,9 +90,8 @@ class PTransformErrorHandler */ @Internal public PTransformErrorHandler( - PTransform, OutputT> sinkTransform, Pipeline pipeline) { + PTransform, OutputT> sinkTransform) { this.sinkTransform = sinkTransform; - this.pipeline = pipeline; } @Override @@ -113,6 +110,9 @@ public OutputT getOutput() { throw new IllegalStateException( "ErrorHandler must be finalized before the output can be returned"); } + if (errorCollections.isEmpty()){ + return null; + } // make the static analysis checker happy Preconditions.checkArgumentNotNull(sinkOutput); return sinkOutput; @@ -121,13 +121,9 @@ public OutputT getOutput() { @Override public void close() { closed = true; - PCollection flattened; if (errorCollections.isEmpty()) { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); - // We need to use Create.of() this way to infer the coder for ErrorT properly - flattened = pipeline.apply(Create.of(new ArrayList())); - } else { - flattened = PCollectionList.of(errorCollections).apply(Flatten.pCollections()); + return; } LOG.debug( "{} error collections are being sent to {}", @@ -135,7 +131,7 @@ public void close() { sinkTransform.getName()); String sinkTransformName = sinkTransform.getName(); sinkOutput = - flattened + PCollectionList.of(errorCollections).apply(Flatten.pCollections()) .apply( "Record Error Metrics to " + sinkTransformName, new WriteErrorMetrics(sinkTransformName)) From 074faf22a867d3e71d2d7b84aa5422a5fc3bc325 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 16 Nov 2023 09:52:34 -0500 Subject: [PATCH 44/80] fix nullness --- .../sdk/transforms/errorhandling/ErrorHandler.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index c1449977225..cc6c80b0647 100644 --- 
a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -70,7 +70,7 @@ public interface ErrorHandler extends AutoClose boolean isClosed(); - OutputT getOutput(); + @Nullable OutputT getOutput(); class PTransformErrorHandler implements ErrorHandler { @@ -105,16 +105,11 @@ public boolean isClosed() { } @Override - public OutputT getOutput() { + public @Nullable OutputT getOutput() { if (!this.isClosed()) { throw new IllegalStateException( "ErrorHandler must be finalized before the output can be returned"); } - if (errorCollections.isEmpty()){ - return null; - } - // make the static analysis checker happy - Preconditions.checkArgumentNotNull(sinkOutput); return sinkOutput; } @@ -190,7 +185,7 @@ public boolean isClosed() { } @Override - public OutputT getOutput() { + public @Nullable OutputT getOutput() { throw new IllegalArgumentException( "No Op handler has no output. This implies this IO is misconfigured."); } From 17bf295585092a7169655cd1976fa13044a91768 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 16 Nov 2023 12:31:36 -0500 Subject: [PATCH 45/80] spotless + encoding issues --- .../transforms/errorhandling/BadRecordRouter.java | 8 +++++--- .../sdk/transforms/errorhandling/ErrorHandler.java | 12 +++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 0e69c13242a..00c11bdc17f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; +import java.nio.charset.StandardCharsets; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; @@ -30,6 +31,7 @@ import org.apache.beam.sdk.util.CoderUtils; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -117,15 +119,15 @@ public void route( BadRecord.Failure.Builder failureBuilder = Failure.builder().setDescription(description).setFailingTransform(failingTransform); - // Its possible for us to want to handle an error scenario where no actual exception objet + // It's possible for us to want to handle an error scenario where no actual exception object // exists if (exception != null) { failureBuilder.setException(exception.toString()); ByteArrayOutputStream stream = new ByteArrayOutputStream(); - PrintStream printStream = new PrintStream(stream); + PrintStream printStream = new PrintStream(stream, false, Charsets.UTF_8.name()); exception.printStackTrace(printStream); printStream.close(); - failureBuilder.setExceptionStacktrace(stream.toString()); + failureBuilder.setExceptionStacktrace(new String(stream.toByteArray(), Charsets.UTF_8)); } BadRecord badRecord = diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index cc6c80b0647..e11e35c8dda 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -20,16 +20,13 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionList; import org.apache.beam.sdk.values.POutput; @@ -70,7 +67,8 @@ public interface ErrorHandler extends AutoClose boolean isClosed(); - @Nullable OutputT getOutput(); + @Nullable + OutputT getOutput(); class PTransformErrorHandler implements ErrorHandler { @@ -89,8 +87,7 @@ class PTransformErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler( - PTransform, OutputT> sinkTransform) { + public PTransformErrorHandler(PTransform, OutputT> sinkTransform) { this.sinkTransform = sinkTransform; } @@ -126,7 +123,8 @@ public void close() { sinkTransform.getName()); String sinkTransformName = sinkTransform.getName(); sinkOutput = - PCollectionList.of(errorCollections).apply(Flatten.pCollections()) + PCollectionList.of(errorCollections) + .apply(Flatten.pCollections()) .apply( "Record Error Metrics to " + sinkTransformName, new WriteErrorMetrics(sinkTransformName)) From e2ec57fb757cf6c00b6485d3bed8efb8a5c03f86 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 16 Nov 2023 13:06:43 -0500 Subject: [PATCH 46/80] spotless --- .../beam/sdk/transforms/errorhandling/BadRecordRouter.java | 1 - 1 file changed, 1 deletion(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 00c11bdc17f..a9c75689cb5 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.io.PrintStream; import java.io.Serializable; -import java.nio.charset.StandardCharsets; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; From 8b3f0522da0f1eb3041e7cb4b0027c3cd2fe0224 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 21 Nov 2023 11:12:27 -0500 Subject: [PATCH 47/80] throw error when error handler isn't used --- .../beam/sdk/transforms/errorhandling/ErrorHandler.java | 4 ++-- .../sdk/transforms/errorhandling/ErrorHandlerTest.java | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 
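// The charset fix in the BadRecordRouter hunk above pins both the PrintStream and the
// resulting String to UTF-8 instead of the platform default charset. A standalone
// equivalent of that pattern, using java.nio StandardCharsets rather than the vendored
// Guava Charsets the patch uses; the helper name is illustrative.
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;

final class StackTraceUtil {
  static String stackTraceAsUtf8(Exception exception) throws UnsupportedEncodingException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    // An explicit charset keeps the output stable on JVMs whose default is not UTF-8.
    PrintStream printStream = new PrintStream(bytes, false, StandardCharsets.UTF_8.name());
    exception.printStackTrace(printStream);
    printStream.close();
    return new String(bytes.toByteArray(), StandardCharsets.UTF_8);
  }
}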
e11e35c8dda..5540f3f8c8f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -114,8 +114,8 @@ public boolean isClosed() { public void close() { closed = true; if (errorCollections.isEmpty()) { - LOG.warn("Empty list of error pcollections passed to ErrorHandler."); - return; + LOG.error("Empty list of error pcollections passed to ErrorHandler."); + throw new IllegalStateException("Empty list of error pcollections passed to ErrorHandler."); } LOG.debug( "{} error collections are being sent to {}", diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index a1de3d2be89..6c4602fdfbb 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -39,16 +39,18 @@ public class ErrorHandlerTest { @Rule public ExpectedException thrown = ExpectedException.none(); @Test - @Category(NeedsRunner.class) - public void testGoodErrorHandlerUsage() throws Exception { + public void testNoUsageErrorHandlerUsage() throws Exception { try (ErrorHandler> eh = pipeline.registerErrorHandler(new DummySinkTransform())) {} + // Expected to be thrown because the error handler isn't used + thrown.expect(IllegalStateException.class); + pipeline.run(); } @Test - public void testBadErrorHandlerUsage() { + public void testUnclosedErrorHandlerUsage() { pipeline.registerErrorHandler(new DummySinkTransform>()); From 525d912d91b2d440e41f4a20c49fd91672e47690 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 21 Nov 2023 16:26:23 -0500 Subject: [PATCH 48/80] add concrete bad record error handler class --- .../java/org/apache/beam/sdk/Pipeline.java | 8 +++-- .../errorhandling/ErrorHandler.java | 36 +++++++++++++++---- .../errorhandling/ErrorHandlerTest.java | 12 +++---- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index c27d4420217..29588797ca8 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -44,7 +44,9 @@ import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.PTransformErrorHandler; import org.apache.beam.sdk.transforms.resourcehints.ResourceHints; import org.apache.beam.sdk.util.UserCodeException; @@ -347,9 +349,9 @@ public SchemaRegistry getSchemaRegistry() { return schemaRegistry; } - public ErrorHandler registerErrorHandler( - PTransform, T> sinkTransform) { - ErrorHandler errorHandler = new PTransformErrorHandler<>(sinkTransform); + public BadRecordErrorHandler registerBadRecordErrorHandler( + PTransform, OutputT> sinkTransform) { + BadRecordErrorHandler errorHandler = new BadRecordErrorHandler<>(sinkTransform, this); errorHandlers.add(errorHandler); return 
errorHandler; } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 5540f3f8c8f..300c0820cae 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -20,9 +20,12 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; +import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.PTransform; @@ -76,6 +79,10 @@ class PTransformErrorHandler private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); private final PTransform, OutputT> sinkTransform; + private final Pipeline pipeline; + + private final Coder coder; + private final List> errorCollections = new ArrayList<>(); private @Nullable OutputT sinkOutput = null; @@ -87,8 +94,10 @@ class PTransformErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler(PTransform, OutputT> sinkTransform) { + public PTransformErrorHandler(PTransform, OutputT> sinkTransform, Pipeline pipeline, Coder coder) { this.sinkTransform = sinkTransform; + this.pipeline = pipeline; + this.coder = coder; } @Override @@ -113,18 +122,20 @@ public boolean isClosed() { @Override public void close() { closed = true; + PCollection flattened; if (errorCollections.isEmpty()) { - LOG.error("Empty list of error pcollections passed to ErrorHandler."); - throw new IllegalStateException("Empty list of error pcollections passed to ErrorHandler."); + LOG.warn("Empty list of error pcollections passed to ErrorHandler."); + flattened = pipeline.apply(Create.empty(coder)); + } else { + flattened = PCollectionList.of(errorCollections) + .apply(Flatten.pCollections()); } LOG.debug( "{} error collections are being sent to {}", errorCollections.size(), sinkTransform.getName()); String sinkTransformName = sinkTransform.getName(); - sinkOutput = - PCollectionList.of(errorCollections) - .apply(Flatten.pCollections()) + sinkOutput = flattened .apply( "Record Error Metrics to " + sinkTransformName, new WriteErrorMetrics(sinkTransformName)) @@ -165,6 +176,19 @@ public void processElement(@Element ErrorT error, OutputReceiver receive } } + + class BadRecordErrorHandler extends PTransformErrorHandler{ + + /** + * Constructs a new ErrorHandler for handling BadRecords + */ + @Internal + public BadRecordErrorHandler(PTransform, OutputT> sinkTransform, + Pipeline pipeline) { + super(sinkTransform, pipeline, BadRecord.getCoder(pipeline)); + } + } + /** * A default, placeholder error handler that exists to allow usage of .addErrorCollection() * without effects. 
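// Usage sketch for the entry point this patch adds: BadRecordErrorHandler supplies the
// BadRecord coder itself, so callers no longer pass a coder in. DummySinkTransform is
// the test helper from this series, standing in for a real sink transform.
try (BadRecordErrorHandler<PCollection<BadRecord>> handler =
    pipeline.registerBadRecordErrorHandler(new DummySinkTransform<>())) {
  // Apply transforms that accept .withBadRecordHandler(handler) here; the implicit
  // close() flattens every registered error collection into the sink.
}
pipeline.run();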
This enables more simple codepaths without checking for whether the user diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index 6c4602fdfbb..eb1b28c9c66 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -24,6 +24,7 @@ import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.values.PCollection; import org.junit.Assert; import org.junit.Rule; @@ -40,11 +41,8 @@ public class ErrorHandlerTest { @Test public void testNoUsageErrorHandlerUsage() throws Exception { - try (ErrorHandler> eh = - pipeline.registerErrorHandler(new DummySinkTransform())) {} - - // Expected to be thrown because the error handler isn't used - thrown.expect(IllegalStateException.class); + try (BadRecordErrorHandler> eh = + pipeline.registerBadRecordErrorHandler(new DummySinkTransform<>())) {} pipeline.run(); } @@ -52,7 +50,7 @@ public void testNoUsageErrorHandlerUsage() throws Exception { @Test public void testUnclosedErrorHandlerUsage() { - pipeline.registerErrorHandler(new DummySinkTransform>()); + pipeline.registerBadRecordErrorHandler(new DummySinkTransform<>()); // Expected to be thrown because the error handler isn't closed thrown.expect(IllegalStateException.class); @@ -76,7 +74,7 @@ public void testBRHEnabledPTransform() { public void testErrorHandlerWithBRHTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); DummySinkTransform transform = new DummySinkTransform<>(); - ErrorHandler> eh = pipeline.registerErrorHandler(transform); + ErrorHandler> eh = pipeline.registerBadRecordErrorHandler(transform); record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); eh.close(); PCollection badRecords = eh.getOutput(); From 9b4a3486db5e8551ffd905f4484b844e49bf2b51 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 22 Nov 2023 10:15:48 -0500 Subject: [PATCH 49/80] spotless, fix test category --- .../java/org/apache/beam/sdk/Pipeline.java | 1 - .../errorhandling/ErrorHandler.java | 23 ++++++++++--------- .../errorhandling/ErrorHandlerTest.java | 4 +++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java index 29588797ca8..50158f109cb 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/Pipeline.java @@ -47,7 +47,6 @@ import org.apache.beam.sdk.transforms.errorhandling.BadRecord; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; -import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.PTransformErrorHandler; import org.apache.beam.sdk.transforms.resourcehints.ResourceHints; import org.apache.beam.sdk.util.UserCodeException; import org.apache.beam.sdk.values.PBegin; diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java 
b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 300c0820cae..55d2182e3eb 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -94,7 +94,10 @@ class PTransformErrorHandler * pipeline.registerErrorHandler to ensure safe pipeline construction */ @Internal - public PTransformErrorHandler(PTransform, OutputT> sinkTransform, Pipeline pipeline, Coder coder) { + public PTransformErrorHandler( + PTransform, OutputT> sinkTransform, + Pipeline pipeline, + Coder coder) { this.sinkTransform = sinkTransform; this.pipeline = pipeline; this.coder = coder; @@ -127,15 +130,15 @@ public void close() { LOG.warn("Empty list of error pcollections passed to ErrorHandler."); flattened = pipeline.apply(Create.empty(coder)); } else { - flattened = PCollectionList.of(errorCollections) - .apply(Flatten.pCollections()); + flattened = PCollectionList.of(errorCollections).apply(Flatten.pCollections()); } LOG.debug( "{} error collections are being sent to {}", errorCollections.size(), sinkTransform.getName()); String sinkTransformName = sinkTransform.getName(); - sinkOutput = flattened + sinkOutput = + flattened .apply( "Record Error Metrics to " + sinkTransformName, new WriteErrorMetrics(sinkTransformName)) @@ -176,15 +179,13 @@ public void processElement(@Element ErrorT error, OutputReceiver receive } } + class BadRecordErrorHandler + extends PTransformErrorHandler { - class BadRecordErrorHandler extends PTransformErrorHandler{ - - /** - * Constructs a new ErrorHandler for handling BadRecords - */ + /** Constructs a new ErrorHandler for handling BadRecords */ @Internal - public BadRecordErrorHandler(PTransform, OutputT> sinkTransform, - Pipeline pipeline) { + public BadRecordErrorHandler( + PTransform, OutputT> sinkTransform, Pipeline pipeline) { super(sinkTransform, pipeline, BadRecord.getCoder(pipeline)); } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index eb1b28c9c66..8dcfcefe71b 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -40,6 +40,7 @@ public class ErrorHandlerTest { @Rule public ExpectedException thrown = ExpectedException.none(); @Test + @Category(NeedsRunner.class) public void testNoUsageErrorHandlerUsage() throws Exception { try (BadRecordErrorHandler> eh = pipeline.registerBadRecordErrorHandler(new DummySinkTransform<>())) {} @@ -74,7 +75,8 @@ public void testBRHEnabledPTransform() { public void testErrorHandlerWithBRHTransform() throws Exception { PCollection record = pipeline.apply(Create.of(1, 2, 3, 4)); DummySinkTransform transform = new DummySinkTransform<>(); - ErrorHandler> eh = pipeline.registerBadRecordErrorHandler(transform); + ErrorHandler> eh = + pipeline.registerBadRecordErrorHandler(transform); record.apply(new BRHEnabledPTransform().withBadRecordHandler(eh)); eh.close(); PCollection badRecords = eh.getOutput(); From d6f4097615c7e750401c8fb3015927770f637a8c Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 22 Nov 2023 13:57:50 -0500 Subject: [PATCH 50/80] fix checkstyle --- .../apache/beam/sdk/transforms/errorhandling/ErrorHandler.java | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 55d2182e3eb..79a34ca19d2 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -182,7 +182,7 @@ public void processElement(@Element ErrorT error, OutputReceiver receive class BadRecordErrorHandler extends PTransformErrorHandler { - /** Constructs a new ErrorHandler for handling BadRecords */ + /** Constructs a new ErrorHandler for handling BadRecords. */ @Internal public BadRecordErrorHandler( PTransform, OutputT> sinkTransform, Pipeline pipeline) { From a06723833f71e6b051b06b3e6d8a277196f380ab Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 11:16:25 -0500 Subject: [PATCH 51/80] clean up comments --- .../transforms/errorhandling/BadRecord.java | 61 ++++++++++++++++++- .../errorhandling/BadRecordRouter.java | 48 ++------------- .../errorhandling/ErrorHandler.java | 9 ++- .../errorhandling/BRHEnabledPTransform.java | 4 +- .../errorhandling/BadRecordRouterTest.java | 9 ++- .../errorhandling/ErrorHandlerTest.java | 6 +- 6 files changed, 81 insertions(+), 56 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 4902401a0e1..381d3c64c63 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -17,7 +17,12 @@ */ package org.apache.beam.sdk.transforms.errorhandling; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectWriter; import com.google.auto.value.AutoValue; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; import java.io.Serializable; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.coders.Coder; @@ -26,13 +31,19 @@ import org.apache.beam.sdk.schemas.SchemaCoder; import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; +import org.apache.beam.sdk.util.CoderUtils; import org.apache.beam.sdk.values.TypeDescriptor; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets; import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @AutoValue @DefaultSchema(AutoValueSchema.class) public abstract class BadRecord implements Serializable { + private static final Logger LOG = LoggerFactory.getLogger(BadRecord.class); + /** Information about the record that failed. */ public abstract Record getRecord(); @@ -71,7 +82,7 @@ public abstract static class Builder { public abstract static class Record implements Serializable { /** The failing record, encoded as JSON. Will be null if serialization as JSON fails. 
*/ - public abstract @Nullable String getJsonRecord(); + public abstract @Nullable String getHumanReadableJsonRecord(); /** * Nullable to account for failing to encode, or if there is no coder for the record at the time @@ -90,13 +101,49 @@ public static Builder builder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setJsonRecord(@Nullable String jsonRecord); + public abstract Builder setHumanReadableJsonRecord(@Nullable String jsonRecord); + + public Builder addHumanReadableJson(Object record) { + ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); + try { + this.setHumanReadableJsonRecord(objectWriter.writeValueAsString(record)); + } catch (Exception e) { + LOG.error( + "Unable to serialize record as JSON. Human readable record attempted via .toString", + e); + try { + this.setHumanReadableJsonRecord(record.toString()); + } catch (Exception e2) { + LOG.error( + "Unable to serialize record via .toString. Human readable record will be null", e2); + } + } + return this; + } @SuppressWarnings("mutable") public abstract Builder setEncodedRecord(byte @Nullable [] encodedRecord); public abstract Builder setCoder(@Nullable String coder); + public Builder addCoderAndEncodedRecord(@Nullable Coder coder, T record) { + // We will sometimes not have a coder for a failing record, for example if it has already + // been + // modified within the dofn. + if (coder != null) { + this.setCoder(coder.toString()); + try { + this.setEncodedRecord(CoderUtils.encodeToByteArray(coder, record)); + } catch (IOException e) { + LOG.error( + "Unable to encode failing record using provided coder." + + " BadRecord will be published without encoded bytes", + e); + } + } + return this; + } + public abstract Record build(); } } @@ -128,6 +175,16 @@ public abstract static class Builder { public abstract Builder setExceptionStacktrace(@Nullable String stacktrace); + public Builder addExceptionStackTrace(Exception exception) throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + PrintStream printStream = new PrintStream(stream, false, Charsets.UTF_8.name()); + exception.printStackTrace(printStream); + printStream.close(); + + this.setExceptionStacktrace(new String(stream.toByteArray(), Charsets.UTF_8)); + return this; + } + public abstract Builder setDescription(String description); public abstract Builder setFailingTransform(String failingTransform); diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index a9c75689cb5..8cee66335cb 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -17,23 +17,14 @@ */ package org.apache.beam.sdk.transforms.errorhandling; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectWriter; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.PrintStream; import java.io.Serializable; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; -import org.apache.beam.sdk.util.CoderUtils; import org.apache.beam.sdk.util.Preconditions; 
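// The new builder helpers compose like this when a BadRecord is assembled by hand
// (IO authors normally rely on RecordingBadRecordRouter instead). The element, coder,
// and description strings below are illustrative.
Integer element = 5;
BadRecord badRecord =
    BadRecord.builder()
        .setRecord(
            BadRecord.Record.builder()
                // Best-effort JSON; falls back to toString(), and then to null.
                .addHumanReadableJson(element)
                // Skipped silently when no coder is available for the failing element.
                .addCoderAndEncodedRecord(BigEndianIntegerCoder.of(), element)
                .build())
        .setFailure(
            BadRecord.Failure.builder()
                .setDescription("Example failure")
                .setFailingTransform("ExampleTransform")
                .build())
        .build();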
import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets; import org.checkerframework.checker.nullness.qual.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public interface BadRecordRouter extends Serializable { @@ -65,14 +56,14 @@ public void route( throws Exception { if (exception != null) { throw exception; + } else { + throw new RuntimeException("Throwing default exception from Throwing Bad Record Router"); } } } class RecordingBadRecordRouter implements BadRecordRouter { - private static final Logger LOG = LoggerFactory.getLogger(RecordingBadRecordRouter.class); - @Override public void route( MultiOutputReceiver outputReceiver, @@ -83,36 +74,10 @@ public void route( String failingTransform) throws Exception { Preconditions.checkArgumentNotNull(record); - ObjectWriter objectWriter = new ObjectMapper().writer().withDefaultPrettyPrinter(); // Build up record information BadRecord.Record.Builder recordBuilder = Record.builder(); - try { - recordBuilder.setJsonRecord(objectWriter.writeValueAsString(record)); - } catch (Exception e) { - LOG.error( - "Unable to serialize record as JSON. Human readable record attempted via .toString", e); - try { - recordBuilder.setJsonRecord(record.toString()); - } catch (Exception e2) { - LOG.error( - "Unable to serialize record via .toString. Human readable record will be null", e2); - } - } - - // We will sometimes not have a coder for a failing record, for example if it has already been - // modified within the dofn. - if (coder != null) { - recordBuilder.setCoder(coder.toString()); - try { - recordBuilder.setEncodedRecord(CoderUtils.encodeToByteArray(coder, record)); - } catch (IOException e) { - LOG.error( - "Unable to encode failing record using provided coder." 
- + " BadRecord will be published without encoded bytes", - e); - } - } + recordBuilder.addHumanReadableJson(record).addCoderAndEncodedRecord(coder, record); // Build up failure information BadRecord.Failure.Builder failureBuilder = @@ -121,12 +86,7 @@ public void route( // It's possible for us to want to handle an error scenario where no actual exception object // exists if (exception != null) { - failureBuilder.setException(exception.toString()); - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - PrintStream printStream = new PrintStream(stream, false, Charsets.UTF_8.name()); - exception.printStackTrace(printStream); - printStream.close(); - failureBuilder.setExceptionStacktrace(new String(stream.toByteArray(), Charsets.UTF_8)); + failureBuilder.setException(exception.toString()).addExceptionStackTrace(exception); } BadRecord badRecord = diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 79a34ca19d2..9e0298d885e 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -124,10 +124,14 @@ public boolean isClosed() { @Override public void close() { + if (closed) { + throw new IllegalStateException( + "Error handler is already closed, and may not be closed twice"); + } closed = true; PCollection flattened; if (errorCollections.isEmpty()) { - LOG.warn("Empty list of error pcollections passed to ErrorHandler."); + LOG.info("Empty list of error pcollections passed to ErrorHandler."); flattened = pipeline.apply(Create.empty(coder)); } else { flattened = PCollectionList.of(errorCollections).apply(Flatten.pCollections()); @@ -196,7 +200,8 @@ public BadRecordErrorHandler( * configured an error handler or not. 
*/ @Internal - class NoOpErrorHandler implements ErrorHandler { + class DefaultErrorHandler + implements ErrorHandler { @Override public void addErrorCollection(PCollection errorCollection) {} diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index d159bb59cd5..56e37278637 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -21,7 +21,7 @@ import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.NoOpErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.DefaultErrorHandler; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.TupleTag; @@ -33,7 +33,7 @@ */ public class BRHEnabledPTransform extends PTransform, PCollection> { - private ErrorHandler errorHandler = new NoOpErrorHandler<>(); + private ErrorHandler errorHandler = new DefaultErrorHandler<>(); private BadRecordRouter badRecordRouter = BadRecordRouter.THROWING_ROUTER; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index 9b2819f707f..98f919bea66 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -96,7 +96,7 @@ public void testRecordingHandler() throws Exception { BadRecord.builder() .setRecord( Record.builder() - .setJsonRecord("5") + .setHumanReadableJsonRecord("5") .setEncodedRecord(new byte[] {0, 0, 0, 5}) .setCoder("BigEndianIntegerCoder") .build()); @@ -120,7 +120,7 @@ public void testNoCoder() throws Exception { handler.route(outputReceiver, 5, null, new RuntimeException(), "desc", "transform"); BadRecord.Builder expectedBuilder = - BadRecord.builder().setRecord(Record.builder().setJsonRecord("5").build()); + BadRecord.builder().setRecord(Record.builder().setHumanReadableJsonRecord("5").build()); BadRecord.Failure.Builder failure = BadRecord.Failure.builder() @@ -165,7 +165,10 @@ public void verifyDeterministic() throws NonDeterministicException {} BadRecord.Builder expectedBuilder = BadRecord.builder() .setRecord( - Record.builder().setJsonRecord("5").setCoder(failingCoder.toString()).build()); + Record.builder() + .setHumanReadableJsonRecord("5") + .setCoder(failingCoder.toString()) + .build()); BadRecord.Failure.Builder failure = BadRecord.Failure.builder() diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index 8dcfcefe71b..4506a7f3fb6 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -92,17 +92,17 @@ public void testErrorHandlerWithBRHTransform() throws Exception { Record r = null; - if 
(Objects.equals(badRecord.getRecord().getJsonRecord(), "1")) { + if (Objects.equals(badRecord.getRecord().getHumanReadableJsonRecord(), "1")) { r = Record.builder() - .setJsonRecord("1") + .setHumanReadableJsonRecord("1") .setEncodedRecord(new byte[] {0, 0, 0, 1}) .setCoder("BigEndianIntegerCoder") .build(); } else { r = Record.builder() - .setJsonRecord("3") + .setHumanReadableJsonRecord("3") .setEncodedRecord(new byte[] {0, 0, 0, 3}) .setCoder("BigEndianIntegerCoder") .build(); From 408bc26dd53c53e22fce963f4ef4c64227fc46d5 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 13:51:44 -0500 Subject: [PATCH 52/80] fix test case --- .../sdk/transforms/errorhandling/BadRecordRouterTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index 98f919bea66..554c00a4584 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -71,15 +71,17 @@ public class BadRecordRouterTest { public void testThrowingHandlerWithException() throws Exception { BadRecordRouter handler = BadRecordRouter.THROWING_ROUTER; - thrown.expect(RuntimeException.class); + thrown.expect(IOException.class); - handler.route(outputReceiver, new Object(), null, new RuntimeException(), "desc", "transform"); + handler.route(outputReceiver, new Object(), null, new IOException(), "desc", "transform"); } @Test public void testThrowingHandlerWithNoException() throws Exception { BadRecordRouter handler = BadRecordRouter.THROWING_ROUTER; + thrown.expect(RuntimeException.class); + handler.route(outputReceiver, new Object(), null, null, "desc", "transform"); } From 9766ea07d5752df66a60b8dfebbcc4451ec533c2 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 15:50:06 -0500 Subject: [PATCH 53/80] initial wiring of error handler into KafkaIO Read --- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 46 ++++++++- .../beam/sdk/io/kafka/ReadFromKafkaDoFn.java | 98 ++++++++++++------- .../sdk/io/kafka/ReadFromKafkaDoFnTest.java | 75 ++++++++++---- 3 files changed, 163 insertions(+), 56 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index 7275986de8b..5444e17d780 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -81,6 +81,9 @@ import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.SimpleFunction; import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord; +import org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimator; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.Manual; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.MonotonicallyIncreasing; @@ -89,9 +92,11 @@ import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; +import 
org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.PDone; import org.apache.beam.sdk.values.Row; import org.apache.beam.sdk.values.TupleTag; +import org.apache.beam.sdk.values.TupleTagList; import org.apache.beam.sdk.values.TypeDescriptor; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Joiner; @@ -626,6 +631,7 @@ public static WriteRecords writeRecords() { @AutoValue.CopyAnnotations public abstract static class Read extends PTransform>> { + @Pure abstract Map getConsumerConfig(); @@ -687,6 +693,9 @@ public abstract static class Read @Pure abstract @Nullable SerializableFunction getCheckStopReadingFn(); + @Pure + abstract @Nullable ErrorHandler getErrorHandler(); + abstract Builder toBuilder(); @AutoValue.Builder @@ -736,6 +745,8 @@ abstract Builder setValueDeserializerProvider( abstract Builder setCheckStopReadingFn( SerializableFunction checkStopReadingFn); + abstract Builder setErrorHandler(ErrorHandler errorHandler); + abstract Read build(); static void setupExternalBuilder( @@ -1278,6 +1289,10 @@ public Read withCheckStopReadingFn( return toBuilder().setCheckStopReadingFn(checkStopReadingFn).build(); } + public Read withErrorHandler(ErrorHandler errorHandler) { + return toBuilder().setErrorHandler(errorHandler).build(); + } + /** Returns a {@link PTransform} for PCollection of {@link KV}, dropping Kafka metatdata. */ public PTransform>> withoutMetadata() { return new TypedWithoutMetadata<>(this); @@ -1542,6 +1557,9 @@ public PCollection> expand(PBegin input) { if (kafkaRead.getStopReadTime() != null) { readTransform = readTransform.withBounded(); } + if (kafkaRead.getErrorHandler() != null) { + readTransform = readTransform.withErrorHandler(kafkaRead.getErrorHandler()); + } PCollection output; if (kafkaRead.isDynamicRead()) { Set topics = new HashSet<>(); @@ -1922,6 +1940,8 @@ public void populateDisplayData(DisplayData.Builder builder) { public abstract static class ReadSourceDescriptors extends PTransform, PCollection>> { + private final TupleTag>> RECORDS = new TupleTag<>(); + private static final Logger LOG = LoggerFactory.getLogger(ReadSourceDescriptors.class); @Pure @@ -1963,6 +1983,12 @@ public abstract static class ReadSourceDescriptors @Pure abstract @Nullable TimestampPolicyFactory getTimestampPolicyFactory(); + @Pure + abstract BadRecordRouter getBadRecordRouter(); + + @Pure + abstract ErrorHandler getErrorHandler(); + abstract boolean isBounded(); abstract ReadSourceDescriptors.Builder toBuilder(); @@ -2002,6 +2028,10 @@ abstract ReadSourceDescriptors.Builder setCommitOffsetEnabled( abstract ReadSourceDescriptors.Builder setTimestampPolicyFactory( TimestampPolicyFactory policy); + abstract ReadSourceDescriptors.Builder setBadRecordRouter(BadRecordRouter badRecordRouter); + + abstract ReadSourceDescriptors.Builder setErrorHandler(ErrorHandler errorHandler); + abstract ReadSourceDescriptors.Builder setBounded(boolean bounded); abstract ReadSourceDescriptors build(); @@ -2013,6 +2043,8 @@ public static ReadSourceDescriptors read() { .setConsumerConfig(KafkaIOUtils.DEFAULT_CONSUMER_PROPERTIES) .setCommitOffsetEnabled(false) .setBounded(false) + .setBadRecordRouter(BadRecordRouter.THROWING_ROUTER) + .setErrorHandler(new ErrorHandler.DefaultErrorHandler<>()) .build() .withProcessingTime() .withMonotonicallyIncreasingWatermarkEstimator(); @@ -2255,6 +2287,10 @@ public ReadSourceDescriptors withConsumerConfigOverrides( return 
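// End-to-end usage sketch for the new KafkaIO hook: records whose key or value fail
// deserialization are routed to the registered handler instead of failing the bundle.
// Bootstrap servers, topic, and the DummySinkTransform sink are illustrative
// placeholders, and imports are elided.
try (BadRecordErrorHandler<PCollection<BadRecord>> handler =
    pipeline.registerBadRecordErrorHandler(new DummySinkTransform<>())) {
  pipeline.apply(
      KafkaIO.<Long, String>read()
          .withBootstrapServers("localhost:9092")
          .withTopic("example-topic")
          .withKeyDeserializer(LongDeserializer.class)
          .withValueDeserializer(StringDeserializer.class)
          .withErrorHandler(handler));
}
pipeline.run();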
toBuilder().setConsumerConfig(consumerConfig).build(); } + public ReadSourceDescriptors withErrorHandler(ErrorHandler errorHandler){ + return toBuilder().setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER).setErrorHandler(errorHandler).build(); + } + ReadAllFromRow forExternalBuild() { return new ReadAllFromRow<>(this); } @@ -2345,9 +2381,15 @@ public PCollection> expand(PCollection Coder> recordCoder = KafkaRecordCoder.of(keyCoder, valueCoder); try { + PCollectionTuple pCollectionTuple = input + .apply(ParDo.of(ReadFromKafkaDoFn.create(this, RECORDS)) + .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); + getErrorHandler().addErrorCollection( + pCollectionTuple + .get(BadRecordRouter.BAD_RECORD_TAG) + .setCoder(BadRecord.getCoder(input.getPipeline()))); PCollection>> outputWithDescriptor = - input - .apply(ParDo.of(ReadFromKafkaDoFn.create(this))) + pCollectionTuple.get(RECORDS) .setCoder( KvCoder.of( input diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java index 31620549ab2..997c64fc570 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java @@ -26,6 +26,7 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.io.kafka.KafkaIO.ReadSourceDescriptors; import org.apache.beam.sdk.io.kafka.KafkaIOUtils.MovingAvg; @@ -33,6 +34,7 @@ import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter; import org.apache.beam.sdk.transforms.splittabledofn.GrowableOffsetRangeTracker; import org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator; import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker; @@ -43,6 +45,7 @@ import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Stopwatch; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Supplier; @@ -58,6 +61,7 @@ import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.serialization.Deserializer; import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Instant; @@ -142,29 +146,29 @@ abstract class ReadFromKafkaDoFn extends DoFn>> { - static ReadFromKafkaDoFn create(ReadSourceDescriptors transform) { + static ReadFromKafkaDoFn create(ReadSourceDescriptors transform, TupleTag>> recordTag) { if (transform.isBounded()) { - return new Bounded<>(transform); + return new Bounded<>(transform, recordTag); } else { - return new Unbounded<>(transform); + return new Unbounded<>(transform, recordTag); } } @UnboundedPerElement private static class Unbounded extends ReadFromKafkaDoFn { - 
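// The create(...) change below threads an explicit records tag through both DoFn
// variants so expand() can split good output from BadRecordRouter.BAD_RECORD_TAG, as
// in the KafkaIO hunk above. A simplified multi-output wiring of the same shape, with
// a hypothetical SomeMultiOutputDoFn standing in for ReadFromKafkaDoFn:
TupleTag<String> recordsTag = new TupleTag<String>() {};
PCollectionTuple outputs =
    input.apply(
        ParDo.of(new SomeMultiOutputDoFn(recordsTag))
            .withOutputTags(recordsTag, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG)));
PCollection<String> good = outputs.get(recordsTag);
PCollection<BadRecord> bad =
    outputs.get(BadRecordRouter.BAD_RECORD_TAG)
        .setCoder(BadRecord.getCoder(input.getPipeline()));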
Unbounded(ReadSourceDescriptors transform) { - super(transform); + Unbounded(ReadSourceDescriptors transform, TupleTag>> recordTag) { + super(transform, recordTag); } } @BoundedPerElement private static class Bounded extends ReadFromKafkaDoFn { - Bounded(ReadSourceDescriptors transform) { - super(transform); + Bounded(ReadSourceDescriptors transform,TupleTag>> recordTag) { + super(transform, recordTag); } } - private ReadFromKafkaDoFn(ReadSourceDescriptors transform) { + private ReadFromKafkaDoFn(ReadSourceDescriptors transform, TupleTag>> recordTag) { this.consumerConfig = transform.getConsumerConfig(); this.offsetConsumerConfig = transform.getOffsetConsumerConfig(); this.keyDeserializerProvider = @@ -176,6 +180,8 @@ private ReadFromKafkaDoFn(ReadSourceDescriptors transform) { this.createWatermarkEstimatorFn = transform.getCreateWatermarkEstimatorFn(); this.timestampPolicyFactory = transform.getTimestampPolicyFactory(); this.checkStopReadingFn = transform.getCheckStopReadingFn(); + this.badRecordRouter = transform.getBadRecordRouter(); + this.recordTag = recordTag; } private static final Logger LOG = LoggerFactory.getLogger(ReadFromKafkaDoFn.class); @@ -191,6 +197,10 @@ private ReadFromKafkaDoFn(ReadSourceDescriptors transform) { createWatermarkEstimatorFn; private final @Nullable TimestampPolicyFactory timestampPolicyFactory; + private final BadRecordRouter badRecordRouter; + + private final TupleTag>> recordTag; + // Valid between bundle start and bundle finish. private transient @Nullable Deserializer keyDeserializerInstance = null; private transient @Nullable Deserializer valueDeserializerInstance = null; @@ -355,7 +365,7 @@ public ProcessContinuation processElement( @Element KafkaSourceDescriptor kafkaSourceDescriptor, RestrictionTracker tracker, WatermarkEstimator watermarkEstimator, - OutputReceiver>> receiver) { + MultiOutputReceiver receiver) throws Exception { final LoadingCache avgRecordSize = Preconditions.checkStateNotNull(this.avgRecordSize); final Deserializer keyDeserializerInstance = @@ -421,35 +431,49 @@ public ProcessContinuation processElement( if (!tracker.tryClaim(rawRecord.offset())) { return ProcessContinuation.stop(); } - KafkaRecord kafkaRecord = - new KafkaRecord<>( - rawRecord.topic(), - rawRecord.partition(), - rawRecord.offset(), - ConsumerSpEL.getRecordTimestamp(rawRecord), - ConsumerSpEL.getRecordTimestampType(rawRecord), - ConsumerSpEL.hasHeaders() ? rawRecord.headers() : null, - ConsumerSpEL.deserializeKey(keyDeserializerInstance, rawRecord), - ConsumerSpEL.deserializeValue(valueDeserializerInstance, rawRecord)); - int recordSize = - (rawRecord.key() == null ? 0 : rawRecord.key().length) - + (rawRecord.value() == null ? 0 : rawRecord.value().length); - avgRecordSize - .getUnchecked(kafkaSourceDescriptor.getTopicPartition()) - .update(recordSize, rawRecord.offset() - expectedOffset); - expectedOffset = rawRecord.offset() + 1; - Instant outputTimestamp; - // The outputTimestamp and watermark will be computed by timestampPolicy, where the - // WatermarkEstimator should be a manual one. 
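// The hunk that follows wraps deserialization in try/catch and routes
// SerializationException to the BadRecordRouter instead of rethrowing, after the
// offset has already been claimed. The shape of that pattern, with a plain Kafka
// StringDeserializer and illustrative names, inside a @ProcessElement that declares
// throws Exception:
try {
  String value = new StringDeserializer().deserialize("topic", rawValueBytes);
  receiver.get(recordsTag).output(value);
} catch (SerializationException e) {
  // No coder is passed: the raw bytes predate any Beam coder in this path.
  badRecordRouter.route(
      receiver, rawValueBytes, null, e, "Failed to deserialize Kafka record", "ExampleDoFn");
}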
-          if (timestampPolicy != null) {
-            TimestampPolicyContext context =
-                updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
-            outputTimestamp = timestampPolicy.getTimestampForRecord(context, kafkaRecord);
-          } else {
-            Preconditions.checkStateNotNull(this.extractOutputTimestampFn);
-            outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord);
+          try {
+            KafkaRecord kafkaRecord =
+                new KafkaRecord<>(
+                    rawRecord.topic(),
+                    rawRecord.partition(),
+                    rawRecord.offset(),
+                    ConsumerSpEL.getRecordTimestamp(rawRecord),
+                    ConsumerSpEL.getRecordTimestampType(rawRecord),
+                    ConsumerSpEL.hasHeaders() ? rawRecord.headers() : null,
+                    ConsumerSpEL.deserializeKey(keyDeserializerInstance, rawRecord),
+                    ConsumerSpEL.deserializeValue(valueDeserializerInstance, rawRecord));
+            int recordSize =
+                (rawRecord.key() == null ? 0 : rawRecord.key().length)
+                    + (rawRecord.value() == null ? 0 : rawRecord.value().length);
+            avgRecordSize
+                .getUnchecked(kafkaSourceDescriptor.getTopicPartition())
+                .update(recordSize, rawRecord.offset() - expectedOffset);
+            expectedOffset = rawRecord.offset() + 1;
+            Instant outputTimestamp;
+            // The outputTimestamp and watermark will be computed by timestampPolicy, where the
+            // WatermarkEstimator should be a manual one.
+            if (timestampPolicy != null) {
+              TimestampPolicyContext context =
+                  updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
+              outputTimestamp = timestampPolicy.getTimestampForRecord(context, kafkaRecord);
+            } else {
+              Preconditions.checkStateNotNull(this.extractOutputTimestampFn);
+              outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord);
+            }
+            receiver.get(recordTag).outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp);
+          } catch (SerializationException e) {
+            // This exception should only occur during the key and value deserialization when creating the Kafka Record
+            badRecordRouter.route(
+                receiver,
+                rawRecord,
+                null,
+                e,
+                "Failure deserializing Key or Value of Kafka record reading from Kafka",
+                "ReadFromKafkaDoFn");
+            if (timestampPolicy != null) {
+              updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker);
+            }
           }
-          receiver.outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp);
         }
       }
     }
diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java
index 854fd5ecea6..dbe7a85ae5a 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.io.kafka;
+import static org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter.BAD_RECORD_TAG;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -36,15 +37,19 @@ import org.apache.beam.sdk.runners.TransformHierarchy.Node;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver;
 import org.apache.beam.sdk.transforms.DoFn.OutputReceiver;
 import org.apache.beam.sdk.transforms.DoFn.ProcessContinuation;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
 import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
 import
org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollection.IsBounded; import org.apache.beam.sdk.values.PValue; +import org.apache.beam.sdk.values.Row; +import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TypeDescriptor; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; @@ -75,19 +80,21 @@ public class ReadFromKafkaDoFnTest { private final TopicPartition topicPartition = new TopicPartition("topic", 0); + private static final TupleTag>> RECORDS = new TupleTag<>(); + @Rule public ExpectedException thrown = ExpectedException.none(); private final SimpleMockKafkaConsumer consumer = new SimpleMockKafkaConsumer(OffsetResetStrategy.NONE, topicPartition); private final ReadFromKafkaDoFn dofnInstance = - ReadFromKafkaDoFn.create(makeReadSourceDescriptor(consumer)); + ReadFromKafkaDoFn.create(makeReadSourceDescriptor(consumer), RECORDS); private final ExceptionMockKafkaConsumer exceptionConsumer = new ExceptionMockKafkaConsumer(OffsetResetStrategy.NONE, topicPartition); private final ReadFromKafkaDoFn exceptionDofnInstance = - ReadFromKafkaDoFn.create(makeReadSourceDescriptor(exceptionConsumer)); + ReadFromKafkaDoFn.create(makeReadSourceDescriptor(exceptionConsumer), RECORDS); private ReadSourceDescriptors makeReadSourceDescriptor( Consumer kafkaMockConsumer) { @@ -249,23 +256,57 @@ public synchronized long position(TopicPartition partition) { } } - private static class MockOutputReceiver - implements OutputReceiver>> { + private static class MockMultiOutputReceiver implements MultiOutputReceiver{ + + MockOutputReceiver>> mockOutputReceiver = new MockOutputReceiver<>(); + + MockOutputReceiver badOutputReceiver = new MockOutputReceiver<>(); + + @Override + public @UnknownKeyFor @NonNull @Initialized OutputReceiver get( + @UnknownKeyFor @NonNull @Initialized TupleTag tag) { + if (RECORDS.equals(tag)) { + return (OutputReceiver) mockOutputReceiver; + } else if (BAD_RECORD_TAG.equals(tag)) { + return (OutputReceiver) badOutputReceiver; + } else { + throw new RuntimeException("Invalid Tag"); + } + } + + public List>> getGoodRecords(){ + return mockOutputReceiver.getOutputs(); + } + + public List getBadRecords(){ + return badOutputReceiver.getOutputs(); + } + + @Override + public @UnknownKeyFor @NonNull @Initialized OutputReceiver<@UnknownKeyFor @NonNull @Initialized Row> getRowReceiver( + @UnknownKeyFor @NonNull @Initialized TupleTag tag) { + return null; + } + } + + private static class MockOutputReceiver implements OutputReceiver { - private final List>> records = + private final List records = new ArrayList<>(); @Override - public void output(KV> output) {} + public void output(T output) { + records.add(output); + } @Override public void outputWithTimestamp( - KV> output, + T output, @UnknownKeyFor @NonNull @Initialized Instant timestamp) { records.add(output); } - public List>> getOutputs() { + public List getOutputs() { return this.records; } } @@ -381,7 +422,7 @@ public void testInitialRestrictionWithException() throws Exception { @Test public void testProcessElement() throws Exception { - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); consumer.setNumOfRecordsPerPoll(3L); long startOffset = 5L; OffsetRangeTracker tracker = @@ -391,12 +432,12 @@ public void testProcessElement() throws Exception { ProcessContinuation 
result = dofnInstance.processElement(descriptor, tracker, null, receiver); assertEquals(ProcessContinuation.stop(), result); assertEquals( - createExpectedRecords(descriptor, startOffset, 3, "key", "value"), receiver.getOutputs()); + createExpectedRecords(descriptor, startOffset, 3, "key", "value"), receiver.getGoodRecords()); } @Test public void testProcessElementWithEmptyPoll() throws Exception { - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); consumer.setNumOfRecordsPerPoll(-1); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); ProcessContinuation result = @@ -406,12 +447,12 @@ public void testProcessElementWithEmptyPoll() throws Exception { null, receiver); assertEquals(ProcessContinuation.resume(), result); - assertTrue(receiver.getOutputs().isEmpty()); + assertTrue(receiver.getGoodRecords().isEmpty()); } @Test public void testProcessElementWhenTopicPartitionIsRemoved() throws Exception { - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); consumer.setRemoved(); consumer.setNumOfRecordsPerPoll(10); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); @@ -426,7 +467,7 @@ public void testProcessElementWhenTopicPartitionIsRemoved() throws Exception { @Test public void testProcessElementWhenTopicPartitionIsStopped() throws Exception { - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); ReadFromKafkaDoFn instance = ReadFromKafkaDoFn.create( makeReadSourceDescriptor(consumer) @@ -439,7 +480,7 @@ public Boolean apply(TopicPartition input) { return true; } }) - .build()); + .build(), RECORDS); instance.setup(); consumer.setNumOfRecordsPerPoll(10); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); @@ -458,7 +499,7 @@ public void testProcessElementWithException() throws Exception { thrown.expect(KafkaException.class); thrown.expectMessage("SeekException"); - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); exceptionDofnInstance.processElement( @@ -491,7 +532,7 @@ private BoundednessVisitor testBoundedness( .apply( ParDo.of( ReadFromKafkaDoFn.create( - readSourceDescriptorsDecorator.apply(makeReadSourceDescriptor(consumer))))) + readSourceDescriptorsDecorator.apply(makeReadSourceDescriptor(consumer)), RECORDS))) .setCoder( KvCoder.of( SerializableCoder.of(KafkaSourceDescriptor.class), From ec50bffc0cdc19f8db6ce406f6e36743e6c31109 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 15:59:06 -0500 Subject: [PATCH 54/80] remove "failing transform" field on bad record, add note to CHANGES.md --- CHANGES.md | 1 + .../beam/sdk/transforms/errorhandling/BadRecord.java | 6 ------ .../sdk/transforms/errorhandling/BadRecordRouter.java | 11 ++++------- .../errorhandling/BRHEnabledPTransform.java | 3 +-- .../transforms/errorhandling/BadRecordRouterTest.java | 9 +++------ .../transforms/errorhandling/ErrorHandlerTest.java | 3 +-- 6 files changed, 10 insertions(+), 23 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9318e85d477..84097374d63 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -71,6 +71,7 @@ * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). 
* The Python SDK now type checks `collections.abc.Collections` types properly. Some type hints that were erroneously allowed by the SDK may now fail. ([#29272](https://github.com/apache/beam/pull/29272)) +* Framework for adding Error Handlers to composite transforms added in Java ([#29164](https://github.com/apache/beam/pull/29164)) ## Breaking Changes diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 381d3c64c63..e19b45312f9 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -161,9 +161,6 @@ public abstract static class Failure implements Serializable { /** The description of what was being attempted when the failure occurred. */ public abstract String getDescription(); - /** The particular sub-transform that failed. */ - public abstract String getFailingTransform(); - public static Builder builder() { return new AutoValue_BadRecord_Failure.Builder(); } @@ -186,9 +183,6 @@ public Builder addExceptionStackTrace(Exception exception) throws IOException { } public abstract Builder setDescription(String description); - - public abstract Builder setFailingTransform(String failingTransform); - public abstract Failure build(); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 8cee66335cb..12aad999573 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -39,8 +39,7 @@ void route( RecordT record, @Nullable Coder coder, @Nullable Exception exception, - String description, - String failingTransform) + String description) throws Exception; class ThrowingBadRecordRouter implements BadRecordRouter { @@ -51,8 +50,7 @@ public void route( RecordT record, @Nullable Coder coder, @Nullable Exception exception, - String description, - String failingTransform) + String description) throws Exception { if (exception != null) { throw exception; @@ -70,8 +68,7 @@ public void route( RecordT record, @Nullable Coder coder, @Nullable Exception exception, - String description, - String failingTransform) + String description) throws Exception { Preconditions.checkArgumentNotNull(record); @@ -81,7 +78,7 @@ public void route( // Build up failure information BadRecord.Failure.Builder failureBuilder = - Failure.builder().setDescription(description).setFailingTransform(failingTransform); + Failure.builder().setDescription(description); // It's possible for us to want to handle an error scenario where no actual exception object // exists diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index 56e37278637..ba931497f7e 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -82,8 +82,7 @@ public void processElement(@Element Integer element, MultiOutputReceiver receive element, 
BigEndianIntegerCoder.of(), new RuntimeException("Integer was odd"), - "Integer was odd", - "NoOpDoFn"); + "Integer was odd"); } } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index 554c00a4584..5d7e36dc438 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -106,8 +106,7 @@ public void testRecordingHandler() throws Exception { BadRecord.Failure.Builder failure = BadRecord.Failure.builder() .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); + .setDescription("desc"); verify(badRecordOutputReceiver) .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); @@ -127,8 +126,7 @@ public void testNoCoder() throws Exception { BadRecord.Failure.Builder failure = BadRecord.Failure.builder() .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); + .setDescription("desc"); verify(badRecordOutputReceiver) .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); @@ -175,8 +173,7 @@ public void verifyDeterministic() throws NonDeterministicException {} BadRecord.Failure.Builder failure = BadRecord.Failure.builder() .setException("java.lang.RuntimeException") - .setDescription("desc") - .setFailingTransform("transform"); + .setDescription("desc"); verify(badRecordOutputReceiver) .output(ArgumentMatchers.argThat(ignoreStacktraceMatcher.apply(expectedBuilder, failure))); diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java index 4506a7f3fb6..b0a5733cb1b 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandlerTest.java @@ -113,8 +113,7 @@ public void testErrorHandlerWithBRHTransform() throws Exception { BadRecord.Failure.Builder failure = BadRecord.Failure.builder() .setException("java.lang.RuntimeException: Integer was odd") - .setDescription("Integer was odd") - .setFailingTransform("NoOpDoFn"); + .setDescription("Integer was odd"); failure.setExceptionStacktrace(badRecord.getFailure().getExceptionStacktrace()); expectedBuilder.setFailure(failure.build()); From b0927093e224086630b095ea431c274b7de38a27 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 16:01:39 -0500 Subject: [PATCH 55/80] fix failing test cases --- .../transforms/errorhandling/BRHEnabledPTransform.java | 1 + .../transforms/errorhandling/BadRecordRouterTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index ba931497f7e..1e4069799ea 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -49,6 +49,7 @@ public BRHEnabledPTransform 
withBadRecordHandler(ErrorHandler erro @Override public PCollection expand(PCollection input) { + //TODO this pattern is a clunky. Look to improve this once we have ParDo level error handling PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index 5d7e36dc438..5b7a47f9a75 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -73,7 +73,7 @@ public void testThrowingHandlerWithException() throws Exception { thrown.expect(IOException.class); - handler.route(outputReceiver, new Object(), null, new IOException(), "desc", "transform"); + handler.route(outputReceiver, new Object(), null, new IOException(), "desc"); } @Test @@ -82,7 +82,7 @@ public void testThrowingHandlerWithNoException() throws Exception { thrown.expect(RuntimeException.class); - handler.route(outputReceiver, new Object(), null, null, "desc", "transform"); + handler.route(outputReceiver, new Object(), null, null, "desc"); } @Test @@ -92,7 +92,7 @@ public void testRecordingHandler() throws Exception { BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; handler.route( - outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc", "transform"); + outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc"); BadRecord.Builder expectedBuilder = BadRecord.builder() @@ -118,7 +118,7 @@ public void testNoCoder() throws Exception { BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; - handler.route(outputReceiver, 5, null, new RuntimeException(), "desc", "transform"); + handler.route(outputReceiver, 5, null, new RuntimeException(), "desc"); BadRecord.Builder expectedBuilder = BadRecord.builder().setRecord(Record.builder().setHumanReadableJsonRecord("5").build()); @@ -160,7 +160,7 @@ public List> getCoderArguments() { public void verifyDeterministic() throws NonDeterministicException {} }; - handler.route(outputReceiver, 5, failingCoder, new RuntimeException(), "desc", "transform"); + handler.route(outputReceiver, 5, failingCoder, new RuntimeException(), "desc"); BadRecord.Builder expectedBuilder = BadRecord.builder() From 3f1e97cc5ed175b70b0d9043b42b9bbbcc10a61b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Mon, 27 Nov 2023 16:01:46 -0500 Subject: [PATCH 56/80] fix failing test cases --- .../beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index 1e4069799ea..4627237812c 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -49,7 +49,7 @@ public BRHEnabledPTransform withBadRecordHandler(ErrorHandler erro @Override public PCollection expand(PCollection input) { - //TODO this pattern is a clunky. Look to improve this once we have ParDo level error handling + //TODO this pattern is a clunky. Look to improve this once we have ParDo level error handling. 
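The boilerplate the TODO above calls clunky is worth seeing in one piece: a composite's `expand()` splits the `ParDo` output with tags, hands the `BAD_RECORD_TAG` leg to the error handler, and returns the good leg. A sketch of that shape under the same APIs this patch series uses (`SafeTransform` and its inner DoFn are illustrative, not part of the patch):

```java
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
import org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter;
import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

/** Illustrative composite: the per-transform plumbing the TODO wants to centralize. */
class SafeTransform extends PTransform<PCollection<Integer>, PCollection<Integer>> {
  private static final TupleTag<Integer> GOOD = new TupleTag<Integer>() {};
  private final ErrorHandler<BadRecord, ?> errorHandler;

  SafeTransform(ErrorHandler<BadRecord, ?> errorHandler) {
    this.errorHandler = errorHandler;
  }

  @Override
  public PCollection<Integer> expand(PCollection<Integer> input) {
    PCollectionTuple result =
        input.apply(
            "Process",
            ParDo.of(new PassThroughFn())
                .withOutputTags(GOOD, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG)));
    // Hand the failure leg to the registered handler; it needs the BadRecord coder set.
    errorHandler.addErrorCollection(
        result
            .get(BadRecordRouter.BAD_RECORD_TAG)
            .setCoder(BadRecord.getCoder(input.getPipeline())));
    return result.get(GOOD);
  }

  /** Placeholder DoFn; a real one would route failures to BAD_RECORD_TAG via a router. */
  private static class PassThroughFn extends DoFn<Integer, Integer> {
    @ProcessElement
    public void process(@Element Integer element, MultiOutputReceiver receiver) {
      receiver.get(GOOD).output(element);
    }
  }
}
```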
PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", From 4356f27b5913fbe77f0e4958018427743fcf553c Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 28 Nov 2023 09:39:15 -0500 Subject: [PATCH 57/80] apply spotless --- .../apache/beam/sdk/transforms/errorhandling/BadRecord.java | 1 + .../beam/sdk/transforms/errorhandling/BadRecordRouter.java | 3 +-- .../sdk/transforms/errorhandling/BRHEnabledPTransform.java | 2 +- .../beam/sdk/transforms/errorhandling/BadRecordRouterTest.java | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index e19b45312f9..6cd9493041f 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -183,6 +183,7 @@ public Builder addExceptionStackTrace(Exception exception) throws IOException { } public abstract Builder setDescription(String description); + public abstract Failure build(); } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 12aad999573..5d4a9b51015 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -77,8 +77,7 @@ public void route( recordBuilder.addHumanReadableJson(record).addCoderAndEncodedRecord(coder, record); // Build up failure information - BadRecord.Failure.Builder failureBuilder = - Failure.builder().setDescription(description); + BadRecord.Failure.Builder failureBuilder = Failure.builder().setDescription(description); // It's possible for us to want to handle an error scenario where no actual exception object // exists diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java index 4627237812c..b279ad938db 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BRHEnabledPTransform.java @@ -49,7 +49,7 @@ public BRHEnabledPTransform withBadRecordHandler(ErrorHandler erro @Override public PCollection expand(PCollection input) { - //TODO this pattern is a clunky. Look to improve this once we have ParDo level error handling. + // TODO this pattern is a clunky. Look to improve this once we have ParDo level error handling. 
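For context on what actually lands in the DLQ after these patches: the recording router assembles a `BadRecord` from a snapshot of the element plus failure metadata, now without the removed failing-transform field. A sketch of that structure using only builder calls visible in this series (literal values mirror the tests; whether `addExceptionStackTrace` sets anything beyond the stack trace string is an assumption here):

```java
import java.io.IOException;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record;

public class BadRecordSketch {
  /** Mirrors what the recording router assembles for one failed element. */
  public static BadRecord describe(Exception cause) throws IOException {
    Failure failure =
        Failure.builder()
            .setDescription("desc") // what was being attempted, as in the tests above
            .setException(cause.toString()) // e.g. "java.lang.RuntimeException"
            .addExceptionStackTrace(cause) // serialized stack trace; may throw IOException
            .build();
    return BadRecord.builder()
        .setRecord(Record.builder().setHumanReadableJsonRecord("5").build())
        .setFailure(failure)
        .build();
  }
}
```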
PCollectionTuple pCollectionTuple = input.apply( "NoOpDoFn", diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java index 5b7a47f9a75..fb18a6077db 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouterTest.java @@ -91,8 +91,7 @@ public void testRecordingHandler() throws Exception { BadRecordRouter handler = BadRecordRouter.RECORDING_ROUTER; - handler.route( - outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc"); + handler.route(outputReceiver, 5, BigEndianIntegerCoder.of(), new RuntimeException(), "desc"); BadRecord.Builder expectedBuilder = BadRecord.builder() From c10669898211e43b6e75ce8383088a51299216f8 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 28 Nov 2023 10:44:07 -0500 Subject: [PATCH 58/80] Add tests --- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 30 +++-- .../beam/sdk/io/kafka/ReadFromKafkaDoFn.java | 34 +++-- .../sdk/io/kafka/ReadFromKafkaDoFnTest.java | 116 +++++++++++++++--- 3 files changed, 142 insertions(+), 38 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index e21f782bbfb..8b19c933518 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -2066,9 +2066,11 @@ abstract ReadSourceDescriptors.Builder setCommitOffsetEnabled( abstract ReadSourceDescriptors.Builder setTimestampPolicyFactory( TimestampPolicyFactory policy); - abstract ReadSourceDescriptors.Builder setBadRecordRouter(BadRecordRouter badRecordRouter); + abstract ReadSourceDescriptors.Builder setBadRecordRouter( + BadRecordRouter badRecordRouter); - abstract ReadSourceDescriptors.Builder setErrorHandler(ErrorHandler errorHandler); + abstract ReadSourceDescriptors.Builder setErrorHandler( + ErrorHandler errorHandler); abstract ReadSourceDescriptors.Builder setBounded(boolean bounded); @@ -2336,8 +2338,11 @@ public ReadSourceDescriptors withConsumerConfigOverrides( return toBuilder().setConsumerConfig(consumerConfig).build(); } - public ReadSourceDescriptors withErrorHandler(ErrorHandler errorHandler){ - return toBuilder().setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER).setErrorHandler(errorHandler).build(); + public ReadSourceDescriptors withErrorHandler(ErrorHandler errorHandler) { + return toBuilder() + .setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER) + .setErrorHandler(errorHandler) + .build(); } ReadAllFromRow forExternalBuild() { @@ -2430,15 +2435,18 @@ public PCollection> expand(PCollection Coder> recordCoder = KafkaRecordCoder.of(keyCoder, valueCoder); try { - PCollectionTuple pCollectionTuple = input - .apply(ParDo.of(ReadFromKafkaDoFn.create(this, RECORDS)) + PCollectionTuple pCollectionTuple = + input.apply( + ParDo.of(ReadFromKafkaDoFn.create(this, RECORDS)) .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); - getErrorHandler().addErrorCollection( - pCollectionTuple - .get(BadRecordRouter.BAD_RECORD_TAG) - .setCoder(BadRecord.getCoder(input.getPipeline()))); + getErrorHandler() + .addErrorCollection( + pCollectionTuple + .get(BadRecordRouter.BAD_RECORD_TAG) + 
.setCoder(BadRecord.getCoder(input.getPipeline()))); PCollection>> outputWithDescriptor = - pCollectionTuple.get(RECORDS) + pCollectionTuple + .get(RECORDS) .setCoder( KvCoder.of( input diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java index 604d0b524af..924833290f1 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFn.java @@ -26,7 +26,6 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; -import org.apache.beam.sdk.coders.BigEndianIntegerCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.io.kafka.KafkaIO.ReadSourceDescriptors; import org.apache.beam.sdk.io.kafka.KafkaIOUtils.MovingAvg; @@ -148,7 +147,9 @@ abstract class ReadFromKafkaDoFn extends DoFn>> { - static ReadFromKafkaDoFn create(ReadSourceDescriptors transform, TupleTag>> recordTag) { + static ReadFromKafkaDoFn create( + ReadSourceDescriptors transform, + TupleTag>> recordTag) { if (transform.isBounded()) { return new Bounded<>(transform, recordTag); } else { @@ -158,19 +159,25 @@ static ReadFromKafkaDoFn create(ReadSourceDescriptors transfo @UnboundedPerElement private static class Unbounded extends ReadFromKafkaDoFn { - Unbounded(ReadSourceDescriptors transform, TupleTag>> recordTag) { + Unbounded( + ReadSourceDescriptors transform, + TupleTag>> recordTag) { super(transform, recordTag); } } @BoundedPerElement private static class Bounded extends ReadFromKafkaDoFn { - Bounded(ReadSourceDescriptors transform,TupleTag>> recordTag) { + Bounded( + ReadSourceDescriptors transform, + TupleTag>> recordTag) { super(transform, recordTag); } } - private ReadFromKafkaDoFn(ReadSourceDescriptors transform, TupleTag>> recordTag) { + private ReadFromKafkaDoFn( + ReadSourceDescriptors transform, + TupleTag>> recordTag) { this.consumerConfig = transform.getConsumerConfig(); this.offsetConsumerConfig = transform.getOffsetConsumerConfig(); this.keyDeserializerProvider = @@ -371,7 +378,8 @@ public ProcessContinuation processElement( @Element KafkaSourceDescriptor kafkaSourceDescriptor, RestrictionTracker tracker, WatermarkEstimator watermarkEstimator, - MultiOutputReceiver receiver) throws Exception { + MultiOutputReceiver receiver) + throws Exception { final LoadingCache avgRecordSize = Preconditions.checkStateNotNull(this.avgRecordSize); final Deserializer keyDeserializerInstance = @@ -442,7 +450,7 @@ public ProcessContinuation processElement( return ProcessContinuation.stop(); } try { - KafkaRecordkafkaRecord = + KafkaRecord kafkaRecord = new KafkaRecord<>( rawRecord.topic(), rawRecord.partition(), @@ -471,17 +479,19 @@ public ProcessContinuation processElement( Preconditions.checkStateNotNull(this.extractOutputTimestampFn); outputTimestamp = extractOutputTimestampFn.apply(kafkaRecord); } - receiver.get(recordTag).outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp); + receiver + .get(recordTag) + .outputWithTimestamp(KV.of(kafkaSourceDescriptor, kafkaRecord), outputTimestamp); } catch (SerializationException e) { - //This exception should only occur during the key and value deserialization when creating the Kafka Record + // This exception should only occur during the key and value deserialization when + // creating the Kafka Record badRecordRouter.route( receiver, rawRecord, null, e, - "Failure 
deserializing Key or Value of Kakfa record reading from Kafka", - "ReadFromKafkaDoFn"); - if (timestampPolicy != null){ + "Failure deserializing Key or Value of Kafka record when reading from Kafka"); + if (timestampPolicy != null) { updateWatermarkManually(timestampPolicy, watermarkEstimator, tracker); } } diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java index d36ea080671..514adda9099 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java @@ -48,6 +48,7 @@ import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.errorhandling.BadRecord; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.DefaultErrorHandler; import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; @@ -69,7 +70,9 @@ import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.header.internals.RecordHeaders; +import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.StringDeserializer; import org.checkerframework.checker.initialization.qual.Initialized; import org.checkerframework.checker.nullness.qual.NonNull; @@ -85,7 +88,8 @@ public class ReadFromKafkaDoFnTest { private final TopicPartition topicPartition = new TopicPartition("topic", 0); - private static final TupleTag>> RECORDS = new TupleTag<>(); + private static final TupleTag>> RECORDS = + new TupleTag<>(); @Rule public ExpectedException thrown = ExpectedException.none(); @@ -116,6 +120,31 @@ public Consumer apply(Map input) { .withBootstrapServers("bootstrap_server"); } + private ReadSourceDescriptors makeFailingReadSourceDescriptor( + Consumer kafkaMockConsumer) { + return ReadSourceDescriptors.read() + .withKeyDeserializer(FailingDeserializer.class) + .withValueDeserializer(FailingDeserializer.class) + .withConsumerFactoryFn( + new SerializableFunction, Consumer>() { + @Override + public Consumer apply(Map input) { + return kafkaMockConsumer; + } + }) + .withBootstrapServers("bootstrap_server"); + } + + private static class FailingDeserializer implements Deserializer { + + public FailingDeserializer() {} + + @Override + public String deserialize(String topic, byte[] data) { + throw new SerializationException("Intentional serialization exception"); + } + } + private static class ExceptionMockKafkaConsumer extends MockConsumer { private final TopicPartition topicPartition; @@ -261,9 +290,10 @@ public synchronized long position(TopicPartition partition) { } } - private static class MockMultiOutputReceiver implements MultiOutputReceiver{ + private static class MockMultiOutputReceiver implements MultiOutputReceiver { - MockOutputReceiver>> mockOutputReceiver = new MockOutputReceiver<>(); + MockOutputReceiver>> mockOutputReceiver = + new MockOutputReceiver<>(); MockOutputReceiver badOutputReceiver = new MockOutputReceiver<>(); @@ -279,25 +309,25 @@ private static class MockMultiOutputReceiver implements MultiOutputReceiver{ } } - public List>> getGoodRecords(){ + public List>>
getGoodRecords() { return mockOutputReceiver.getOutputs(); } - public List getBadRecords(){ + public List getBadRecords() { return badOutputReceiver.getOutputs(); } @Override - public @UnknownKeyFor @NonNull @Initialized OutputReceiver<@UnknownKeyFor @NonNull @Initialized Row> getRowReceiver( - @UnknownKeyFor @NonNull @Initialized TupleTag tag) { + public @UnknownKeyFor @NonNull @Initialized + OutputReceiver<@UnknownKeyFor @NonNull @Initialized Row> getRowReceiver( + @UnknownKeyFor @NonNull @Initialized TupleTag tag) { return null; } } private static class MockOutputReceiver implements OutputReceiver { - private final List records = - new ArrayList<>(); + private final List records = new ArrayList<>(); @Override public void output(T output) { @@ -306,8 +336,7 @@ public void output(T output) { @Override public void outputWithTimestamp( - T output, - @UnknownKeyFor @NonNull @Initialized Instant timestamp) { + T output, @UnknownKeyFor @NonNull @Initialized Instant timestamp) { records.add(output); } @@ -437,7 +466,8 @@ public void testProcessElement() throws Exception { ProcessContinuation result = dofnInstance.processElement(descriptor, tracker, null, receiver); assertEquals(ProcessContinuation.stop(), result); assertEquals( - createExpectedRecords(descriptor, startOffset, 3, "key", "value"), receiver.getGoodRecords()); + createExpectedRecords(descriptor, startOffset, 3, "key", "value"), + receiver.getGoodRecords()); } @Test @@ -447,7 +477,7 @@ public void testRawSizeMetric() throws Exception { MetricsContainerImpl container = new MetricsContainerImpl("any"); MetricsEnvironment.setCurrentContainer(container); - MockOutputReceiver receiver = new MockOutputReceiver(); + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); consumer.setNumOfRecordsPerPoll(numElements); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0, numElements)); KafkaSourceDescriptor descriptor = @@ -511,7 +541,8 @@ public Boolean apply(TopicPartition input) { return true; } }) - .build(), RECORDS); + .build(), + RECORDS); instance.setup(); consumer.setNumOfRecordsPerPoll(10); OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); @@ -540,6 +571,60 @@ public void testProcessElementWithException() throws Exception { receiver); } + @Test + public void testProcessElementWithDeserializationExceptionDefaultRecordHandler() + throws Exception { + thrown.expect(SerializationException.class); + thrown.expectMessage("Intentional serialization exception"); + + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); + OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, Long.MAX_VALUE)); + + consumer.setNumOfRecordsPerPoll(1); + + ReadFromKafkaDoFn dofnInstance = + ReadFromKafkaDoFn.create(makeFailingReadSourceDescriptor(consumer), RECORDS); + + dofnInstance.setup(); + + dofnInstance.processElement( + KafkaSourceDescriptor.of(topicPartition, null, null, null, null, null), + tracker, + null, + receiver); + + Assert.assertEquals("OutputRecordSize", 0, receiver.getGoodRecords().size()); + Assert.assertEquals("OutputErrorSize", 0, receiver.getBadRecords().size()); + } + + @Test + public void testProcessElementWithDeserializationExceptionRecordingRecordHandler() + throws Exception { + MockMultiOutputReceiver receiver = new MockMultiOutputReceiver(); + OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0L, 1L)); + + consumer.setNumOfRecordsPerPoll(1); + + // Because we never actually execute the pipeline, no data will actually 
make it to the error + // handler. This will just configure the ReadSourceDesriptors to route the errors to the output + // PCollection instead of rethrowing. + ReadSourceDescriptors descriptors = + makeFailingReadSourceDescriptor(consumer).withErrorHandler(new DefaultErrorHandler<>()); + + ReadFromKafkaDoFn dofnInstance = ReadFromKafkaDoFn.create(descriptors, RECORDS); + + dofnInstance.setup(); + + dofnInstance.processElement( + KafkaSourceDescriptor.of(topicPartition, null, null, null, null, null), + tracker, + null, + receiver); + + Assert.assertEquals("OutputRecordSize", 0, receiver.getGoodRecords().size()); + Assert.assertEquals("OutputErrorSize", 1, receiver.getBadRecords().size()); + } + private static final TypeDescriptor KAFKA_SOURCE_DESCRIPTOR_TYPE_DESCRIPTOR = new TypeDescriptor() {}; @@ -563,7 +648,8 @@ private BoundednessVisitor testBoundedness( .apply( ParDo.of( ReadFromKafkaDoFn.create( - readSourceDescriptorsDecorator.apply(makeReadSourceDescriptor(consumer)), RECORDS))) + readSourceDescriptorsDecorator.apply(makeReadSourceDescriptor(consumer)), + RECORDS))) .setCoder( KvCoder.of( SerializableCoder.of(KafkaSourceDescriptor.class), From 2724a667720ac8d29bfee3bc5f3ef01503863e02 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 28 Nov 2023 11:21:03 -0500 Subject: [PATCH 59/80] Add tests --- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 4 ++ .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 39 +++++++++++++++++++ .../sdk/io/kafka/ReadFromKafkaDoFnTest.java | 2 +- 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index 8b19c933518..b09e857f98d 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -1543,6 +1543,10 @@ static class ReadFromKafkaViaUnbounded extends AbstractReadFromKafka @Override public PCollection> expand(PBegin input) { + if (kafkaRead.getErrorHandler() != null) { + LOG.warn("The Legacy implementation of Kafka Read does not support writing malformed" + + "messages to an error handler. Use the SDF implementation instead."); + } // Handles unbounded source to bounded conversion if maxNumRecords or maxReadTime is set. 
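The tests above drive the failure path with a deserializer that throws on every record. Extracted as a standalone class for reference (a sketch; KafkaIO only reroutes `SerializationException`, so throwing any other exception type would still fail the bundle):

```java
import java.util.Map;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;

/** Fails on every record, forcing each element down the bad-record path. */
public class AlwaysFailingDeserializer implements Deserializer<String> {
  @Override
  public void configure(Map<String, ?> configs, boolean isKey) {}

  @Override
  public String deserialize(String topic, byte[] data) {
    // The catch block added to ReadFromKafkaDoFn routes exactly this exception type.
    throw new SerializationException("Intentional serialization exception");
  }

  @Override
  public void close() {}
}
```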
Unbounded> unbounded = org.apache.beam.sdk.io.Read.from( diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 2c8ace9c66c..3527f3c016e 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -18,6 +18,7 @@ package org.apache.beam.sdk.io.kafka; import static org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString; +import static org.apache.beam.sdk.transforms.Count.combineFn; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNull; @@ -29,6 +30,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; +import java.util.Objects; import java.util.Random; import java.util.Set; import java.util.UUID; @@ -43,6 +45,7 @@ import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.io.common.IOITHelper; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; +import org.apache.beam.sdk.io.kafka.ReadFromKafkaDoFnTest.FailingDeserializer; import org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource; import org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions; import org.apache.beam.sdk.options.Default; @@ -72,6 +75,9 @@ import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Values; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.transforms.windowing.CalendarWindows; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.Window; @@ -352,6 +358,39 @@ public void processElement(@Element String element, OutputReceiver outpu } } + //This test verifies that bad data from Kafka is properly sent to the error handler + @Test + public void testKafkaIOSDFWithErrorHandler() throws IOException { + writePipeline.apply(Create.of(KV.of("key", "val"))) + .apply("Write to Kafka", KafkaIO.write() + .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) + .withKeySerializer(StringSerializer.class) + .withValueSerializer(StringSerializer.class) + .withTopic(options.getKafkaTopic() + "-failingDeserialization")); + + PipelineResult writeResult = writePipeline.run(); + PipelineResult.State writeState = writeResult.waitUntilFinish(); + assertNotEquals(PipelineResult.State.FAILED, writeState); + + BadRecordErrorHandler> eh = sdfReadPipeline.registerBadRecordErrorHandler(Combine.globally(Count.combineFn()).withoutDefaults()); + sdfReadPipeline.apply(KafkaIO.read() + .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) + .withTopic(options.getKafkaTopic() + "-failingDeserialization") + .withConsumerConfigUpdates(ImmutableMap.of("auto.offset.reset", "earliest")) + .withKeyDeserializer(FailingDeserializer.class) + .withValueDeserializer(FailingDeserializer.class) + .withErrorHandler(eh)); + eh.close(); + + PAssert.thatSingleton(Objects.requireNonNull(eh.getOutput())).isEqualTo(1L); + + PipelineResult readResult = sdfReadPipeline.run(); + PipelineResult.State readState = + readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout())); + cancelIfTimeouted(readResult, readState); + 
assertNotEquals(PipelineResult.State.FAILED, readState); + } + // This test roundtrips a single KV to verify that externalWithMetadata // can handle null keys and values correctly. @Test diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java index 514adda9099..ed91073e469 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java @@ -135,7 +135,7 @@ public Consumer apply(Map input) { .withBootstrapServers("bootstrap_server"); } - private static class FailingDeserializer implements Deserializer { + public static class FailingDeserializer implements Deserializer { public FailingDeserializer() {} From 686a5e743848c5a39af06f659d7431624e58866a Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 28 Nov 2023 13:26:15 -0500 Subject: [PATCH 60/80] fix test case --- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 5 +- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 52 +++++++++++++------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index b09e857f98d..f2b0863191b 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -1544,8 +1544,9 @@ static class ReadFromKafkaViaUnbounded extends AbstractReadFromKafka @Override public PCollection> expand(PBegin input) { if (kafkaRead.getErrorHandler() != null) { - LOG.warn("The Legacy implementation of Kafka Read does not support writing malformed" - + "messages to an error handler. Use the SDF implementation instead."); + LOG.warn( + "The Legacy implementation of Kafka Read does not support writing malformed" + + "messages to an error handler. Use the SDF implementation instead."); } // Handles unbounded source to bounded conversion if maxNumRecords or maxReadTime is set. 
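The KafkaIOIT fix in this patch (see the next hunk) wraps the bad-record counter in a windowed sink so the global combine is legal on an unbounded read. Pulled out as a sketch, the registration pattern looks roughly like this (class and step names are illustrative):

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Count;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler;
import org.apache.beam.sdk.transforms.windowing.CalendarWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PCollection;

public class DlqCountSinkExample {
  /** Windowed count of bad records; withoutDefaults() keeps a global combine legal on streams. */
  static class CountBadRecords extends PTransform<PCollection<BadRecord>, PCollection<Long>> {
    @Override
    public PCollection<Long> expand(PCollection<BadRecord> input) {
      return input
          .apply("Window", Window.into(CalendarWindows.years(1)))
          .apply("CountBad", Combine.globally(Count.<BadRecord>combineFn()).withoutDefaults());
    }
  }

  public static void main(String[] args) throws Exception {
    Pipeline pipeline = Pipeline.create();
    BadRecordErrorHandler<PCollection<Long>> handler =
        pipeline.registerBadRecordErrorHandler(new CountBadRecords());
    // ... apply the reads or writes that route failures into the handler here ...
    handler.close(); // wiring happens on close(); handler.getOutput() is valid afterwards
    pipeline.run().waitUntilFinish();
  }
}
```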
Unbounded> unbounded = diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 3527f3c016e..a3f1eeb6d7d 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -18,7 +18,6 @@ package org.apache.beam.sdk.io.kafka; import static org.apache.beam.sdk.io.synthetic.SyntheticOptions.fromJsonString; -import static org.apache.beam.sdk.transforms.Count.combineFn; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNull; @@ -73,10 +72,10 @@ import org.apache.beam.sdk.transforms.GroupByKey; import org.apache.beam.sdk.transforms.Keys; import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Values; import org.apache.beam.sdk.transforms.errorhandling.BadRecord; -import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.transforms.windowing.CalendarWindows; import org.apache.beam.sdk.transforms.windowing.FixedWindows; @@ -99,7 +98,10 @@ import org.apache.kafka.common.serialization.IntegerSerializer; import org.apache.kafka.common.serialization.StringDeserializer; import org.apache.kafka.common.serialization.StringSerializer; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; import org.joda.time.Duration; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -358,28 +360,44 @@ public void processElement(@Element String element, OutputReceiver outpu } } - //This test verifies that bad data from Kafka is properly sent to the error handler + // This test verifies that bad data from Kafka is properly sent to the error handler @Test public void testKafkaIOSDFWithErrorHandler() throws IOException { - writePipeline.apply(Create.of(KV.of("key", "val"))) - .apply("Write to Kafka", KafkaIO.write() - .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) - .withKeySerializer(StringSerializer.class) - .withValueSerializer(StringSerializer.class) - .withTopic(options.getKafkaTopic() + "-failingDeserialization")); + writePipeline + .apply(Create.of(KV.of("key", "val"))) + .apply( + "Write to Kafka", + KafkaIO.write() + .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) + .withKeySerializer(StringSerializer.class) + .withValueSerializer(StringSerializer.class) + .withTopic(options.getKafkaTopic() + "-failingDeserialization")); PipelineResult writeResult = writePipeline.run(); PipelineResult.State writeState = writeResult.waitUntilFinish(); assertNotEquals(PipelineResult.State.FAILED, writeState); - BadRecordErrorHandler> eh = sdfReadPipeline.registerBadRecordErrorHandler(Combine.globally(Count.combineFn()).withoutDefaults()); - sdfReadPipeline.apply(KafkaIO.read() - .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) - .withTopic(options.getKafkaTopic() + "-failingDeserialization") - .withConsumerConfigUpdates(ImmutableMap.of("auto.offset.reset", "earliest")) - .withKeyDeserializer(FailingDeserializer.class) - 
.withValueDeserializer(FailingDeserializer.class) - .withErrorHandler(eh)); + PTransform, PCollection> sinkTransform = + new PTransform, PCollection>() { + @Override + public @UnknownKeyFor @NonNull @Initialized PCollection expand( + PCollection input) { + return input + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults()); + } + }; + + BadRecordErrorHandler> eh = + sdfReadPipeline.registerBadRecordErrorHandler(sinkTransform); + sdfReadPipeline.apply( + KafkaIO.read() + .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) + .withTopic(options.getKafkaTopic() + "-failingDeserialization") + .withConsumerConfigUpdates(ImmutableMap.of("auto.offset.reset", "earliest")) + .withKeyDeserializer(FailingDeserializer.class) + .withValueDeserializer(FailingDeserializer.class) + .withErrorHandler(eh)); eh.close(); PAssert.thatSingleton(Objects.requireNonNull(eh.getOutput())).isEqualTo(1L); From 7b11704a4363f57f6e2a76a9bda65f31f8b3051f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 28 Nov 2023 14:21:38 -0500 Subject: [PATCH 61/80] add documentation --- .../sdk/transforms/errorhandling/ErrorHandler.java | 10 +++++----- .../java/org/apache/beam/sdk/io/kafka/KafkaIO.java | 12 ++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 9e0298d885e..054df020f99 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -49,17 +49,17 @@ *
 * <p>Simple usage with one DLQ
 *
 * <pre>{@code
 * PCollection<?> records = ...;
- * try (ErrorHandler<BadRecord, ?> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
- *  PCollection<?> results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler));
+ * try (BadRecordErrorHandler<T> errorHandler = pipeline.registerBadRecordErrorHandler(SomeSink.write())) {
+ *  PCollection<?> results = records.apply(SomeIO.write().withErrorHandler(errorHandler));
  * }
  * results.apply(SomeOtherTransform);
  * }</pre>
 *
 * <p>Usage with multiple DLQ stages
 *
 * <pre>{@code
 * PCollection<?> records = ...;
- * try (ErrorHandler<BadRecord, ?> errorHandler = pipeline.registerErrorHandler(SomeSink.write())) {
- *  PCollection<?> results = records.apply(SomeIO.write().withDeadLetterQueue(errorHandler))
- *                        .apply(OtherTransform.builder().withDeadLetterQueue(errorHandler));
+ * try (BadRecordErrorHandler<T> errorHandler = pipeline.registerBadRecordErrorHandler(SomeSink.write())) {
+ *  PCollection<?> results = records.apply(SomeIO.write().withErrorHandler(errorHandler))
+ *                        .apply(OtherTransform.builder().withErrorHandler(errorHandler));
  * }
  * results.apply(SomeOtherTransform);
  * }</pre>
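One point the javadoc examples above imply but do not spell out: the try-with-resources shape matters because `close()` is what actually wires the collected error PCollections into the sink, so it must happen before the pipeline runs. A compact sketch of the lifecycle, with an assumed `DescriptionSink` standing in for a real DLQ sink:

```java
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;

public class DlqLifecycle {
  /** Assumed stand-in sink: reduce each bad record to its failure description. */
  static class DescriptionSink extends PTransform<PCollection<BadRecord>, PCollection<String>> {
    @Override
    public PCollection<String> expand(PCollection<BadRecord> input) {
      return input.apply(
          MapElements.into(TypeDescriptors.strings())
              .via((BadRecord bad) -> bad.getFailure().getDescription()));
    }
  }

  public static void main(String[] args) throws Exception {
    Pipeline pipeline = Pipeline.create();
    try (BadRecordErrorHandler<PCollection<String>> handler =
        pipeline.registerBadRecordErrorHandler(new DescriptionSink())) {
      // Apply the transforms that take withErrorHandler(handler) in between.
    } // close() runs here and is what actually applies DescriptionSink to the errors.
    pipeline.run().waitUntilFinish();
  }
}
```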
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index f2b0863191b..d6de9571b62 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -172,6 +172,10 @@ * // signal. * .withCheckStopReadingFn(new SerializedFunction() {}) * + * //If you would like to send messages that fail to be parsed from Kafka to an alternate sink, + * //use the error handler pattern as defined in {@link ErrorHandler} + * .withErrorHandler(errorHandler) + * * // finally, if you don't need Kafka metadata, you can drop it.g * .withoutMetadata() // PCollection> * ) @@ -1978,7 +1982,7 @@ public void populateDisplayData(DisplayData.Builder builder) { public abstract static class ReadSourceDescriptors extends PTransform, PCollection>> { - private final TupleTag>> RECORDS = new TupleTag<>(); + private final TupleTag>> records = new TupleTag<>(); private static final Logger LOG = LoggerFactory.getLogger(ReadSourceDescriptors.class); @@ -2442,8 +2446,8 @@ public PCollection> expand(PCollection try { PCollectionTuple pCollectionTuple = input.apply( - ParDo.of(ReadFromKafkaDoFn.create(this, RECORDS)) - .withOutputTags(RECORDS, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); + ParDo.of(ReadFromKafkaDoFn.create(this, records)) + .withOutputTags(records, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); getErrorHandler() .addErrorCollection( pCollectionTuple @@ -2451,7 +2455,7 @@ public PCollection> expand(PCollection .setCoder(BadRecord.getCoder(input.getPipeline()))); PCollection>> outputWithDescriptor = pCollectionTuple - .get(RECORDS) + .get(records) .setCoder( KvCoder.of( input From 4957b8aff53093f7428e7eb3f5b0e6a91c96227f Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 29 Nov 2023 10:33:03 -0500 Subject: [PATCH 62/80] wire error handler into kafka write --- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 84 ++++++++++++++----- .../apache/beam/sdk/io/kafka/KafkaWriter.java | 45 ++++++---- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 2 +- .../sdk/io/kafka/ReadFromKafkaDoFnTest.java | 3 +- 4 files changed, 95 insertions(+), 39 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index d6de9571b62..c0f601f9166 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -84,6 +84,8 @@ import org.apache.beam.sdk.transforms.errorhandling.BadRecord; import org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter; import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.DefaultErrorHandler; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimator; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.Manual; import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.MonotonicallyIncreasing; @@ -174,7 +176,7 @@ * * //If you would like to send messages that fail to be parsed from Kafka to an alternate sink, * //use the error handler pattern as defined in {@link ErrorHandler} - * .withErrorHandler(errorHandler) + * .withBadRecordErrorHandler(errorHandler) * 
* // finally, if you don't need Kafka metadata, you can drop it.g * .withoutMetadata() // PCollection> @@ -601,13 +603,7 @@ public static ReadSourceDescriptors readSourceDescriptors() { */ public static Write write() { return new AutoValue_KafkaIO_Write.Builder() - .setWriteRecordsTransform( - new AutoValue_KafkaIO_WriteRecords.Builder() - .setProducerConfig(WriteRecords.DEFAULT_PRODUCER_PROPERTIES) - .setEOS(false) - .setNumShards(0) - .setConsumerFactoryFn(KafkaIOUtils.KAFKA_CONSUMER_FACTORY_FN) - .build()) + .setWriteRecordsTransform(writeRecords()) .build(); } @@ -622,6 +618,7 @@ public static WriteRecords writeRecords() { .setEOS(false) .setNumShards(0) .setConsumerFactoryFn(KafkaIOUtils.KAFKA_CONSUMER_FACTORY_FN) + .setBadRecordRouter(BadRecordRouter.THROWING_ROUTER) .build(); } @@ -701,7 +698,7 @@ public abstract static class Read public abstract @Nullable CheckStopReadingFn getCheckStopReadingFn(); @Pure - abstract @Nullable ErrorHandler getErrorHandler(); + abstract @Nullable ErrorHandler getBadRecordErrorHandler(); abstract Builder toBuilder(); @@ -756,7 +753,8 @@ Builder setCheckStopReadingFn( return setCheckStopReadingFn(CheckStopReadingFnWrapper.of(checkStopReadingFn)); } - abstract Builder setErrorHandler(ErrorHandler errorHandler); + abstract Builder setBadRecordErrorHandler( + ErrorHandler badRecordErrorHandler); abstract Read build(); @@ -1326,8 +1324,8 @@ public Read withCheckStopReadingFn( .build(); } - public Read withErrorHandler(ErrorHandler errorHandler) { - return toBuilder().setErrorHandler(errorHandler).build(); + public Read withBadRecordErrorHandler(ErrorHandler badRecordErrorHandler) { + return toBuilder().setBadRecordErrorHandler(badRecordErrorHandler).build(); } /** Returns a {@link PTransform} for PCollection of {@link KV}, dropping Kafka metatdata. */ @@ -1547,7 +1545,7 @@ static class ReadFromKafkaViaUnbounded extends AbstractReadFromKafka @Override public PCollection> expand(PBegin input) { - if (kafkaRead.getErrorHandler() != null) { + if (kafkaRead.getBadRecordErrorHandler() != null) { LOG.warn( "The Legacy implementation of Kafka Read does not support writing malformed" + "messages to an error handler. 
Use the SDF implementation instead."); @@ -1599,8 +1597,9 @@ public PCollection> expand(PBegin input) { if (kafkaRead.getStopReadTime() != null) { readTransform = readTransform.withBounded(); } - if (kafkaRead.getErrorHandler() != null) { - readTransform = readTransform.withErrorHandler(kafkaRead.getErrorHandler()); + if (kafkaRead.getBadRecordErrorHandler() != null) { + readTransform = + readTransform.withBadRecordErrorHandler(kafkaRead.getBadRecordErrorHandler()); } PCollection output; if (kafkaRead.isDynamicRead()) { @@ -2029,7 +2028,7 @@ public abstract static class ReadSourceDescriptors abstract BadRecordRouter getBadRecordRouter(); @Pure - abstract ErrorHandler getErrorHandler(); + abstract ErrorHandler getBadRecordErrorHandler(); abstract boolean isBounded(); @@ -2078,8 +2077,8 @@ abstract ReadSourceDescriptors.Builder setTimestampPolicyFactory( abstract ReadSourceDescriptors.Builder setBadRecordRouter( BadRecordRouter badRecordRouter); - abstract ReadSourceDescriptors.Builder setErrorHandler( - ErrorHandler errorHandler); + abstract ReadSourceDescriptors.Builder setBadRecordErrorHandler( + ErrorHandler badRecordErrorHandler); abstract ReadSourceDescriptors.Builder setBounded(boolean bounded); @@ -2093,7 +2092,7 @@ public static ReadSourceDescriptors read() { .setCommitOffsetEnabled(false) .setBounded(false) .setBadRecordRouter(BadRecordRouter.THROWING_ROUTER) - .setErrorHandler(new ErrorHandler.DefaultErrorHandler<>()) + .setBadRecordErrorHandler(new ErrorHandler.DefaultErrorHandler<>()) .build() .withProcessingTime() .withMonotonicallyIncreasingWatermarkEstimator(); @@ -2347,10 +2346,11 @@ public ReadSourceDescriptors withConsumerConfigOverrides( return toBuilder().setConsumerConfig(consumerConfig).build(); } - public ReadSourceDescriptors withErrorHandler(ErrorHandler errorHandler) { + public ReadSourceDescriptors withBadRecordErrorHandler( + ErrorHandler errorHandler) { return toBuilder() .setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER) - .setErrorHandler(errorHandler) + .setBadRecordErrorHandler(errorHandler) .build(); } @@ -2448,7 +2448,7 @@ public PCollection> expand(PCollection input.apply( ParDo.of(ReadFromKafkaDoFn.create(this, records)) .withOutputTags(records, TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); - getErrorHandler() + getBadRecordErrorHandler() .addErrorCollection( pCollectionTuple .get(BadRecordRouter.BAD_RECORD_TAG) @@ -2562,6 +2562,8 @@ public abstract static class WriteRecords // we shouldn't have to duplicate the same API for similar transforms like {@link Write} and // {@link WriteRecords}. See example at {@link PubsubIO.Write}. + transient ErrorHandler badRecordErrorHandler = new DefaultErrorHandler<>(); + @Pure public abstract @Nullable String getTopic(); @@ -2596,6 +2598,9 @@ public abstract static class WriteRecords public abstract @Nullable SerializableFunction, ? extends Consumer> getConsumerFactoryFn(); + @Pure + public abstract BadRecordRouter getBadRecordRouter(); + abstract Builder toBuilder(); @AutoValue.Builder @@ -2623,6 +2628,8 @@ abstract Builder setPublishTimestampFunction( abstract Builder setConsumerFactoryFn( SerializableFunction, ? 
extends Consumer> fn); + abstract Builder setBadRecordRouter(BadRecordRouter router); + abstract WriteRecords build(); } @@ -2769,6 +2776,14 @@ public WriteRecords withConsumerFactoryFn( return toBuilder().setConsumerFactoryFn(consumerFactoryFn).build(); } + public WriteRecords withBadRecordErrorHandler( + ErrorHandler badRecordErrorHandler) { + WriteRecords writeRecords = + toBuilder().setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER).build(); + writeRecords.badRecordErrorHandler = badRecordErrorHandler; + return writeRecords; + } + @Override public PDone expand(PCollection> input) { checkArgument( @@ -2780,6 +2795,9 @@ public PDone expand(PCollection> input) { if (isEOS()) { checkArgument(getTopic() != null, "withTopic() is required when isEOS() is true"); + checkArgument( + badRecordErrorHandler instanceof DefaultErrorHandler, + "BadRecordErrorHandling isn't supported with Kafka Exactly Once writing"); KafkaExactlyOnceSink.ensureEOSSupport(); // TODO: Verify that the group_id does not have existing state stored on Kafka unless @@ -2790,7 +2808,18 @@ public PDone expand(PCollection> input) { input.apply(new KafkaExactlyOnceSink<>(this)); } else { - input.apply(ParDo.of(new KafkaWriter<>(this))); + // Even though the errors are the only output from writing to Kafka, we maintain a + // PCollectionTuple + // with a void tag as the 'primary' output for easy forward compatibility + PCollectionTuple pCollectionTuple = + input.apply( + ParDo.of(new KafkaWriter<>(this)) + .withOutputTags( + new TupleTag(), TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); + badRecordErrorHandler.addErrorCollection( + pCollectionTuple + .get(BadRecordRouter.BAD_RECORD_TAG) + .setCoder(BadRecord.getCoder(input.getPipeline()))); } return PDone.in(input.getPipeline()); } @@ -3053,6 +3082,15 @@ public Write withProducerConfigUpdates(Map configUpdates) getWriteRecordsTransform().withProducerConfigUpdates(configUpdates)); } + /** + * Configure a {@link BadRecordErrorHandler} to receive records that fail to serialize + * when being sent to Kafka. 
+ */ + public Write withBadRecordErrorHandler(ErrorHandler badRecordErrorHandler) { + return withWriteRecordsTransform( + getWriteRecordsTransform().withBadRecordErrorHandler(badRecordErrorHandler)); + } + @Override public PDone expand(PCollection> input) { final String topic = Preconditions.checkStateNotNull(getTopic(), "withTopic() is required"); diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java index c0c9772959f..d96efc6f320 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.SinkMetrics; import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.errorhandling.BadRecordRouter; import org.apache.beam.sdk.util.Preconditions; import org.apache.kafka.clients.producer.Callback; import org.apache.kafka.clients.producer.KafkaProducer; @@ -32,6 +33,7 @@ import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.clients.producer.RecordMetadata; +import org.apache.kafka.common.errors.SerializationException; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,7 +59,7 @@ public void setup() { // Suppression since errors are tracked in SendCallback(), and checked in finishBundle() @ProcessElement @SuppressWarnings("FutureReturnValueIgnored") - public void processElement(ProcessContext ctx) throws Exception { + public void processElement(ProcessContext ctx, MultiOutputReceiver receiver) throws Exception { Producer producer = Preconditions.checkStateNotNull(this.producer); checkForFailures(); @@ -75,19 +77,30 @@ public void processElement(ProcessContext ctx) throws Exception { topicName = spec.getTopic(); } - @SuppressWarnings({"nullness", "unused"}) // Kafka library not annotated - Future ignored = - producer.send( - new ProducerRecord<>( - topicName, - record.partition(), - timestampMillis, - record.key(), - record.value(), - record.headers()), - callback); - - elementsWritten.inc(); + try { + @SuppressWarnings({"nullness", "unused"}) // Kafka library not annotated + Future ignored = + producer.send( + new ProducerRecord<>( + topicName, + record.partition(), + timestampMillis, + record.key(), + record.value(), + record.headers()), + callback); + + elementsWritten.inc(); + } catch (SerializationException e) { + // This exception should only occur during the key and value deserialization when + // creating the Kafka Record + badRecordRouter.route( + receiver, + record, + null, + e, + "Failure serializing Key or Value of Kafka record while writing to Kafka"); + } } @FinishBundle @@ -110,6 +123,8 @@ public void teardown() { private final WriteRecords spec; private final Map producerConfig; + private final BadRecordRouter badRecordRouter; + private transient @Nullable Producer producer = null; // first exception and number of failures since last invocation of checkForFailures(): private transient @Nullable Exception sendException = null; @@ -122,6 +137,8 @@ public void teardown() { this.producerConfig = new HashMap<>(spec.getProducerConfig()); + this.badRecordRouter = spec.getBadRecordRouter(); + if (spec.getKeySerializer() != null) { 
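+ // Copy the key serializer configured on the transform spec into the producer config used to construct the Kafka producer.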
this.producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, spec.getKeySerializer()); } diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index a3f1eeb6d7d..9f1d0908b51 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -397,7 +397,7 @@ public void testKafkaIOSDFWithErrorHandler() throws IOException { .withConsumerConfigUpdates(ImmutableMap.of("auto.offset.reset", "earliest")) .withKeyDeserializer(FailingDeserializer.class) .withValueDeserializer(FailingDeserializer.class) - .withErrorHandler(eh)); + .withBadRecordErrorHandler(eh)); eh.close(); PAssert.thatSingleton(Objects.requireNonNull(eh.getOutput())).isEqualTo(1L); diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java index ed91073e469..48b5b060a29 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/ReadFromKafkaDoFnTest.java @@ -609,7 +609,8 @@ public void testProcessElementWithDeserializationExceptionRecordingRecordHandler // handler. This will just configure the ReadSourceDescriptors to route the errors to the output // PCollection instead of rethrowing. ReadSourceDescriptors descriptors = - makeFailingReadSourceDescriptor(consumer).withErrorHandler(new DefaultErrorHandler<>()); + makeFailingReadSourceDescriptor(consumer) + .withBadRecordErrorHandler(new DefaultErrorHandler<>()); ReadFromKafkaDoFn dofnInstance = ReadFromKafkaDoFn.create(descriptors, RECORDS); From 7e3135ef04038dc6ad6f7883cf296b4b86566284 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Wed, 29 Nov 2023 17:15:34 -0500 Subject: [PATCH 63/80] fix failing test case --- .../io/kafka/KafkaIOReadImplementationCompatibility.java | 1 + .../org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIOReadImplementationCompatibility.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIOReadImplementationCompatibility.java index b779de1d9cf..a2cc9aaeb4d 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIOReadImplementationCompatibility.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIOReadImplementationCompatibility.java @@ -111,6 +111,7 @@ Object getDefaultValue() { KEY_DESERIALIZER_PROVIDER, VALUE_DESERIALIZER_PROVIDER, CHECK_STOP_READING_FN(SDF), + BAD_RECORD_ERROR_HANDLER(SDF), ; @Nonnull private final ImmutableSet supportedImplementations; diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java index 2ccf7dcc3a9..298bcea554f 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java @@ -352,11 +352,7 @@ public void testConstructKafkaWrite() throws Exception { RunnerApi.PTransform writeParDo = result .getComponents() - .getTransformsOrThrow( - result - .getComponents() - 
.getTransformsOrThrow(writeComposite.getSubtransforms(0)) - .getSubtransforms(0)); + .getTransformsOrThrow(writeComposite.getSubtransforms(0)); RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload()); From 4759f7223486cec23ad6bc7156ef2c5635daf752 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 30 Nov 2023 11:20:52 -0500 Subject: [PATCH 64/80] Add tests for writing to kafka with exception handling --- .../apache/beam/sdk/io/kafka/KafkaWriter.java | 3 +- .../sdk/io/kafka/KafkaIOExternalTest.java | 4 +- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 37 +++++- .../apache/beam/sdk/io/kafka/KafkaIOTest.java | 108 +++++++++++++++--- 4 files changed, 134 insertions(+), 18 deletions(-) diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java index d96efc6f320..4f4663aa8cc 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaWriter.java @@ -93,7 +93,8 @@ public void processElement(ProcessContext ctx, MultiOutputReceiver receiver) thr elementsWritten.inc(); } catch (SerializationException e) { // This exception should only occur during the key and value deserialization when - // creating the Kafka Record + // creating the Kafka Record. We can catch the exception here as producer.send serializes + // the record before starting the future. badRecordRouter.route( receiver, record, diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java index 298bcea554f..38bf723a15a 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOExternalTest.java @@ -350,9 +350,7 @@ public void testConstructKafkaWrite() throws Exception { RunnerApi.PTransform writeComposite = result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1)); RunnerApi.PTransform writeParDo = - result - .getComponents() - .getTransformsOrThrow(writeComposite.getSubtransforms(0)); + result.getComponents().getTransformsOrThrow(writeComposite.getSubtransforms(0)); RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload()); diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 9f1d0908b51..904e939763a 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -44,6 +44,7 @@ import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.io.common.IOITHelper; import org.apache.beam.sdk.io.common.IOTestPipelineOptions; +import org.apache.beam.sdk.io.kafka.KafkaIOTest.FailingLongSerializer; import org.apache.beam.sdk.io.kafka.ReadFromKafkaDoFnTest.FailingDeserializer; import org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource; import org.apache.beam.sdk.io.synthetic.SyntheticSourceOptions; @@ -362,7 +363,7 @@ public void processElement(@Element String element, OutputReceiver outpu // This test verifies that bad data from Kafka is properly sent to the error handler @Test - public void testKafkaIOSDFWithErrorHandler() throws IOException { 
+ public void testKafkaIOSDFReadWithErrorHandler() throws IOException { writePipeline .apply(Create.of(KV.of("key", "val"))) .apply( @@ -409,6 +410,40 @@ public void testKafkaIOSDFWithErrorHandler() throws IOException { assertNotEquals(PipelineResult.State.FAILED, readState); } + @Test + public void testKafkaIOWriteWithErrorHandler() throws IOException { + PTransform, PCollection> sinkTransform = + new PTransform, PCollection>() { + @Override + public @UnknownKeyFor @NonNull @Initialized PCollection expand( + PCollection input) { + return input + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults()); + } + }; + + BadRecordErrorHandler> eh = + writePipeline.registerBadRecordErrorHandler(sinkTransform); + writePipeline + .apply("Create single KV", Create.of(KV.of("key", 4L))) + .apply( + "Write to Kafka", + KafkaIO.write() + .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) + .withKeySerializer(StringSerializer.class) + .withValueSerializer(FailingLongSerializer.class) + .withTopic(options.getKafkaTopic() + "-failingSerialization") + .withBadRecordErrorHandler(eh)); + eh.close(); + + PAssert.thatSingleton(Objects.requireNonNull(eh.getOutput())).isEqualTo(1L); + + PipelineResult writeResult = writePipeline.run(); + PipelineResult.State writeState = writeResult.waitUntilFinish(); + assertNotEquals(PipelineResult.State.FAILED, writeState); + } + // This test roundtrips a single KV to verify that externalWithMetadata // can handle null keys and values correctly. @Test diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java index aeb5818e913..6e39fcab0e0 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java @@ -51,6 +51,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -87,6 +88,7 @@ import org.apache.beam.sdk.testing.ExpectedLogs; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.transforms.Combine; import org.apache.beam.sdk.transforms.Count; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.transforms.Distinct; @@ -95,11 +97,15 @@ import org.apache.beam.sdk.transforms.MapElements; import org.apache.beam.sdk.transforms.Max; import org.apache.beam.sdk.transforms.Min; +import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.Values; import org.apache.beam.sdk.transforms.display.DisplayData; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.CalendarWindows; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.util.CoderUtils; @@ -121,9 +127,12 @@ import org.apache.kafka.clients.producer.Producer; import org.apache.kafka.clients.producer.ProducerConfig; import 
org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.internals.DefaultPartitioner; +import org.apache.kafka.common.Cluster; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.PartitionInfo; import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.errors.SerializationException; import org.apache.kafka.common.header.Header; import org.apache.kafka.common.header.Headers; import org.apache.kafka.common.header.internals.RecordHeader; @@ -136,7 +145,10 @@ import org.apache.kafka.common.serialization.LongSerializer; import org.apache.kafka.common.serialization.Serializer; import org.apache.kafka.common.utils.Utils; +import org.checkerframework.checker.initialization.qual.Initialized; +import org.checkerframework.checker.nullness.qual.NonNull; import org.checkerframework.checker.nullness.qual.Nullable; +import org.checkerframework.checker.nullness.qual.UnknownKeyFor; import org.hamcrest.collection.IsIterableContainingInAnyOrder; import org.hamcrest.collection.IsIterableWithSize; import org.joda.time.Duration; @@ -1379,7 +1391,7 @@ public void testSink() throws Exception { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1404,13 +1416,75 @@ public void testSink() throws Exception { } } + public static class FailingLongSerializer implements Serializer { + // enables instantiation by registries + public FailingLongSerializer() {} + + @Override + public byte[] serialize(String topic, Long data) { + throw new SerializationException("ExpectedSerializationException"); + } + } + + @Test + public void testSinkWithSerializationErrors() throws Exception { + // Attempt to write 10 elements to Kafka, but they will all fail to serialize, and be sent to + // the DLQ + + int numElements = 10; + + try (MockProducerWrapper producerWrapper = + new MockProducerWrapper(new FailingLongSerializer())) { + + ProducerSendCompletionThread completionThread = + new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); + + String topic = "test"; + + PTransform, PCollection> sinkTransform = + new PTransform, PCollection>() { + @Override + public @UnknownKeyFor @NonNull @Initialized PCollection expand( + PCollection input) { + return input + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply( + "Combine", Combine.globally(Count.combineFn()).withoutDefaults()); + } + }; + + BadRecordErrorHandler> eh = p.registerBadRecordErrorHandler(sinkTransform); + + p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata()) + .apply( + KafkaIO.write() + .withBootstrapServers("none") + .withTopic(topic) + .withKeySerializer(IntegerSerializer.class) + .withValueSerializer(FailingLongSerializer.class) + .withInputTimestamp() + .withProducerFactoryFn(new ProducerFactoryFn(producerWrapper.producerKey)) + .withBadRecordErrorHandler(eh)); + + eh.close(); + + PAssert.thatSingleton(Objects.requireNonNull(eh.getOutput())).isEqualTo(10L); + + p.run(); + + completionThread.shutdown(); + + verifyProducerRecords(producerWrapper.mockProducer, topic, 0, false, true); + } + } + @Test public void testValuesSink() throws Exception { // similar to testSink(), but use the values() interface. 
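// Note: the values() variant of the write transform publishes only the values; the resulting Kafka records carry no keys.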
int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1442,7 +1516,7 @@ public void testRecordsSink() throws Exception { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1474,7 +1548,7 @@ public void testSinkToMultipleTopics() throws Exception { // Set different output topic names int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1519,7 +1593,7 @@ public void testKafkaWriteHeaders() throws Exception { // Set different output topic names int numElements = 1; SimpleEntry header = new SimpleEntry<>("header_key", "header_value"); - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1562,7 +1636,7 @@ public void testKafkaWriteHeaders() throws Exception { public void testSinkProducerRecordsWithCustomTS() throws Exception { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1601,7 +1675,7 @@ public void testSinkProducerRecordsWithCustomTS() throws Exception { public void testSinkProducerRecordsWithCustomPartition() throws Exception { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1725,7 +1799,7 @@ public void testExactlyOnceSink() { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -1803,7 +1877,7 @@ public void testSinkWithSendErrors() throws Throwable { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThreadWithErrors = new ProducerSendCompletionThread(producerWrapper.mockProducer, 10, 100).start(); @@ -1993,7 +2067,7 @@ public void testSourceWithPatternDisplayData() { @Test public void testSinkDisplayData() { - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper 
producerWrapper = new MockProducerWrapper(new LongSerializer())) { KafkaIO.Write write = KafkaIO.write() .withBootstrapServers("myServerA:9092,myServerB:9092") @@ -2017,7 +2091,7 @@ public void testSinkMetrics() throws Exception { int numElements = 1000; - try (MockProducerWrapper producerWrapper = new MockProducerWrapper()) { + try (MockProducerWrapper producerWrapper = new MockProducerWrapper(new LongSerializer())) { ProducerSendCompletionThread completionThread = new ProducerSendCompletionThread(producerWrapper.mockProducer).start(); @@ -2109,14 +2183,22 @@ private static class MockProducerWrapper implements AutoCloseable { } } - MockProducerWrapper() { + MockProducerWrapper(Serializer valueSerializer) { producerKey = String.valueOf(ThreadLocalRandom.current().nextLong()); mockProducer = new MockProducer( + Cluster.empty() + .withPartitions( + ImmutableMap.of( + new TopicPartition("test", 0), + new PartitionInfo("test", 0, null, null, null), + new TopicPartition("test", 1), + new PartitionInfo("test", 1, null, null, null))), false, // disable synchronous completion of send. see ProducerSendCompletionThread // below. + new DefaultPartitioner(), new IntegerSerializer(), - new LongSerializer()) { + valueSerializer) { // override flush() so that it does not complete all the waiting sends, giving a chance // to From d7b48f554ec89d455d1355fdcbaaa6e83b45ac76 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 30 Nov 2023 13:41:05 -0500 Subject: [PATCH 65/80] fix sdf testing --- .../gradle/kafka/KafkaTestUtilities.groovy | 3 +- sdks/java/io/kafka/kafka-01103/build.gradle | 1 + sdks/java/io/kafka/kafka-100/build.gradle | 3 +- sdks/java/io/kafka/kafka-111/build.gradle | 1 + sdks/java/io/kafka/kafka-201/build.gradle | 1 + sdks/java/io/kafka/kafka-211/build.gradle | 1 + sdks/java/io/kafka/kafka-222/build.gradle | 1 + sdks/java/io/kafka/kafka-231/build.gradle | 1 + sdks/java/io/kafka/kafka-241/build.gradle | 1 + sdks/java/io/kafka/kafka-251/build.gradle | 1 + .../io/kafka/kafka-integration-test.gradle | 2 +- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 35 ++++++++----------- .../apache/beam/sdk/io/kafka/KafkaIOTest.java | 5 +++ 13 files changed, 33 insertions(+), 23 deletions(-) diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/kafka/KafkaTestUtilities.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/kafka/KafkaTestUtilities.groovy index cd2875fdb51..bb08e79edd3 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/kafka/KafkaTestUtilities.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/kafka/KafkaTestUtilities.groovy @@ -40,7 +40,7 @@ class KafkaTestUtilities { '"keySizeBytes": "10",' + '"valueSizeBytes": "90"' + '}', - "--readTimeout=120", + "--readTimeout=60", "--kafkaTopic=beam", "--withTestcontainers=true", "--kafkaContainerVersion=5.5.2", @@ -56,6 +56,7 @@ class KafkaTestUtilities { excludeTestsMatching "*SDFResumesCorrectly" //Kafka SDF does not work for kafka versions <2.0.1 excludeTestsMatching "*StopReadingFunction" //Kafka SDF does not work for kafka versions <2.0.1 excludeTestsMatching "*WatermarkUpdateWithSparseMessages" //Kafka SDF does not work for kafka versions <2.0.1 + excludeTestsMatching "*KafkaIOSDFReadWithErrorHandler" } } } diff --git a/sdks/java/io/kafka/kafka-01103/build.gradle b/sdks/java/io/kafka/kafka-01103/build.gradle index a0fa372397a..6e1bdf94671 100644 --- a/sdks/java/io/kafka/kafka-01103/build.gradle +++ b/sdks/java/io/kafka/kafka-01103/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="0.11.0.3" undelimited="01103" + 
sdfCompatable=false } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-100/build.gradle b/sdks/java/io/kafka/kafka-100/build.gradle index 15ce8c0deef..fe3ca88c46d 100644 --- a/sdks/java/io/kafka/kafka-100/build.gradle +++ b/sdks/java/io/kafka/kafka-100/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="1.0.0" undelimited="100" + sdfCompatable=false } -apply from: "../kafka-integration-test.gradle" \ No newline at end of file +apply from: "../kafka-integration-test.gradle" diff --git a/sdks/java/io/kafka/kafka-111/build.gradle b/sdks/java/io/kafka/kafka-111/build.gradle index fee4c382ed4..829a2cf7ac9 100644 --- a/sdks/java/io/kafka/kafka-111/build.gradle +++ b/sdks/java/io/kafka/kafka-111/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="1.1.1" undelimited="111" + sdfCompatable=false } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-201/build.gradle b/sdks/java/io/kafka/kafka-201/build.gradle index d395d0aa626..32b5b7dc896 100644 --- a/sdks/java/io/kafka/kafka-201/build.gradle +++ b/sdks/java/io/kafka/kafka-201/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.0.1" undelimited="201" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-211/build.gradle b/sdks/java/io/kafka/kafka-211/build.gradle index 4de07193b5a..2e331018960 100644 --- a/sdks/java/io/kafka/kafka-211/build.gradle +++ b/sdks/java/io/kafka/kafka-211/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.1.1" undelimited="211" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-222/build.gradle b/sdks/java/io/kafka/kafka-222/build.gradle index 57de58e8189..93c4feb0a0f 100644 --- a/sdks/java/io/kafka/kafka-222/build.gradle +++ b/sdks/java/io/kafka/kafka-222/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.2.2" undelimited="222" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-231/build.gradle b/sdks/java/io/kafka/kafka-231/build.gradle index 3682791c5b6..7baa0c4c72a 100644 --- a/sdks/java/io/kafka/kafka-231/build.gradle +++ b/sdks/java/io/kafka/kafka-231/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.3.1" undelimited="231" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-241/build.gradle b/sdks/java/io/kafka/kafka-241/build.gradle index 358c95aeb2f..3b01bd6dfd4 100644 --- a/sdks/java/io/kafka/kafka-241/build.gradle +++ b/sdks/java/io/kafka/kafka-241/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.4.1" undelimited="241" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-251/build.gradle b/sdks/java/io/kafka/kafka-251/build.gradle index f291ecccc36..b4f62630778 100644 --- a/sdks/java/io/kafka/kafka-251/build.gradle +++ b/sdks/java/io/kafka/kafka-251/build.gradle @@ -18,6 +18,7 @@ project.ext { delimited="2.5.1" undelimited="251" + sdfCompatable=true } apply from: "../kafka-integration-test.gradle" \ No newline at end of file diff --git a/sdks/java/io/kafka/kafka-integration-test.gradle b/sdks/java/io/kafka/kafka-integration-test.gradle index 778f8a3c456..3fbb3ee2777 100644 --- a/sdks/java/io/kafka/kafka-integration-test.gradle +++ 
b/sdks/java/io/kafka/kafka-integration-test.gradle @@ -39,4 +39,4 @@ dependencies { configurations.create("kafkaVersion$undelimited") -tasks.register("kafkaVersion${undelimited}BatchIT",KafkaTestUtilities.KafkaBatchIT, project.ext.delimited, project.ext.undelimited, false, configurations, project) \ No newline at end of file +tasks.register("kafkaVersion${undelimited}BatchIT",KafkaTestUtilities.KafkaBatchIT, project.ext.delimited, project.ext.undelimited, project.ext.sdfCompatable, configurations, project) \ No newline at end of file diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 904e939763a..f6fd5c7de4f 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -133,8 +133,6 @@ public class KafkaIOIT { private static final String RUN_TIME_METRIC_NAME = "run_time"; - private static final String READ_ELEMENT_METRIC_NAME = "kafka_read_element_count"; - private static final String NAMESPACE = KafkaIOIT.class.getName(); private static final String TEST_ID = UUID.randomUUID().toString(); @@ -576,9 +574,7 @@ public void testKafkaWithDynamicPartitions() throws IOException { public void testKafkaWithStopReadingFunction() { AlwaysStopCheckStopReadingFn checkStopReadingFn = new AlwaysStopCheckStopReadingFn(); - PipelineResult readResult = runWithStopReadingFn(checkStopReadingFn, "stop-reading"); - - assertEquals(-1, readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME)); + runWithStopReadingFn(checkStopReadingFn, "stop-reading", 0L); } private static class AlwaysStopCheckStopReadingFn implements CheckStopReadingFn { @@ -592,11 +588,7 @@ public Boolean apply(TopicPartition input) { public void testKafkaWithDelayedStopReadingFunction() { DelayedCheckStopReadingFn checkStopReadingFn = new DelayedCheckStopReadingFn(); - PipelineResult readResult = runWithStopReadingFn(checkStopReadingFn, "delayed-stop-reading"); - - assertEquals( - sourceOptions.numRecords, - readElementMetric(readResult, NAMESPACE, READ_ELEMENT_METRIC_NAME)); + runWithStopReadingFn(checkStopReadingFn, "delayed-stop-reading", sourceOptions.numRecords); } public static final Schema KAFKA_TOPIC_SCHEMA = @@ -744,7 +736,7 @@ public Boolean apply(TopicPartition input) { } } - private PipelineResult runWithStopReadingFn(CheckStopReadingFn function, String topicSuffix) { + private void runWithStopReadingFn(CheckStopReadingFn function, String topicSuffix, Long expectedCount) { writePipeline .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions))) .apply("Measure write time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME))) @@ -753,21 +745,29 @@ private PipelineResult runWithStopReadingFn(CheckStopReadingFn function, String writeToKafka().withTopic(options.getKafkaTopic() + "-" + topicSuffix)); readPipeline.getOptions().as(Options.class).setStreaming(true); - readPipeline + PCollection count = readPipeline .apply( "Read from unbounded Kafka", readFromKafka() .withTopic(options.getKafkaTopic() + "-" + topicSuffix) .withCheckStopReadingFn(function)) - .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME))); + .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME))) + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply( + "Counting element", + 
Combine.globally(Count.>combineFn()).withoutDefaults()); + + if (expectedCount == 0L) { + PAssert.that(count).empty(); + } else { + PAssert.thatSingleton(count).isEqualTo(expectedCount); + } PipelineResult writeResult = writePipeline.run(); writeResult.waitUntilFinish(); PipelineResult readResult = readPipeline.run(); readResult.waitUntilFinish(Duration.standardSeconds(options.getReadTimeout())); - - return readResult; } @Test @@ -845,11 +845,6 @@ public void processElement( } } - private long readElementMetric(PipelineResult result, String namespace, String name) { - MetricsReader metricsReader = new MetricsReader(result, namespace); - return metricsReader.getCounterMetric(name); - } - private Set readMetrics(PipelineResult writeResult, PipelineResult readResult) { BiFunction supplier = (reader, metricName) -> { diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java index 6e39fcab0e0..eff9056192d 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java @@ -1424,6 +1424,11 @@ public FailingLongSerializer() {} public byte[] serialize(String topic, Long data) { throw new SerializationException("ExpectedSerializationException"); } + + @Override + public void configure(Map configs, boolean isKey) { + // intentionally left blank for compatibility with older kafka versions + } } @Test From e978f0c5f2046348c85f1678181a1537055ae4f4 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 30 Nov 2023 13:49:57 -0500 Subject: [PATCH 66/80] fix sdf testing --- .../src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index f6fd5c7de4f..13ed2fdc3bc 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -778,7 +778,7 @@ public void testWatermarkUpdateWithSparseMessages() throws IOException, Interrup String topicName = "SparseDataTopicPartition-" + UUID.randomUUID(); Map records = new HashMap<>(); - for (int i = 0; i < 5; i++) { + for (int i = 1; i <= 5; i++) { records.put(i, String.valueOf(i)); } From 2d760705159c6129f72e525cd75736e768216572 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 30 Nov 2023 14:12:51 -0500 Subject: [PATCH 67/80] spotless --- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index 13ed2fdc3bc..ada91167b7b 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -736,7 +736,8 @@ public Boolean apply(TopicPartition input) { } } - private void runWithStopReadingFn(CheckStopReadingFn function, String topicSuffix, Long expectedCount) { + private void runWithStopReadingFn( + CheckStopReadingFn function, String topicSuffix, Long expectedCount) { writePipeline .apply("Generate records", Read.from(new SyntheticBoundedSource(sourceOptions))) .apply("Measure write 
time", ParDo.of(new TimeMonitor<>(NAMESPACE, WRITE_TIME_METRIC_NAME))) @@ -745,17 +746,19 @@ private void runWithStopReadingFn(CheckStopReadingFn function, String topicSuffi writeToKafka().withTopic(options.getKafkaTopic() + "-" + topicSuffix)); readPipeline.getOptions().as(Options.class).setStreaming(true); - PCollection count = readPipeline - .apply( - "Read from unbounded Kafka", - readFromKafka() - .withTopic(options.getKafkaTopic() + "-" + topicSuffix) - .withCheckStopReadingFn(function)) - .apply("Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME))) - .apply("Window", Window.into(CalendarWindows.years(1))) - .apply( - "Counting element", - Combine.globally(Count.>combineFn()).withoutDefaults()); + PCollection count = + readPipeline + .apply( + "Read from unbounded Kafka", + readFromKafka() + .withTopic(options.getKafkaTopic() + "-" + topicSuffix) + .withCheckStopReadingFn(function)) + .apply( + "Measure read time", ParDo.of(new TimeMonitor<>(NAMESPACE, READ_TIME_METRIC_NAME))) + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply( + "Counting element", + Combine.globally(Count.>combineFn()).withoutDefaults()); if (expectedCount == 0L) { PAssert.that(count).empty(); From a9eb5afcc5371cd4df2f11ee4c24b5099563040b Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Thu, 30 Nov 2023 15:42:41 -0500 Subject: [PATCH 68/80] deflake tests --- .../src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index ada91167b7b..f0e316cd028 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -728,7 +728,7 @@ private static class DelayedCheckStopReadingFn implements CheckStopReadingFn { @Override public Boolean apply(TopicPartition input) { - if (checkCount >= 5) { + if (checkCount >= 10) { return true; } checkCount++; @@ -820,7 +820,7 @@ public void testWatermarkUpdateWithSparseMessages() throws IOException, Interrup PipelineResult readResult = sdfReadPipeline.run(); - Thread.sleep(options.getReadTimeout() * 1000); + Thread.sleep(options.getReadTimeout() * 1000 * 2); for (String value : records.values()) { kafkaIOITExpectedLogs.verifyError(value); From edd725da6b27e5206e6a0bfa7a1fc7d56d10a0c7 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Fri, 1 Dec 2023 15:13:18 -0500 Subject: [PATCH 69/80] add error handling to kafka streaming example update error handler to be serializable to support using it as a member of an auto-value based PTransform --- .../apache/beam/examples/KafkaStreaming.java | 67 ++++++++++++++++--- .../errorhandling/ErrorHandler.java | 15 +++-- .../org/apache/beam/sdk/io/kafka/KafkaIO.java | 41 +++++++----- .../apache/beam/sdk/io/kafka/KafkaIOIT.java | 36 ++++------ 4 files changed, 107 insertions(+), 52 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java b/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java index 34a4b646555..9b56274bb02 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java +++ b/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java @@ -49,8 +49,11 @@ import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.transforms.Combine; import 
org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Sum; +import org.apache.beam.sdk.transforms.errorhandling.BadRecord; +import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler; import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.IntervalWindow; @@ -60,6 +63,8 @@ import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; +import org.apache.kafka.common.errors.SerializationException; +import org.apache.kafka.common.serialization.Deserializer; import org.apache.kafka.common.serialization.IntegerDeserializer; import org.apache.kafka.common.serialization.IntegerSerializer; import org.apache.kafka.common.serialization.StringDeserializer; @@ -97,7 +102,7 @@ public interface KafkaStreamingOptions extends PipelineOptions { * to use your own Kafka server. */ @Description("Kafka server host") - @Default.String("kafka_server:9092") + @Default.String("localhost:9092") String getKafkaHost(); void setKafkaHost(String value); @@ -208,15 +213,22 @@ public void run() { // Start reading from Kafka with the latest offset consumerConfig.put("auto.offset.reset", "latest"); - PCollection> pCollection = - pipeline.apply( - KafkaIO.read() - .withBootstrapServers(options.getKafkaHost()) - .withTopic(TOPIC_NAME) - .withKeyDeserializer(StringDeserializer.class) - .withValueDeserializer(IntegerDeserializer.class) - .withConsumerConfigUpdates(consumerConfig) - .withoutMetadata()); + // Register an error handler for any deserialization errors. + // Errors are simulated with an intentionally failing deserializer + PCollection> pCollection; + try (BadRecordErrorHandler> errorHandler = + pipeline.registerBadRecordErrorHandler(new LogErrors())) { + pCollection = + pipeline.apply( + KafkaIO.read() + .withBootstrapServers(options.getKafkaHost()) + .withTopic(TOPIC_NAME) + .withKeyDeserializer(StringDeserializer.class) + .withValueDeserializer(IntermittentlyFailingIntegerDeserializer.class) + .withConsumerConfigUpdates(consumerConfig) + .withBadRecordErrorHandler(errorHandler) + .withoutMetadata()); + } pCollection // Apply a window and a trigger output repeatedly. 
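This commit only wires the error handler into the read side of the streaming example. A write-side pipeline can reuse the same pattern. The sketch below is illustrative and not part of the patch: it reuses the LogErrors transform added further down in this diff, and it assumes the surrounding example's options, TOPIC_NAME, serializer imports, and a Create import; the key and value literals are made up for the example.

  try (BadRecordErrorHandler<PCollection<BadRecord>> errorHandler =
      pipeline.registerBadRecordErrorHandler(new LogErrors())) {
    pipeline
        .apply(Create.of(KV.of("player1", 15)))
        .apply(
            KafkaIO.<String, Integer>write()
                .withBootstrapServers(options.getKafkaHost())
                .withTopic(TOPIC_NAME)
                .withKeySerializer(StringSerializer.class)
                .withValueSerializer(IntegerSerializer.class)
                // Records that fail to serialize are routed to the handler
                // instead of failing the bundle.
                .withBadRecordErrorHandler(errorHandler));
  }
  // Closing the handler (here via try-with-resources) connects the collected
  // BadRecords to the LogErrors sink before the pipeline is run.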
@@ -317,4 +329,39 @@ public void processElement(ProcessContext c, IntervalWindow w) throws Exception c.output(c.element()); } } + + // Simple PTransform to log Error information + static class LogErrors extends PTransform, PCollection> { + + @Override + public PCollection expand(PCollection input) { + return input.apply("Log Errors", ParDo.of(new LogErrorFn())); + } + + static class LogErrorFn extends DoFn { + @ProcessElement + public void processElement(@Element BadRecord record, OutputReceiver receiver) { + System.out.println(record); + receiver.output(record); + } + } + } + + // Intentionally failing deserializer to simulate bad data from Kafka + public static class IntermittentlyFailingIntegerDeserializer implements Deserializer { + + public static final IntegerDeserializer integerDeserializer = new IntegerDeserializer(); + public int deserializeCount = 0; + + public IntermittentlyFailingIntegerDeserializer() {} + + @Override + public Integer deserialize(String topic, byte[] data) { + deserializeCount++; + if (deserializeCount % 10 == 0) { + throw new SerializationException("Expected Serialization Exception"); + } + return integerDeserializer.deserialize(topic, data); + } + } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java index 054df020f99..46d83b508cd 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.transforms.errorhandling; +import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -63,8 +64,11 @@ * } * results.apply(SomeOtherTransform); * }
+ * + * This is marked as serializable despite never being needed on the runner, to enable it to be a + * parameter of an Autovalue configured PTransform. */ -public interface ErrorHandler extends AutoCloseable { +public interface ErrorHandler extends AutoCloseable, Serializable { void addErrorCollection(PCollection errorCollection); @@ -79,13 +83,16 @@ class PTransformErrorHandler private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class); private final PTransform, OutputT> sinkTransform; - private final Pipeline pipeline; + //transient as Pipelines are not serializable + transient private final Pipeline pipeline; private final Coder coder; - private final List> errorCollections = new ArrayList<>(); + //transient as PCollections are not serializable + transient private final List> errorCollections = new ArrayList<>(); - private @Nullable OutputT sinkOutput = null; + //transient as PCollections are not serializable + transient private @Nullable OutputT sinkOutput = null; private boolean closed = false; diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java index c0f601f9166..8fd0c34cfa9 100644 --- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java +++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java @@ -480,6 +480,11 @@ * // or you can also set a custom timestamp with a function. * .withPublishTimestampFunction((elem, elemTs) -> ...) * + * // Optionally, records that fail to serialize can be sent to an error handler + * // See {@link ErrorHandler} for details of configuring a bad record error + * // handler + * .withBadRecordErrorHandler(errorHandler) + * * // Optionally enable exactly-once sink (on supported runners). See JavaDoc for withEOS(). * .withEOS(20, "eos-sink-group-id"); * ); @@ -619,6 +624,7 @@ public static WriteRecords writeRecords() { .setNumShards(0) .setConsumerFactoryFn(KafkaIOUtils.KAFKA_CONSUMER_FACTORY_FN) .setBadRecordRouter(BadRecordRouter.THROWING_ROUTER) + .setBadRecordErrorHandler(new DefaultErrorHandler<>()) .build(); } @@ -698,7 +704,7 @@ public abstract static class Read public abstract @Nullable CheckStopReadingFn getCheckStopReadingFn(); @Pure - abstract @Nullable ErrorHandler getBadRecordErrorHandler(); + public abstract @Nullable ErrorHandler getBadRecordErrorHandler(); abstract Builder toBuilder(); @@ -748,14 +754,14 @@ abstract Builder setValueDeserializerProvider( abstract Builder setCheckStopReadingFn(@Nullable CheckStopReadingFn checkStopReadingFn); + abstract Builder setBadRecordErrorHandler( + @Nullable ErrorHandler badRecordErrorHandler); + Builder setCheckStopReadingFn( @Nullable SerializableFunction checkStopReadingFn) { return setCheckStopReadingFn(CheckStopReadingFnWrapper.of(checkStopReadingFn)); } - abstract Builder setBadRecordErrorHandler( - ErrorHandler badRecordErrorHandler); - abstract Read build(); static void setupExternalBuilder( @@ -2562,8 +2568,6 @@ public abstract static class WriteRecords // we shouldn't have to duplicate the same API for similar transforms like {@link Write} and // {@link WriteRecords}. See example at {@link PubsubIO.Write}. 
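// In the hunk below, the mutable transient badRecordErrorHandler field is replaced by an AutoValue property with a builder setter; making ErrorHandler Serializable (above) is what allows it to be stored on the AutoValue-configured transform.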
- transient ErrorHandler badRecordErrorHandler = new DefaultErrorHandler<>(); - @Pure public abstract @Nullable String getTopic(); @@ -2601,6 +2605,9 @@ public abstract static class WriteRecords @Pure public abstract BadRecordRouter getBadRecordRouter(); + @Pure + public abstract ErrorHandler getBadRecordErrorHandler(); + abstract Builder toBuilder(); @AutoValue.Builder @@ -2630,6 +2637,9 @@ abstract Builder setConsumerFactoryFn( abstract Builder setBadRecordRouter(BadRecordRouter router); + abstract Builder setBadRecordErrorHandler( + ErrorHandler badRecordErrorHandler); + abstract WriteRecords build(); } @@ -2778,10 +2788,10 @@ public WriteRecords withConsumerFactoryFn( public WriteRecords withBadRecordErrorHandler( ErrorHandler badRecordErrorHandler) { - WriteRecords writeRecords = - toBuilder().setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER).build(); - writeRecords.badRecordErrorHandler = badRecordErrorHandler; - return writeRecords; + return toBuilder() + .setBadRecordRouter(BadRecordRouter.RECORDING_ROUTER) + .setBadRecordErrorHandler(badRecordErrorHandler) + .build(); } @Override @@ -2796,7 +2806,7 @@ public PDone expand(PCollection> input) { if (isEOS()) { checkArgument(getTopic() != null, "withTopic() is required when isEOS() is true"); checkArgument( - badRecordErrorHandler instanceof DefaultErrorHandler, + getBadRecordErrorHandler() instanceof DefaultErrorHandler, "BadRecordErrorHandling isn't supported with Kafka Exactly Once writing"); KafkaExactlyOnceSink.ensureEOSSupport(); @@ -2816,10 +2826,11 @@ public PDone expand(PCollection> input) { ParDo.of(new KafkaWriter<>(this)) .withOutputTags( new TupleTag(), TupleTagList.of(BadRecordRouter.BAD_RECORD_TAG))); - badRecordErrorHandler.addErrorCollection( - pCollectionTuple - .get(BadRecordRouter.BAD_RECORD_TAG) - .setCoder(BadRecord.getCoder(input.getPipeline()))); + getBadRecordErrorHandler() + .addErrorCollection( + pCollectionTuple + .get(BadRecordRouter.BAD_RECORD_TAG) + .setCoder(BadRecord.getCoder(input.getPipeline()))); } return PDone.in(input.getPipeline()); } diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java index f0e316cd028..954e10b4a97 100644 --- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java +++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java @@ -359,6 +359,17 @@ public void processElement(@Element String element, OutputReceiver outpu } } + private static class ErrorSinkTransform + extends PTransform, PCollection> { + + @Override + public @UnknownKeyFor @NonNull @Initialized PCollection expand( + PCollection input) { + return input + .apply("Window", Window.into(CalendarWindows.years(1))) + .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults()); + } + } // This test verifies that bad data from Kafka is properly sent to the error handler @Test public void testKafkaIOSDFReadWithErrorHandler() throws IOException { @@ -376,19 +387,8 @@ public void testKafkaIOSDFReadWithErrorHandler() throws IOException { PipelineResult.State writeState = writeResult.waitUntilFinish(); assertNotEquals(PipelineResult.State.FAILED, writeState); - PTransform, PCollection> sinkTransform = - new PTransform, PCollection>() { - @Override - public @UnknownKeyFor @NonNull @Initialized PCollection expand( - PCollection input) { - return input - .apply("Window", Window.into(CalendarWindows.years(1))) - .apply("Combine", 
Combine.globally(Count.combineFn()).withoutDefaults()); - } - }; - BadRecordErrorHandler> eh = - sdfReadPipeline.registerBadRecordErrorHandler(sinkTransform); + sdfReadPipeline.registerBadRecordErrorHandler(new ErrorSinkTransform()); sdfReadPipeline.apply( KafkaIO.read() .withBootstrapServers(options.getKafkaBootstrapServerAddresses()) @@ -410,19 +410,9 @@ public void testKafkaIOSDFReadWithErrorHandler() throws IOException { @Test public void testKafkaIOWriteWithErrorHandler() throws IOException { - PTransform, PCollection> sinkTransform = - new PTransform, PCollection>() { - @Override - public @UnknownKeyFor @NonNull @Initialized PCollection expand( - PCollection input) { - return input - .apply("Window", Window.into(CalendarWindows.years(1))) - .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults()); - } - }; BadRecordErrorHandler> eh = - writePipeline.registerBadRecordErrorHandler(sinkTransform); + writePipeline.registerBadRecordErrorHandler(new ErrorSinkTransform()); writePipeline .apply("Create single KV", Create.of(KV.of("key", 4L))) .apply( From 864c429ddd2ab6e60e41db5c7292e482654ddd03 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Dec 2023 10:33:54 -0500 Subject: [PATCH 70/80] apply final comments --- .../transforms/errorhandling/BadRecord.java | 25 ++++++++++++++ .../errorhandling/BadRecordRouter.java | 34 +++++++------------ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 6cd9493041f..94fe270f3e8 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -67,6 +67,31 @@ public static Coder getCoder(Pipeline pipeline) { } } + public static BadRecord fromExceptionInformation( + RecordT record, + @Nullable Coder coder, + @Nullable Exception exception, + String description) + throws IOException { + // Build up record information + BadRecord.Record.Builder recordBuilder = Record.builder(); + recordBuilder.addHumanReadableJson(record).addCoderAndEncodedRecord(coder, record); + + // Build up failure information + BadRecord.Failure.Builder failureBuilder = Failure.builder().setDescription(description); + + // It's possible for us to want to handle an error scenario where no actual exception object + // exists + if (exception != null) { + failureBuilder.setException(exception.toString()).addExceptionStackTrace(exception); + } + + return BadRecord.builder() + .setRecord(recordBuilder.build()) + .setFailure(failureBuilder.build()) + .build(); + } + @AutoValue.Builder public abstract static class Builder { diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index 5d4a9b51015..e55cc202c40 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -20,8 +20,6 @@ import java.io.Serializable; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.DoFn.MultiOutputReceiver; -import org.apache.beam.sdk.transforms.errorhandling.BadRecord.Failure; -import 
org.apache.beam.sdk.transforms.errorhandling.BadRecord.Record; import org.apache.beam.sdk.util.Preconditions; import org.apache.beam.sdk.values.TupleTag; import org.checkerframework.checker.nullness.qual.Nullable; @@ -55,7 +53,15 @@ public void route( if (exception != null) { throw exception; } else { - throw new RuntimeException("Throwing default exception from Throwing Bad Record Router"); + String encodedRecord = + BadRecord.Record.builder() + .addHumanReadableJson(record) + .build() + .getHumanReadableJsonRecord(); + if (encodedRecord == null) { + encodedRecord = "Unable to serialize bad record"; + } + throw new RuntimeException("Encountered Bad Record: " + encodedRecord); } } } @@ -72,25 +78,9 @@ public void route( throws Exception { Preconditions.checkArgumentNotNull(record); - // Build up record information - BadRecord.Record.Builder recordBuilder = Record.builder(); - recordBuilder.addHumanReadableJson(record).addCoderAndEncodedRecord(coder, record); - - // Build up failure information - BadRecord.Failure.Builder failureBuilder = Failure.builder().setDescription(description); - - // It's possible for us to want to handle an error scenario where no actual exception object - // exists - if (exception != null) { - failureBuilder.setException(exception.toString()).addExceptionStackTrace(exception); - } - - BadRecord badRecord = - BadRecord.builder() - .setRecord(recordBuilder.build()) - .setFailure(failureBuilder.build()) - .build(); - outputReceiver.get(BAD_RECORD_TAG).output(badRecord); + outputReceiver + .get(BAD_RECORD_TAG) + .output(BadRecord.fromExceptionInformation(record, coder, exception, description)); } } } From 8307b6328373891da31071714539d9207489ec98 Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Dec 2023 10:55:11 -0500 Subject: [PATCH 71/80] apply final comments --- .../beam/sdk/transforms/errorhandling/BadRecordRouter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java index e55cc202c40..137eb529147 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java @@ -53,6 +53,7 @@ public void route( if (exception != null) { throw exception; } else { + Preconditions.checkArgumentNotNull(record); String encodedRecord = BadRecord.Record.builder() .addHumanReadableJson(record) From fad9d56695528bb12aff0e3e4f3cc7d4a4d8d88e Mon Sep 17 00:00:00 2001 From: johnjcasey Date: Tue, 5 Dec 2023 11:00:44 -0500 Subject: [PATCH 72/80] apply final comments --- .../apache/beam/sdk/transforms/errorhandling/BadRecord.java | 3 +++ .../beam/sdk/transforms/errorhandling/BadRecordRouter.java | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java index 94fe270f3e8..fd49078350c 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java @@ -32,6 +32,7 @@ import org.apache.beam.sdk.schemas.SchemaRegistry; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; import org.apache.beam.sdk.util.CoderUtils; +import 
From 8307b6328373891da31071714539d9207489ec98 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 10:55:11 -0500
Subject: [PATCH 71/80] apply final comments

---
 .../beam/sdk/transforms/errorhandling/BadRecordRouter.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
index e55cc202c40..137eb529147 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
@@ -53,6 +53,7 @@ public void route(
       if (exception != null) {
         throw exception;
       } else {
+        Preconditions.checkArgumentNotNull(record);
         String encodedRecord =
             BadRecord.Record.builder()
                 .addHumanReadableJson(record)

From fad9d56695528bb12aff0e3e4f3cc7d4a4d8d88e Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 11:00:44 -0500
Subject: [PATCH 72/80] apply final comments

---
 .../apache/beam/sdk/transforms/errorhandling/BadRecord.java  | 3 +++
 .../beam/sdk/transforms/errorhandling/BadRecordRouter.java   | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java
index 94fe270f3e8..fd49078350c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecord.java
@@ -32,6 +32,7 @@ import org.apache.beam.sdk.schemas.SchemaRegistry;
 import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
 import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.Preconditions;
 import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Charsets;
 import org.checkerframework.checker.nullness.qual.Nullable;
@@ -73,6 +74,8 @@ public static <RecordT> BadRecord fromExceptionInformation(
       @Nullable Exception exception,
       String description)
       throws IOException {
+    Preconditions.checkArgumentNotNull(record);
+
     // Build up record information
     BadRecord.Record.Builder recordBuilder = Record.builder();
     recordBuilder.addHumanReadableJson(record).addCoderAndEncodedRecord(coder, record);
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
index 137eb529147..7670baf5c6f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/BadRecordRouter.java
@@ -77,8 +77,6 @@ public void route(
       @Nullable Exception exception,
       String description)
       throws Exception {
-      Preconditions.checkArgumentNotNull(record);
-
       outputReceiver
           .get(BAD_RECORD_TAG)
          .output(BadRecord.fromExceptionInformation(record, coder, exception, description));

From ad37dda488923bdd2366d16f5239fe0c76ce457e Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 12:46:25 -0500
Subject: [PATCH 73/80] add line to CHANGES.md

---
 CHANGES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGES.md b/CHANGES.md
index e48ee7f1d51..cc8152e305d 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -66,6 +66,7 @@
 * TextIO now supports skipping multiple header lines (Java) ([#17990](https://github.com/apache/beam/issues/17990)).
 * Python GCSIO is now implemented with GCP GCS Client instead of apitools ([#25676](https://github.com/apache/beam/issues/25676))
 * Adding support for LowCardinality DataType in ClickHouse (Java) ([#29533](https://github.com/apache/beam/pull/29533)).
+* Added support for handling bad records to KafkaIO (Java) ([#29546](https://github.com/apache/beam/pull/29546))
 
 ## New Features / Improvements

From 70437c189c71aff2f937a9ce6c0f464aa5dc15d0 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 13:26:31 -0500
Subject: [PATCH 74/80] fix spotless

---
 .../transforms/errorhandling/ErrorHandler.java | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
index 46d83b508cd..20e430a58c7 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
@@ -64,9 +64,8 @@
  * }
  * results.apply(SomeOtherTransform);
  * }
- *
- * This is marked as serializable despite never being needed on the runner, to enable it to be a
- * parameter of an Autovalue configured PTransform.
+ * This is marked as serializable despite never being needed on the runner, to enable it to be a
+ * parameter of an Autovalue configured PTransform.
  */
 public interface ErrorHandler<ErrorT, OutputT extends POutput> extends AutoCloseable, Serializable {
 
@@ -83,16 +82,16 @@ class PTransformErrorHandler
 
     private static final Logger LOG = LoggerFactory.getLogger(PTransformErrorHandler.class);
 
     private final PTransform<PCollection<ErrorT>, OutputT> sinkTransform;
 
-    //transient as Pipelines are not serializable
-    transient private final Pipeline pipeline;
+    // transient as Pipelines are not serializable
+    private final transient Pipeline pipeline;
 
     private final Coder<ErrorT> coder;
 
-    //transient as PCollections are not serializable
-    transient private final List<PCollection<ErrorT>> errorCollections = new ArrayList<>();
+    // transient as PCollections are not serializable
+    private final transient List<PCollection<ErrorT>> errorCollections = new ArrayList<>();
 
-    //transient as PCollections are not serializable
-    transient private @Nullable OutputT sinkOutput = null;
+    // transient as PCollections are not serializable
+    private transient @Nullable OutputT sinkOutput = null;
 
     private boolean closed = false;
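Patch 74's reordered modifiers also highlight the design constraint its javadoc describes: PTransformErrorHandler implements Serializable only so it can travel as a parameter of an AutoValue-configured PTransform, while its Pipeline and PCollection fields must be transient because those types cannot be serialized. A self-contained sketch of what transient buys here; this is plain Java with invented names, not Beam code:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.ObjectInputStream;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;
    import java.util.ArrayList;
    import java.util.List;

    public class TransientDemo {

      static class Holder implements Serializable {
        // Skipped by serialization; the initializer does NOT rerun on deserialization.
        private final transient List<String> collections = new ArrayList<>();
        private final String name = "kept";
      }

      public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(bytes)) {
          out.writeObject(new Holder());
        }
        try (ObjectInputStream in =
            new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
          Holder copy = (Holder) in.readObject();
          System.out.println(copy.name); // prints "kept"
          System.out.println(copy.collections); // prints "null": transient state is lost
        }
      }
    }

The null-after-deserialization behavior shown here is exactly what patch 80 at the end of this series guards against with a readObject override.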
From 6a2801f3c9034e1f33dd545b3fb8e4da4ec427ab Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 14:02:15 -0500
Subject: [PATCH 75/80] fix checkstyle

---
 .../main/java/org/apache/beam/examples/KafkaStreaming.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java b/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java
index 9b56274bb02..602c34d4219 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/KafkaStreaming.java
@@ -350,7 +350,7 @@ public void processElement(@Element BadRecord record, OutputReceiver
   // Intentionally failing deserializer to simulate bad data from Kafka
   public static class IntermittentlyFailingIntegerDeserializer implements Deserializer<Integer> {
-    public static final IntegerDeserializer integerDeserializer = new IntegerDeserializer();
+    public static final IntegerDeserializer INTEGER_DESERIALIZER = new IntegerDeserializer();
 
     public int deserializeCount = 0;
 
     public IntermittentlyFailingIntegerDeserializer() {}
@@ -361,7 +361,7 @@ public Integer deserialize(String topic, byte[] data) {
       if (deserializeCount % 10 == 0) {
         throw new SerializationException("Expected Serialization Exception");
      }
-      return integerDeserializer.deserialize(topic, data);
+      return INTEGER_DESERIALIZER.deserialize(topic, data);
    }
  }
}

From 0c1ab354560e739159e5f5eb0e4528303e04b16c Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 14:58:58 -0500
Subject: [PATCH 76/80] make sink transform static for serialization

---
 .../apache/beam/sdk/io/kafka/KafkaIOIT.java   | 12 +--------
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java | 26 +++++++++--------
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
index 954e10b4a97..50e88fa0c13 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
@@ -44,6 +44,7 @@ import org.apache.beam.sdk.io.Read;
 import org.apache.beam.sdk.io.common.IOITHelper;
 import org.apache.beam.sdk.io.common.IOTestPipelineOptions;
+import org.apache.beam.sdk.io.kafka.KafkaIOTest.ErrorSinkTransform;
 import org.apache.beam.sdk.io.kafka.KafkaIOTest.FailingLongSerializer;
 import org.apache.beam.sdk.io.kafka.ReadFromKafkaDoFnTest.FailingDeserializer;
 import org.apache.beam.sdk.io.synthetic.SyntheticBoundedSource;
@@ -359,17 +360,6 @@ public void processElement(@Element String element, OutputReceiver<String> outpu
     }
   }
 
-  private static class ErrorSinkTransform
-      extends PTransform<PCollection<BadRecord>, PCollection<Long>> {
-
-    @Override
-    public @UnknownKeyFor @NonNull @Initialized PCollection<Long> expand(
-        PCollection<BadRecord> input) {
-      return input
-          .apply("Window", Window.into(CalendarWindows.years(1)))
-          .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults());
-    }
-  }
   // This test verifies that bad data from Kafka is properly sent to the error handler
   @Test
   public void testKafkaIOSDFReadWithErrorHandler() throws IOException {
diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
index eff9056192d..f09d7d19b8d 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
@@ -1446,19 +1446,7 @@ public void testSinkWithSerializationErrors() throws Exception {
 
       String topic = "test";
 
-      PTransform<PCollection<BadRecord>, PCollection<Long>> sinkTransform =
-          new PTransform<PCollection<BadRecord>, PCollection<Long>>() {
-            @Override
-            public @UnknownKeyFor @NonNull @Initialized PCollection<Long> expand(
-                PCollection<BadRecord> input) {
-              return input
-                  .apply("Window", Window.into(CalendarWindows.years(1)))
-                  .apply(
-                      "Combine", Combine.globally(Count.combineFn()).withoutDefaults());
-            }
-          };
-
-      BadRecordErrorHandler<PCollection<Long>> eh = p.registerBadRecordErrorHandler(sinkTransform);
+      BadRecordErrorHandler<PCollection<Long>> eh = p.registerBadRecordErrorHandler(new ErrorSinkTransform());
 
       p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
          .apply(
@@ -1483,6 +1471,18 @@ public void testSinkWithSerializationErrors() throws Exception {
     }
   }
 
+  public static class ErrorSinkTransform
+      extends PTransform<PCollection<BadRecord>, PCollection<Long>> {
+
+    @Override
+    public @UnknownKeyFor @NonNull @Initialized PCollection<Long> expand(
+        PCollection<BadRecord> input) {
+      return input
+          .apply("Window", Window.into(CalendarWindows.years(1)))
+          .apply("Combine", Combine.globally(Count.combineFn()).withoutDefaults());
+    }
+  }
+
   @Test
   public void testValuesSink() throws Exception {
     // similar to testSink(), but use values()' interface.
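Patch 76's subject, "make sink transform static for serialization", reflects a general Java rule: an anonymous or inner class keeps an implicit reference to its enclosing instance, so serializing one drags the enclosing object (here, a JUnit test class) into the stream, while a named static nested class captures nothing. A self-contained sketch of the difference, with invented names rather than the Beam classes above:

    import java.io.ByteArrayOutputStream;
    import java.io.ObjectOutputStream;
    import java.io.Serializable;

    public class CaptureDemo {

      interface SerializableTask extends Runnable, Serializable {}

      // Declared in instance context: the anonymous class captures CaptureDemo.this.
      final SerializableTask anonymous =
          new SerializableTask() {
            @Override
            public void run() {}
          };

      // Static nested class: no hidden reference to the enclosing instance.
      static class Standalone implements SerializableTask {
        @Override
        public void run() {}
      }

      static void roundTrip(Object o) throws Exception {
        try (ObjectOutputStream out = new ObjectOutputStream(new ByteArrayOutputStream())) {
          out.writeObject(o);
        }
      }

      public static void main(String[] args) throws Exception {
        roundTrip(new Standalone()); // fine
        // Throws NotSerializableException: CaptureDemo, because the anonymous
        // instance's hidden this$0 field points at the non-serializable outer object.
        roundTrip(new CaptureDemo().anonymous);
      }
    }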
From c589fd2b7a7823570397ccb222dbd9fa5a9f101f Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Tue, 5 Dec 2023 15:20:07 -0500
Subject: [PATCH 77/80] spotless

---
 .../test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java   | 5 -----
 .../test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java | 3 ++-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
index 50e88fa0c13..5b976687f2c 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOIT.java
@@ -74,10 +74,8 @@ import org.apache.beam.sdk.transforms.GroupByKey;
 import org.apache.beam.sdk.transforms.Keys;
 import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.Values;
-import org.apache.beam.sdk.transforms.errorhandling.BadRecord;
 import org.apache.beam.sdk.transforms.errorhandling.ErrorHandler.BadRecordErrorHandler;
 import org.apache.beam.sdk.transforms.windowing.CalendarWindows;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
@@ -100,10 +98,7 @@ import org.apache.kafka.common.serialization.IntegerSerializer;
 import org.apache.kafka.common.serialization.StringDeserializer;
 import org.apache.kafka.common.serialization.StringSerializer;
-import org.checkerframework.checker.initialization.qual.Initialized;
-import org.checkerframework.checker.nullness.qual.NonNull;
 import org.checkerframework.checker.nullness.qual.Nullable;
-import org.checkerframework.checker.nullness.qual.UnknownKeyFor;
 import org.joda.time.Duration;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
index f09d7d19b8d..b0df82bcdc1 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
@@ -1446,7 +1446,8 @@ public void testSinkWithSerializationErrors() throws Exception {
 
       String topic = "test";
 
-      BadRecordErrorHandler<PCollection<Long>> eh = p.registerBadRecordErrorHandler(new ErrorSinkTransform());
+      BadRecordErrorHandler<PCollection<Long>> eh =
+          p.registerBadRecordErrorHandler(new ErrorSinkTransform());
 
       p.apply(mkKafkaReadTransform(numElements, new ValueAsTimestampFn()).withoutMetadata())
          .apply(

From 4b1a6711f58373c0cf039483d188d522938a642d Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Wed, 6 Dec 2023 09:51:33 -0500
Subject: [PATCH 78/80] fix typo

---
 sdks/java/io/kafka/kafka-01103/build.gradle      | 2 +-
 sdks/java/io/kafka/kafka-100/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-111/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-211/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-222/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-231/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-241/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-251/build.gradle        | 2 +-
 sdks/java/io/kafka/kafka-integration-test.gradle | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/sdks/java/io/kafka/kafka-01103/build.gradle b/sdks/java/io/kafka/kafka-01103/build.gradle
index 6e1bdf94671..3a74bf04ef2 100644
--- a/sdks/java/io/kafka/kafka-01103/build.gradle
+++ b/sdks/java/io/kafka/kafka-01103/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="0.11.0.3"
   undelimited="01103"
-  sdfCompatable=false
+  sdfCompatible=false
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-100/build.gradle b/sdks/java/io/kafka/kafka-100/build.gradle
index fe3ca88c46d..bd5fa67b1cf 100644
--- a/sdks/java/io/kafka/kafka-100/build.gradle
+++ b/sdks/java/io/kafka/kafka-100/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="1.0.0"
   undelimited="100"
-  sdfCompatable=false
+  sdfCompatible=false
 }
 
 apply from: "../kafka-integration-test.gradle"
diff --git a/sdks/java/io/kafka/kafka-111/build.gradle b/sdks/java/io/kafka/kafka-111/build.gradle
index 829a2cf7ac9..c2b0c8f8282 100644
--- a/sdks/java/io/kafka/kafka-111/build.gradle
+++ b/sdks/java/io/kafka/kafka-111/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="1.1.1"
   undelimited="111"
-  sdfCompatable=false
+  sdfCompatible=false
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-211/build.gradle b/sdks/java/io/kafka/kafka-211/build.gradle
index 2e331018960..433d6c93f36 100644
--- a/sdks/java/io/kafka/kafka-211/build.gradle
+++ b/sdks/java/io/kafka/kafka-211/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.1.1"
   undelimited="211"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-222/build.gradle b/sdks/java/io/kafka/kafka-222/build.gradle
index 93c4feb0a0f..0f037e74296 100644
--- a/sdks/java/io/kafka/kafka-222/build.gradle
+++ b/sdks/java/io/kafka/kafka-222/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.2.2"
   undelimited="222"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-231/build.gradle b/sdks/java/io/kafka/kafka-231/build.gradle
index 7baa0c4c72a..712158dcd3a 100644
--- a/sdks/java/io/kafka/kafka-231/build.gradle
+++ b/sdks/java/io/kafka/kafka-231/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.3.1"
   undelimited="231"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-241/build.gradle b/sdks/java/io/kafka/kafka-241/build.gradle
index 3b01bd6dfd4..c0ac7df674b 100644
--- a/sdks/java/io/kafka/kafka-241/build.gradle
+++ b/sdks/java/io/kafka/kafka-241/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.4.1"
   undelimited="241"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-251/build.gradle b/sdks/java/io/kafka/kafka-251/build.gradle
index b4f62630778..4de9f97a738 100644
--- a/sdks/java/io/kafka/kafka-251/build.gradle
+++ b/sdks/java/io/kafka/kafka-251/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.5.1"
   undelimited="251"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file
diff --git a/sdks/java/io/kafka/kafka-integration-test.gradle b/sdks/java/io/kafka/kafka-integration-test.gradle
index 3fbb3ee2777..1aeb0c97f93 100644
--- a/sdks/java/io/kafka/kafka-integration-test.gradle
+++ b/sdks/java/io/kafka/kafka-integration-test.gradle
@@ -39,4 +39,4 @@ dependencies {
 
 configurations.create("kafkaVersion$undelimited")
 
-tasks.register("kafkaVersion${undelimited}BatchIT",KafkaTestUtilities.KafkaBatchIT, project.ext.delimited, project.ext.undelimited, project.ext.sdfCompatable, configurations, project)
\ No newline at end of file
+tasks.register("kafkaVersion${undelimited}BatchIT",KafkaTestUtilities.KafkaBatchIT, project.ext.delimited, project.ext.undelimited, project.ext.sdfCompatible, configurations, project)
\ No newline at end of file
From edb419be9e314f95e0a66578c13e9f278313d638 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Wed, 6 Dec 2023 10:01:17 -0500
Subject: [PATCH 79/80] fix typo

---
 sdks/java/io/kafka/kafka-201/build.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdks/java/io/kafka/kafka-201/build.gradle b/sdks/java/io/kafka/kafka-201/build.gradle
index 32b5b7dc896..a26ca4ac19c 100644
--- a/sdks/java/io/kafka/kafka-201/build.gradle
+++ b/sdks/java/io/kafka/kafka-201/build.gradle
@@ -18,7 +18,7 @@
 project.ext {
   delimited="2.0.1"
   undelimited="201"
-  sdfCompatable=true
+  sdfCompatible=true
 }
 
 apply from: "../kafka-integration-test.gradle"
\ No newline at end of file

From 58f6ca2f68795cfe872040d815f552b36f54be14 Mon Sep 17 00:00:00 2001
From: johnjcasey
Date: Wed, 6 Dec 2023 14:11:14 -0500
Subject: [PATCH 80/80] fix spotbugs

---
 .../sdk/transforms/errorhandling/ErrorHandler.java | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
index 20e430a58c7..e02965b7202 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/errorhandling/ErrorHandler.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.sdk.transforms.errorhandling;
 
+import java.io.IOException;
+import java.io.ObjectInputStream;
 import java.io.Serializable;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@@ -88,7 +90,7 @@ class PTransformErrorHandler
 
     private final Coder<ErrorT> coder;
 
     // transient as PCollections are not serializable
-    private final transient List<PCollection<ErrorT>> errorCollections = new ArrayList<>();
+    private transient List<PCollection<ErrorT>> errorCollections = new ArrayList<>();
 
     // transient as PCollections are not serializable
@@ -109,6 +111,12 @@ public PTransformErrorHandler(
       this.coder = coder;
     }
 
+    private void readObject(ObjectInputStream aInputStream)
+        throws ClassNotFoundException, IOException {
+      aInputStream.defaultReadObject();
+      errorCollections = new ArrayList<>();
+    }
+
     @Override
     public void addErrorCollection(PCollection<ErrorT> errorCollection) {
       errorCollections.add(errorCollection);
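The spotbugs fix in patch 80 pairs two changes: errorCollections loses its final modifier, and a readObject override rebuilds the list after defaultReadObject() has restored the non-transient state. Without it, a deserialized handler would see a null list; without dropping final, the reassignment would not compile. A minimal sketch of the same pattern, with invented class and field names:

    import java.io.IOException;
    import java.io.ObjectInputStream;
    import java.io.Serializable;
    import java.util.ArrayList;
    import java.util.List;

    public class ReinitOnRead implements Serializable {

      // Must not be final: readObject assigns it after deserialization.
      private transient List<String> buffer = new ArrayList<>();

      public void add(String value) {
        buffer.add(value);
      }

      private void readObject(ObjectInputStream in) throws ClassNotFoundException, IOException {
        in.defaultReadObject(); // restore the non-transient fields first
        buffer = new ArrayList<>(); // then rebuild the transient state, empty
      }
    }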