diff --git a/.github/ISSUE_TEMPLATE/4-conduit-release.yml b/.github/ISSUE_TEMPLATE/4-conduit-release.yml
new file mode 100644
index 000000000..01da51abc
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4-conduit-release.yml
@@ -0,0 +1,115 @@
+name: 🚢 Conduit release checklist
+description: Use this template to guide you through the Conduit release process.
+title: '[Release] Conduit vX.Y.Z'
+labels:
+ - release
+assignees: []
+
+body:
+ - type: markdown
+ attributes:
+ value: |
+ # Conduit Release Checklist
+
+ This issue serves as a checklist for releasing a new version of Conduit. Follow the steps below to ensure a smooth release process.
+
+ - type: markdown
+ attributes:
+ value: |
+ ## General Information
+
+ A Conduit release includes:
+ - A GitHub release with packages for different OS and architectures, checksums, a changelog, and source code.
+ - A GitHub package for the official Docker image, available on GitHub's Container Registry, tagged with `latest`.
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Before a Release
+
+ ### Update Dependencies
+
+ Update dependencies in the following order, ensuring all repositories are cloned in the same directory:
+
+ - type: checkboxes
+ attributes:
+ label: Update Dependencies
+ options:
+ - label: '**conduit-commons**: Run `scripts/get-compare-link.sh ../conduit-commons/` to compare the latest tag and `main` branch. If changes are needed, push a new tag.'
+ - label: '**conduit-connector-protocol**: Update `conduit-commons` if necessary: `go get github.com/conduitio/conduit-commons@vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-protocol/` and tag if needed.'
+ - label: '**conduit-connector-sdk**: Update dependencies (`conduit-commons`, `conduit-connector-protocol`) as needed. Run `scripts/get-compare-link.sh ../conduit-connector-sdk/` and tag if needed.'
+ - label: '**conduit-processor-sdk**: Update `conduit-commons` if necessary. Run `scripts/get-compare-link.sh ../conduit-processor-sdk/` and tag if needed.'
+ - label: '**conduit-schema-registry**: Update `conduit-commons` if necessary. Run `scripts/get-compare-link.sh ../conduit-schema-registry/` and tag if needed.'
+ - label: '**Connector SDK in conduit-connector-template**: Bump the Connector SDK dependency.'
+ - label: '**conduit-connector-file**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-file/` and tag if needed.'
+ - label: '**conduit-connector-kafka**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-kafka/` and tag if needed.'
+ - label: '**conduit-connector-generator**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-generator/` and tag if needed.'
+ - label: '**conduit-connector-s3**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-s3/` and tag if needed.'
+ - label: '**conduit-connector-postgres**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-postgres/` and tag if needed.'
+ - label: '**conduit-connector-log**: Bump the Connector SDK `scripts/bump-sdk-in-connectors.sh vX.Y.Z`. Run `scripts/get-compare-link.sh ../conduit-connector-log/` and tag if needed.'
+ - label: '**Bump built-in connectors on Conduit**: Run `scripts/bump-builtin-connectors.sh`'
+ - label: '**Release Conduit** (see instructions below).'
+
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Documentation
+
+ - type: checkboxes
+ attributes:
+ label: Documentation
+ options:
+ - label: 'Write a blog post.'
+ - label: 'Regenerate processor documentation on [conduit-site](https://github.com/ConduitIO/conduit-site).'
+ - label: 'Update the banner on the website.'
+ - label: 'Search and replace the latest version in `conduit-site`.'
+ - label: 'Search and replace the latest version in [README.md](../README.md).'
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Releasing Conduit
+
+ - type: checkboxes
+ attributes:
+ label: Releasing Conduit
+ options:
+ - label: 'Trigger a release by pushing a new tag starting with `v` (e.g., `v1.2.3`).'
+ - label: 'Use the script [scripts/tag.sh](https://github.com/ConduitIO/conduit/blob/main/scripts/tag.sh) to ensure version conformity.'
+
+ - type: markdown
+ attributes:
+ value: |
+ ```sh
+ scripts/tag.sh 1.2.3
+ ```
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Nightly Builds
+
+ - Nightly builds (binaries and Docker images) are provided and kept for 7 days.
+ - The latest nightly Docker image is tagged with `latest-nightly`.
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Implementation
+
+ - The GitHub release is created with [GoReleaser](https://github.com/goreleaser/goreleaser/).
+ - Nightly builds are triggered by a GitHub action, defined in [trigger-nightly.yml](/.github/workflows/trigger-nightly.yml).
+
+ - type: markdown
+ attributes:
+ value: |
+ ## Notes
+
+ - The "Trigger nightly build" GitHub action requires a personal access token, not the GitHub token provided by Actions.
+
+ For more information, refer to [Triggering a workflow from a workflow](https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
+
+---
+
+Please ensure each step is completed before closing this issue.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index be0d1ab00..fceaa231f 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -6,3 +6,6 @@ contact_links:
- name: 📄 Documentation Issue
url: https://github.com/ConduitIO/conduit-site
about: Please report issues regarding documentation here.
+ - name: 💬 Chat with us
+ url: https://discord.com/channels/828680256877363200/933725823813451797
+ about: Join our Discord server to chat with us.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c8b6c51c6..83a5bcae4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,6 +10,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
- name: Set up Go
uses: actions/setup-go@v5
@@ -17,4 +19,11 @@ jobs:
go-version-file: 'go.mod'
- name: Test
- run: make test-integration GOTEST_FLAGS="-v -count=1"
+ # The test coverate and the test report are needed for the SonarCloud analysis
+ run: make test-integration GOTEST_FLAGS="-v -count=1 -coverprofile coverage.out" 2>&1 | tee test-report.out
+
+ - name: SonarCloud Scan
+ uses: SonarSource/sonarcloud-github-action@v3.1.0
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any
+ SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
diff --git a/.gitignore b/.gitignore
index eb00ee4a3..8448637a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -97,3 +97,5 @@ pkg/plugin/processor/standalone/test/wasm_processors/*/processor.wasm
# this one is needed for integration tests
!pkg/provisioning/test/source-file.txt
+
+golangci-report.xml
\ No newline at end of file
diff --git a/cmd/cli/cli.go b/cmd/cli/cli.go
new file mode 100644
index 000000000..7849903d3
--- /dev/null
+++ b/cmd/cli/cli.go
@@ -0,0 +1,147 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+ "fmt"
+ "os"
+
+ "github.com/conduitio/conduit/pkg/conduit"
+ "github.com/spf13/cobra"
+)
+
+var (
+ initArgs InitArgs
+ pipelinesInitArgs PipelinesInitArgs
+)
+
+type Instance struct {
+ rootCmd *cobra.Command
+}
+
+// New creates a new CLI Instance.
+func New() *Instance {
+ return &Instance{
+ rootCmd: buildRootCmd(),
+ }
+}
+
+func (i *Instance) Run() {
+ if err := i.rootCmd.Execute(); err != nil {
+ _, _ = fmt.Fprintf(os.Stderr, "%v\n", err)
+ os.Exit(1)
+ }
+}
+
+func buildRootCmd() *cobra.Command {
+ cfg := conduit.DefaultConfig()
+
+ cmd := &cobra.Command{
+ Use: "conduit",
+ Short: "Conduit CLI",
+ Long: "Conduit CLI is a command-line that helps you interact with and manage Conduit.",
+ Version: conduit.Version(true),
+ Run: func(cmd *cobra.Command, args []string) {
+ e := &conduit.Entrypoint{}
+ e.Serve(cfg)
+ },
+ }
+ cmd.CompletionOptions.DisableDefaultCmd = true
+ conduit.Flags(&cfg).VisitAll(cmd.Flags().AddGoFlag)
+
+ // init
+ cmd.AddCommand(buildInitCmd())
+
+ // pipelines
+ cmd.AddGroup(&cobra.Group{
+ ID: "pipelines",
+ Title: "Pipelines",
+ })
+ cmd.AddCommand(buildPipelinesCmd())
+
+ return cmd
+}
+
+func buildInitCmd() *cobra.Command {
+ initCmd := &cobra.Command{
+ Use: "init",
+ Short: "Initialize Conduit with a configuration file and directories.",
+ Args: cobra.NoArgs,
+ RunE: func(cmd *cobra.Command, args []string) error {
+ return NewConduitInit(initArgs).Run()
+ },
+ }
+ initCmd.Flags().StringVar(
+ &initArgs.Path,
+ "config.path",
+ "",
+ "path where Conduit will be initialized",
+ )
+
+ return initCmd
+}
+
+func buildPipelinesCmd() *cobra.Command {
+ pipelinesCmd := &cobra.Command{
+ Use: "pipelines",
+ Short: "Initialize and manage pipelines",
+ Args: cobra.NoArgs,
+ GroupID: "pipelines",
+ }
+
+ pipelinesCmd.AddCommand(buildPipelinesInitCmd())
+
+ return pipelinesCmd
+}
+
+func buildPipelinesInitCmd() *cobra.Command {
+ pipelinesInitCmd := &cobra.Command{
+ Use: "init [pipeline-name]",
+ Short: "Initialize an example pipeline.",
+ Long: `Initialize a pipeline configuration file, with all of parameters for source and destination connectors
+initialized and described. The source and destination connector can be chosen via flags. If no connectors are chosen, then
+a simple and runnable generator-to-log pipeline is configured.`,
+ Args: cobra.MaximumNArgs(1),
+ Example: " conduit pipelines init awesome-pipeline-name --source postgres --destination kafka --path pipelines/pg-to-kafka.yaml",
+ RunE: func(cmd *cobra.Command, args []string) error {
+ if len(args) > 0 {
+ pipelinesInitArgs.Name = args[0]
+ }
+ return NewPipelinesInit(pipelinesInitArgs).Run()
+ },
+ }
+
+ // Add flags to pipelines init command
+ pipelinesInitCmd.Flags().StringVar(
+ &pipelinesInitArgs.Source,
+ "source",
+ "",
+ "Source connector (any of the built-in connectors).",
+ )
+ pipelinesInitCmd.Flags().StringVar(
+ &pipelinesInitArgs.Destination,
+ "destination",
+ "",
+ "Destination connector (any of the built-in connectors).",
+ )
+ pipelinesInitCmd.Flags().StringVar(
+ &pipelinesInitArgs.Path,
+ "pipelines.path",
+ "./pipelines",
+ "Path where the pipeline will be saved.",
+ )
+
+ return pipelinesInitCmd
+}
diff --git a/cmd/cli/conduit_init.go b/cmd/cli/conduit_init.go
new file mode 100644
index 000000000..e0d23ac2f
--- /dev/null
+++ b/cmd/cli/conduit_init.go
@@ -0,0 +1,129 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+ "flag"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/conduitio/conduit/cmd/cli/internal"
+ "github.com/conduitio/conduit/pkg/conduit"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/yaml/v3"
+)
+
+type InitArgs struct {
+ Path string
+}
+
+type ConduitInit struct {
+ args InitArgs
+}
+
+func NewConduitInit(args InitArgs) *ConduitInit {
+ return &ConduitInit{args: args}
+}
+
+func (i *ConduitInit) Run() error {
+ err := i.createDirs()
+ if err != nil {
+ return err
+ }
+
+ err = i.createConfigYAML()
+ if err != nil {
+ return fmt.Errorf("failed to create config YAML: %w", err)
+ }
+
+ fmt.Println(`
+Conduit has been initialized!
+
+To quickly create an example pipeline, run 'conduit pipelines init'.
+To see how you can customize your first pipeline, run 'conduit pipelines init --help'.`)
+
+ return nil
+}
+
+func (i *ConduitInit) createConfigYAML() error {
+ cfgYAML := internal.NewYAMLTree()
+ i.conduitCfgFlags().VisitAll(func(f *flag.Flag) {
+ if i.isHiddenFlag(f.Name) {
+ return // hide flag from output
+ }
+ cfgYAML.Insert(f.Name, f.DefValue, f.Usage)
+ })
+
+ yamlData, err := yaml.Marshal(cfgYAML.Root)
+ if err != nil {
+ return cerrors.Errorf("error marshaling YAML: %w\n", err)
+ }
+
+ path := filepath.Join(i.path(), "conduit.yaml")
+ err = os.WriteFile(path, yamlData, 0o600)
+ if err != nil {
+ return cerrors.Errorf("error writing conduit.yaml: %w", err)
+ }
+ fmt.Printf("Configuration file written to %v\n", path)
+
+ return nil
+}
+
+func (i *ConduitInit) createDirs() error {
+ dirs := []string{"processors", "connectors", "pipelines"}
+
+ for _, dir := range dirs {
+ path := filepath.Join(i.path(), dir)
+
+ // Attempt to create the directory, skipping if it already exists
+ if err := os.Mkdir(path, os.ModePerm); err != nil {
+ if os.IsExist(err) {
+ fmt.Printf("Directory '%s' already exists, skipping...\n", path)
+ continue
+ }
+ return fmt.Errorf("failed to create directory '%s': %w", path, err)
+ }
+
+ fmt.Printf("Created directory: %s\n", path)
+ }
+
+ return nil
+}
+
+func (i *ConduitInit) isHiddenFlag(name string) bool {
+ return name == "dev" ||
+ strings.HasPrefix(name, "dev.") ||
+ conduit.DeprecatedFlags[name]
+}
+
+func (i *ConduitInit) conduitCfgFlags() *flag.FlagSet {
+ cfg := conduit.DefaultConfigWithBasePath(i.path())
+ return conduit.Flags(&cfg)
+}
+
+func (i *ConduitInit) path() string {
+ if i.args.Path != "" {
+ return i.args.Path
+ }
+
+ path, err := os.Getwd()
+ if err != nil {
+ panic(cerrors.Errorf("failed to get current working directory: %w", err))
+ }
+
+ return path
+}
diff --git a/cmd/cli/internal/yaml.go b/cmd/cli/internal/yaml.go
new file mode 100644
index 000000000..28ef80f04
--- /dev/null
+++ b/cmd/cli/internal/yaml.go
@@ -0,0 +1,85 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package internal
+
+import (
+ "strings"
+
+ "github.com/conduitio/yaml/v3"
+)
+
+// YAMLTree represents a YAML document.
+// It makes it possible to insert value nodes with comments.
+type YAMLTree struct {
+ Root *yaml.Node
+}
+
+func NewYAMLTree() *YAMLTree {
+ return &YAMLTree{
+ Root: &yaml.Node{
+ Kind: yaml.MappingNode,
+ },
+ }
+}
+
+// Insert adds a path with a value to the tree
+func (t *YAMLTree) Insert(path, value, comment string) {
+ parts := strings.Split(path, ".")
+ current := t.Root
+
+ // For each part of the path
+ for i, part := range parts {
+ // Create key node
+ keyNode := &yaml.Node{
+ Kind: yaml.ScalarNode,
+ Value: part,
+ }
+
+ // Find or create value node
+ var valueNode *yaml.Node
+ found := false
+
+ // Look for existing key in current mapping
+ for i := 0; i < len(current.Content); i += 2 {
+ if current.Content[i].Value == part {
+ valueNode = current.Content[i+1]
+ found = true
+ break
+ }
+ }
+
+ // If not found, create new node
+ if !found {
+ // If this is the last part, create scalar value node
+ if i == len(parts)-1 {
+ valueNode = &yaml.Node{
+ Kind: yaml.ScalarNode,
+ Value: value,
+ }
+ keyNode.HeadComment = comment
+ } else {
+ // Otherwise create mapping node for nesting
+ valueNode = &yaml.Node{
+ Kind: yaml.MappingNode,
+ }
+ }
+ // Add key-value pair to current node's content
+ current.Content = append(current.Content, keyNode, valueNode)
+ }
+
+ // Move to next level
+ current = valueNode
+ }
+}
diff --git a/cmd/cli/pipeline.tmpl b/cmd/cli/pipeline.tmpl
new file mode 100644
index 000000000..b2346c3ec
--- /dev/null
+++ b/cmd/cli/pipeline.tmpl
@@ -0,0 +1,30 @@
+version: "2.2"
+pipelines:
+ - id: example-pipeline
+ status: running
+ name: "{{ .Name }}"
+ connectors:
+ - id: example-source
+ type: source
+ plugin: "{{ .SourceSpec.Name }}"
+ {{ if gt (len .SourceSpec.Params) 0 -}}
+ settings:
+ {{- range $name, $param := .SourceSpec.Params }}
+ {{ formatParameterDescriptionYAML $param.Description }}
+ # Type: {{ $param.Type }}
+ # {{ formatParameterRequired $param }}
+ {{ $name }}: {{ formatParameterValueYAML $param.Default }}
+ {{- end }}
+ {{- end }}
+ - id: example-destination
+ type: destination
+ plugin: "{{ .DestinationSpec.Name }}"
+ {{ if gt (len .DestinationSpec.Params) 0 -}}
+ settings:
+ {{- range $name, $param := .DestinationSpec.Params }}
+ {{ formatParameterDescriptionYAML $param.Description }}
+ # Type: {{ $param.Type }}
+ # {{ formatParameterRequired $param }}
+ {{ $name }}: {{ formatParameterValueYAML $param.Default }}
+ {{- end }}
+ {{- end }}
diff --git a/cmd/cli/pipelines_init.go b/cmd/cli/pipelines_init.go
new file mode 100644
index 000000000..2094b162f
--- /dev/null
+++ b/cmd/cli/pipelines_init.go
@@ -0,0 +1,357 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cli
+
+import (
+ _ "embed"
+ "fmt"
+ "log"
+ "os"
+ "path/filepath"
+ "strings"
+ "text/template"
+
+ "github.com/conduitio/conduit-commons/config"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/plugin"
+ "github.com/conduitio/conduit/pkg/plugin/connector/builtin"
+)
+
+//go:embed pipeline.tmpl
+var pipelineCfgTmpl string
+
+var funcMap = template.FuncMap{
+ "formatParameterValueTable": formatParameterValueTable,
+ "formatParameterValueYAML": formatParameterValueYAML,
+ "formatParameterDescriptionYAML": formatParameterDescriptionYAML,
+ "formatParameterRequired": formatParameterRequired,
+}
+
+func formatParameterRequired(param config.Parameter) string {
+ for _, v := range param.Validations {
+ if v.Type() == config.ValidationTypeRequired {
+ return "Required"
+ }
+ }
+
+ return "Optional"
+}
+
+// formatParameterValue formats the value of a configuration parameter.
+func formatParameterValueTable(value string) string {
+ switch {
+ case value == "":
+ return ``
+ case strings.Contains(value, "\n"):
+ // specifically used in the javascript processor
+ return fmt.Sprintf("\n```js\n%s\n```\n", value)
+ default:
+ return fmt.Sprintf("`%s`", value)
+ }
+}
+
+func formatParameterDescriptionYAML(description string) string {
+ const (
+ indentLen = 10
+ prefix = "# "
+ lineLen = 80
+ tmpNewLine = "〠"
+ )
+
+ // remove markdown new lines
+ description = strings.ReplaceAll(description, "\n\n", tmpNewLine)
+ description = strings.ReplaceAll(description, "\n", " ")
+ description = strings.ReplaceAll(description, tmpNewLine, "\n")
+
+ formattedDescription := formatMultiline(description, strings.Repeat(" ", indentLen)+prefix, lineLen)
+ // remove first indent and last new line
+ formattedDescription = formattedDescription[indentLen : len(formattedDescription)-1]
+ return formattedDescription
+}
+
+func formatMultiline(
+ input string,
+ prefix string,
+ maxLineLen int,
+) string {
+ textLen := maxLineLen - len(prefix)
+
+ // split the input into lines of length textLen
+ lines := strings.Split(input, "\n")
+ var formattedLines []string
+ for _, line := range lines {
+ if len(line) <= textLen {
+ formattedLines = append(formattedLines, line)
+ continue
+ }
+
+ // split the line into multiple lines, don't break words
+ words := strings.Fields(line)
+ var formattedLine string
+ for _, word := range words {
+ if len(formattedLine)+len(word) > textLen {
+ formattedLines = append(formattedLines, formattedLine[1:])
+ formattedLine = ""
+ }
+ formattedLine += " " + word
+ }
+ if formattedLine != "" {
+ formattedLines = append(formattedLines, formattedLine[1:])
+ }
+ }
+
+ // combine lines including indent and prefix
+ var formatted string
+ for _, line := range formattedLines {
+ formatted += prefix + line + "\n"
+ }
+
+ return formatted
+}
+
+func formatParameterValueYAML(value string) string {
+ switch {
+ case value == "":
+ return `""`
+ case strings.Contains(value, "\n"):
+ // specifically used in the javascript processor
+ formattedValue := formatMultiline(value, " ", 10000)
+ return fmt.Sprintf("|\n%s", formattedValue)
+ default:
+ return fmt.Sprintf(`'%s'`, value)
+ }
+}
+
+const (
+ defaultDestination = "file"
+ defaultSource = "generator"
+)
+
+type pipelineTemplate struct {
+ Name string
+ SourceSpec connectorTemplate
+ DestinationSpec connectorTemplate
+}
+
+type connectorTemplate struct {
+ Name string
+ Params config.Parameters
+}
+
+type PipelinesInitArgs struct {
+ Name string
+ Source string
+ Destination string
+ Path string
+}
+
+type PipelinesInit struct {
+ args PipelinesInitArgs
+}
+
+func NewPipelinesInit(args PipelinesInitArgs) *PipelinesInit {
+ return &PipelinesInit{args: args}
+}
+
+func (pi *PipelinesInit) Run() error {
+ var pipeline pipelineTemplate
+ // if no source/destination arguments are provided,
+ // we build a runnable example pipeline
+ if pi.args.Source == "" && pi.args.Destination == "" {
+ pipeline = pi.buildDemoPipeline()
+ } else {
+ p, err := pi.buildTemplatePipeline()
+ if err != nil {
+ return err
+ }
+ pipeline = p
+ }
+
+ err := pi.write(pipeline)
+ if err != nil {
+ return cerrors.Errorf("could not write pipeline: %w", err)
+ }
+
+ fmt.Printf(`Your pipeline has been initialized and created at %s.
+
+To run the pipeline, execute:
+
+conduit --pipelines.path %s`,
+ pi.configFilePath(), pi.configFilePath())
+
+ return nil
+}
+
+func (pi *PipelinesInit) buildTemplatePipeline() (pipelineTemplate, error) {
+ srcParams, err := pi.getSourceParams()
+ if err != nil {
+ return pipelineTemplate{}, cerrors.Errorf("failed getting source params: %w", err)
+ }
+
+ dstParams, err := pi.getDestinationParams()
+ if err != nil {
+ return pipelineTemplate{}, cerrors.Errorf("failed getting destination params: %w", err)
+ }
+
+ return pipelineTemplate{
+ Name: pi.pipelineName(),
+ SourceSpec: srcParams,
+ DestinationSpec: dstParams,
+ }, nil
+}
+
+func (pi *PipelinesInit) buildDemoPipeline() pipelineTemplate {
+ srcParams, _ := pi.getSourceParams()
+ dstParams, _ := pi.getDestinationParams()
+
+ return pipelineTemplate{
+ Name: pi.pipelineName(),
+ SourceSpec: connectorTemplate{
+ Name: defaultSource,
+ Params: map[string]config.Parameter{
+ "format.type": {
+ Description: srcParams.Params["format.type"].Description,
+ Type: srcParams.Params["format.type"].Type,
+ Default: "structured",
+ Validations: srcParams.Params["format.type"].Validations,
+ },
+ "format.options.scheduledDeparture": {
+ Description: "Generate field 'scheduledDeparture' of type 'time'",
+ Type: config.ParameterTypeString,
+ Default: "time",
+ },
+ "format.options.airline": {
+ Description: "Generate field 'airline' of type string",
+ Type: config.ParameterTypeString,
+ Default: "string",
+ },
+ "rate": {
+ Description: srcParams.Params["rate"].Description,
+ Type: srcParams.Params["rate"].Type,
+ Default: "1",
+ },
+ },
+ },
+ DestinationSpec: connectorTemplate{
+ Name: defaultDestination,
+ Params: map[string]config.Parameter{
+ "path": {
+ Description: dstParams.Params["path"].Description,
+ Type: dstParams.Params["path"].Type,
+ Default: "./destination.txt",
+ },
+ },
+ },
+ }
+}
+
+func (pi *PipelinesInit) getOutput() *os.File {
+ output, err := os.OpenFile(pi.configFilePath(), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o600)
+ if err != nil {
+ log.Fatalf("error: failed to open %s: %v", pi.args.Path, err)
+ }
+
+ return output
+}
+
+func (pi *PipelinesInit) write(pipeline pipelineTemplate) error {
+ t, err := template.New("").Funcs(funcMap).Option("missingkey=zero").Parse(pipelineCfgTmpl)
+ if err != nil {
+ return cerrors.Errorf("failed parsing template: %w", err)
+ }
+
+ output := pi.getOutput()
+ defer output.Close()
+
+ err = t.Execute(output, pipeline)
+ if err != nil {
+ return cerrors.Errorf("failed executing template: %w", err)
+ }
+
+ return nil
+}
+
+func (pi *PipelinesInit) getSourceParams() (connectorTemplate, error) {
+ for _, conn := range builtin.DefaultBuiltinConnectors {
+ specs := conn.NewSpecification()
+ if specs.Name == pi.sourceConnector() || specs.Name == "builtin:"+pi.sourceConnector() {
+ if conn.NewSource == nil {
+ return connectorTemplate{}, cerrors.Errorf("plugin %v has no source", pi.sourceConnector())
+ }
+
+ return connectorTemplate{
+ Name: specs.Name,
+ Params: conn.NewSource().Parameters(),
+ }, nil
+ }
+ }
+
+ return connectorTemplate{}, cerrors.Errorf("%v: %w", pi.sourceConnector(), plugin.ErrPluginNotFound)
+}
+
+func (pi *PipelinesInit) getDestinationParams() (connectorTemplate, error) {
+ for _, conn := range builtin.DefaultBuiltinConnectors {
+ specs := conn.NewSpecification()
+ if specs.Name == pi.destinationConnector() || specs.Name == "builtin:"+pi.destinationConnector() {
+ if conn.NewDestination == nil {
+ return connectorTemplate{}, cerrors.Errorf("plugin %v has no source", pi.destinationConnector())
+ }
+
+ return connectorTemplate{
+ Name: specs.Name,
+ Params: conn.NewDestination().Parameters(),
+ }, nil
+ }
+ }
+
+ return connectorTemplate{}, cerrors.Errorf("%v: %w", pi.destinationConnector(), plugin.ErrPluginNotFound)
+}
+
+func (pi *PipelinesInit) configFilePath() string {
+ path := pi.args.Path
+ if path == "" {
+ path = "./pipelines"
+ }
+
+ return filepath.Join(path, pi.configFileName())
+}
+
+func (pi *PipelinesInit) configFileName() string {
+ return fmt.Sprintf("pipeline-%s.yaml", pi.pipelineName())
+}
+
+func (pi *PipelinesInit) sourceConnector() string {
+ if pi.args.Source != "" {
+ return pi.args.Source
+ }
+
+ return defaultSource
+}
+
+func (pi *PipelinesInit) destinationConnector() string {
+ if pi.args.Destination != "" {
+ return pi.args.Destination
+ }
+
+ return defaultDestination
+}
+
+func (pi *PipelinesInit) pipelineName() string {
+ if pi.args.Name != "" {
+ return pi.args.Name
+ }
+
+ return fmt.Sprintf("%s-to-%s", pi.sourceConnector(), pi.destinationConnector())
+}
diff --git a/cmd/conduit/main.go b/cmd/conduit/main.go
index 89d784a5d..a744d7f3e 100644
--- a/cmd/conduit/main.go
+++ b/cmd/conduit/main.go
@@ -14,8 +14,10 @@
package main
-import "github.com/conduitio/conduit/pkg/conduit"
+import (
+ "github.com/conduitio/conduit/cmd/cli"
+)
func main() {
- conduit.Serve(conduit.DefaultConfig())
+ cli.New().Run()
}
diff --git a/docs/releases.md b/docs/releases.md
index 18edec92d..e5bb99c6c 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -12,69 +12,6 @@ A Conduit release has the following parts:
- a GitHub package, which is the official Docker image for Conduit. It's available on GitHub's Container Registry. The
latest Docker image which is not a nightly is tagged with `latest`.
-## Before a release
-
-### Update dependencies
-
-Dependencies should be updated in the order described below. The instructions
-assume that this repository and the other Conduit repositories are all cloned in
-the same directory.
-
-1. [`conduit-commons`](https://github.com/ConduitIO/conduit-commons)
- - Run `scripts/get-compare-link.sh ../conduit-commons/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-2. [`conduit-connector-protocol`](https://github.com/conduitio/conduit-connector-protocol)
- - Update `conduit-commons` if needed: `go get github.com/conduitio/conduit-commons@vX.Y.Z`
- - Run `scripts/get-compare-link.sh ../conduit-connector-protocol/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-3. [`conduit-connector-sdk`](https://github.com/ConduitIO/conduit-connector-sdk)
- - Update `conduit-commons` if needed: `go get github.com/conduitio/conduit-commons@vX.Y.Z`
- - Update `conduit-connector-protocol` if needed: `go get github.com/conduitio/conduit-connector-protocol@vX.Y.Z`
- - Run `scripts/get-compare-link.sh ../conduit-connector-sdk/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-4. [`conduit-processor-sdk`](https://github.com/ConduitIO/conduit-processor-sdk)
- - Update `conduit-commons` if needed: `go get github.com/conduitio/conduit-commons@vX.Y.Z`
- - Run `scripts/get-compare-link.sh ../conduit-processor-sdk/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-5. [`conduit-schema-registry`](https://github.com/ConduitIO/conduit-schema-registry/)
- - Update `conduit-commons` if needed: `go get github.com/conduitio/conduit-commons@vX.Y.Z`
- - Run `scripts/get-compare-link.sh ../conduit-schema-registry/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-6. Bump the Connector SDK dependency on [`conduit-connector-template`](https://github.com/ConduitIO/conduit-connector-template)
-7. Bump the Connector SDK in all the built-in connectors: `scripts/bump-sdk-in-connectors.sh vX.Y.Z`
-8. For each of the built-in connectors (file, kafka, generator, s3, postgres, log):
- - Run `scripts/get-compare-link.sh ../conduit-processor-sdk/` to compare the latest tag and the `main` branch.
- - If the changes should be released/tagged, push a new tag.
-9. Bump the built-in connectors: `scripts/bump-builtin-connectors.sh`
-10. Conduit itself
- - Update `conduit-schema-registry` if needed
- - Update `conduit-connector-sdk` if needed
- - Update `conduit-processor-sdk` if needed
- - Update `conduit-connector-protocol` if needed
- - Update `conduit-commons` if needed
- - Release Conduit (see instructions below)
-
-## Documentation
-
-1. Write a blog post.
-2. Regenerate processor documentation on [`conduit-site`](https://github.com/ConduitIO/conduit-site).
-3. Update banner on the
- web-site ([example](https://github.com/ConduitIO/conduit-site/pull/47/files#diff-cc8abb6104e21d495dc8f64639c7b03419226d920d1c545df51be9b0b73b2784)).
-4. Search for the latest version in `conduit-site` and replace with new
- version (e.g. search for 0.11.1 and replace with 0.12.0)
-5. Search for the latest version in [README.md](../README.md) and replace with
- new version (e.g. search for 0.11 and replace with 0.12)
-
-## Releasing Conduit
-
-A release is triggered by pushing a new tag which starts with `v` (for example `v1.2.3`). Everything else is then
-handled by GoReleaser and GitHub actions. To push a new tag, please use the script [scripts/tag.sh](https://github.com/ConduitIO/conduit/blob/main/scripts/tag.sh),
-which also checks if the version conforms to SemVer. Example:
-
-```sh
-scripts/tag.sh 1.2.3
-```
-
## Nightly builds
We provide nightly builds (binaries and Docker images) and keep them for 7 days. The latest nightly Docker image is tagged
@@ -94,3 +31,11 @@ Docker images.
The "Trigger nightly build" GH action requires a personal access token, and _not_ a GitHub token provided by Actions. The
reason is that a workflow which produces an event using a GitHub token cannot trigger another workflow through that event.
For more information, please check [Triggering a workflow from a workflow](https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
+
+## How to release
+
+In order to create a new Conduit release, you'll need to create a new issue using the [Conduit release template](https://github.com/ConduitIO/conduit/issues/new?assignees=&labels=release&projects=&template=4-conduit-release.yml&title=%5BRelease%5D+Conduit+vX.Y.Z).
+
+The issue will guide you through the process of creating a new release.
+
+It will also provide you with a checklist to make sure you don't forget anything.
diff --git a/go.mod b/go.mod
index e8a2dc8df..6972f897e 100644
--- a/go.mod
+++ b/go.mod
@@ -16,9 +16,9 @@ require (
github.com/conduitio/conduit-connector-postgres v0.9.1
github.com/conduitio/conduit-connector-protocol v0.8.0
github.com/conduitio/conduit-connector-s3 v0.7.0
- github.com/conduitio/conduit-connector-sdk v0.11.0
+ github.com/conduitio/conduit-connector-sdk v0.11.1
github.com/conduitio/conduit-processor-sdk v0.3.0
- github.com/conduitio/conduit-schema-registry v0.2.0
+ github.com/conduitio/conduit-schema-registry v0.2.1
github.com/conduitio/yaml/v3 v3.3.0
github.com/dop251/goja v0.0.0-20240806095544-3491d4a58fbe
github.com/dop251/goja_nodejs v0.0.0-20231122114759-e84d9a924c5c
@@ -28,7 +28,7 @@ require (
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
github.com/gorilla/websocket v1.5.3
- github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0
+ github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0
github.com/hashicorp/go-hclog v1.6.3
github.com/hashicorp/go-plugin v1.6.2
github.com/jpillora/backoff v1.0.0
@@ -40,6 +40,8 @@ require (
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.60.1
github.com/rs/zerolog v1.33.0
+ github.com/sourcegraph/conc v0.3.0
+ github.com/spf13/cobra v1.8.1
github.com/stealthrocket/wazergo v0.19.1
github.com/tetratelabs/wazero v1.8.1
github.com/twmb/franz-go/pkg/sr v1.2.0
@@ -48,8 +50,8 @@ require (
golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c
golang.org/x/tools v0.26.0
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028
- google.golang.org/genproto/googleapis/api v0.0.0-20240930140551-af27646dc61f
- google.golang.org/grpc v1.67.1
+ google.golang.org/genproto/googleapis/api v0.0.0-20241021214115-324edc3d5d38
+ google.golang.org/grpc v1.68.0
google.golang.org/protobuf v1.35.1
gopkg.in/tomb.v2 v2.0.0-20161208151619-d5d1b5820637
mvdan.cc/gofumpt v0.7.0
@@ -311,11 +313,9 @@ require (
github.com/sivchari/containedctx v1.0.3 // indirect
github.com/sivchari/tenv v1.10.0 // indirect
github.com/sonatard/noctx v0.0.2 // indirect
- github.com/sourcegraph/conc v0.3.0 // indirect
github.com/sourcegraph/go-diff v0.7.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
- github.com/spf13/cobra v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/viper v1.19.0 // indirect
github.com/ssgreg/nlreturn/v2 v2.2.1 // indirect
@@ -369,7 +369,7 @@ require (
golang.org/x/term v0.25.0 // indirect
golang.org/x/text v0.19.0 // indirect
golang.org/x/time v0.7.0 // indirect
- google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f // indirect
+ google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
diff --git a/go.sum b/go.sum
index 1d196e151..042c38e84 100644
--- a/go.sum
+++ b/go.sum
@@ -236,12 +236,12 @@ github.com/conduitio/conduit-connector-protocol v0.8.0 h1:HnuvFC30H3v/aw/czEvKFe
github.com/conduitio/conduit-connector-protocol v0.8.0/go.mod h1:qCRWXzp1vMN3PjuacY8FTLizp7gyE8wzgk6ieujB1zg=
github.com/conduitio/conduit-connector-s3 v0.7.0 h1:dRRvk++hlSQ+F36r1D4s1Ob9qpmT+IyaD8MEwH0M9ac=
github.com/conduitio/conduit-connector-s3 v0.7.0/go.mod h1:IjipaXERahO3hP6yBBia9FINkoYAsUdDMdszZQGTSEM=
-github.com/conduitio/conduit-connector-sdk v0.11.0 h1:u4zEOJl/FHqWY/m7CYIhKk8N0J2bwAo5AGp0lEzAqcE=
-github.com/conduitio/conduit-connector-sdk v0.11.0/go.mod h1:wuy6E77sR5GRbi3sYh81WFemJaUWT/KDBxQKvkaHpA4=
+github.com/conduitio/conduit-connector-sdk v0.11.1 h1:UzwburNYLYRDfJV06x1K7JXAdQ+HnxLkm4vnEq3c+pY=
+github.com/conduitio/conduit-connector-sdk v0.11.1/go.mod h1:aAgT+uw0IJBNDI4KIOiQW7vhR7tC2r+ETK1BL2bIC28=
github.com/conduitio/conduit-processor-sdk v0.3.0 h1:ZLvkPrgjnHc/fkivHi959mN9QWwLAPNCsaGpdPs48u4=
github.com/conduitio/conduit-processor-sdk v0.3.0/go.mod h1:tnep/roIO45cxU8F9YpgjtZpQ1y4CBXZU8+YV963bUs=
-github.com/conduitio/conduit-schema-registry v0.2.0 h1:OVgjiyXxQEup1jTmFYrKHcetWL7Dmw7h5w8jGlWkB1I=
-github.com/conduitio/conduit-schema-registry v0.2.0/go.mod h1:lKvHtr1bvV9SD+5JkheXo2EG+nmEGaVoIgZw0zFfbcw=
+github.com/conduitio/conduit-schema-registry v0.2.1 h1:VAJ4mvYCyGqBO4d4DjGoUDGQ12OTEWEQ5nMgN38W9Cg=
+github.com/conduitio/conduit-schema-registry v0.2.1/go.mod h1:l8H2Ci4xtLuv8M15DvGQtmDYppdJT/P5DywO03WcLm4=
github.com/conduitio/yaml/v3 v3.3.0 h1:kbbaOSHcuH39gP4+rgbJGl6DSbLZcJgEaBvkEXJlCsI=
github.com/conduitio/yaml/v3 v3.3.0/go.mod h1:JNgFMOX1t8W4YJuRZOh6GggVtSMsgP9XgTw+7dIenpc=
github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0=
@@ -492,8 +492,8 @@ github.com/gostaticanalysis/nilerr v0.1.1/go.mod h1:wZYb6YI5YAxxq0i1+VJbY0s2YONW
github.com/gostaticanalysis/testutil v0.3.1-0.20210208050101-bfb5c8eec0e4/go.mod h1:D+FIZ+7OahH3ePw/izIEeH5I06eKs1IKI4Xr64/Am3M=
github.com/gostaticanalysis/testutil v0.4.0 h1:nhdCmubdmDF6VEatUNjgUZBJKWRqugoISdUv3PPQgHY=
github.com/gostaticanalysis/testutil v0.4.0/go.mod h1:bLIoPefWXrRi/ssLFWX1dx7Repi5x3CuviD3dgAZaBU=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0=
github.com/hamba/avro/v2 v2.26.0 h1:IaT5l6W3zh7K67sMrT2+RreJyDTllBGVJm4+Hedk9qE=
github.com/hamba/avro/v2 v2.26.0/go.mod h1:I8glyswHnpED3Nlx2ZdUe+4LJnCOOyiCzLMno9i/Uu0=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
@@ -1248,10 +1248,10 @@ google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
-google.golang.org/genproto/googleapis/api v0.0.0-20240930140551-af27646dc61f h1:jTm13A2itBi3La6yTGqn8bVSrc3ZZ1r8ENHlIXBfnRA=
-google.golang.org/genproto/googleapis/api v0.0.0-20240930140551-af27646dc61f/go.mod h1:CLGoBuH1VHxAUXVPP8FfPwPEVJB6lz3URE5mY2SuayE=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f h1:cUMEy+8oS78BWIH9OWazBkzbr090Od9tWBNtZHkOhf0=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240930140551-af27646dc61f/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/genproto/googleapis/api v0.0.0-20241021214115-324edc3d5d38 h1:2oV8dfuIkM1Ti7DwXc0BJfnwr9csz4TDXI9EmiI+Rbw=
+google.golang.org/genproto/googleapis/api v0.0.0-20241021214115-324edc3d5d38/go.mod h1:vuAjtvlwkDKF6L1GQ0SokiRLCGFfeBUXWr/aFFkHACc=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
@@ -1261,8 +1261,8 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
-google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E=
-google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
+google.golang.org/grpc v1.68.0 h1:aHQeeJbo8zAkAa3pRzrVjZlbz6uSfeOXlJNQM0RAbz0=
+google.golang.org/grpc v1.68.0/go.mod h1:fmSPC5AsjSBCK54MyHRx48kpOti1/jRfOlwEWywNjWA=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
diff --git a/pkg/conduit/config.go b/pkg/conduit/config.go
index 6d15cffd0..0f9e3639e 100644
--- a/pkg/conduit/config.go
+++ b/pkg/conduit/config.go
@@ -16,6 +16,7 @@ package conduit
import (
"os"
+ "path/filepath"
"time"
"github.com/conduitio/conduit-commons/database"
@@ -111,6 +112,11 @@ type Config struct {
}
}
+ Preview struct {
+ // PipelineArchV2 enables the new pipeline architecture.
+ PipelineArchV2 bool
+ }
+
dev struct {
cpuprofile string
memprofile string
@@ -119,12 +125,16 @@ type Config struct {
}
func DefaultConfig() Config {
+ return DefaultConfigWithBasePath(".")
+}
+
+func DefaultConfigWithBasePath(basePath string) Config {
var cfg Config
cfg.DB.Type = DBTypeBadger
- cfg.DB.Badger.Path = "conduit.db"
+ cfg.DB.Badger.Path = filepath.Join(basePath, "conduit.db")
cfg.DB.Postgres.Table = "conduit_kv_store"
- cfg.DB.SQLite.Path = "conduit.db"
+ cfg.DB.SQLite.Path = filepath.Join(basePath, "conduit.db")
cfg.DB.SQLite.Table = "conduit_kv_store"
cfg.API.Enabled = true
@@ -135,11 +145,11 @@ func DefaultConfig() Config {
cfg.Log.Level = "info"
cfg.Log.Format = "cli"
- cfg.Connectors.Path = "./connectors"
+ cfg.Connectors.Path = filepath.Join(basePath, "connectors")
- cfg.Processors.Path = "./processors"
+ cfg.Processors.Path = filepath.Join(basePath, "processors")
- cfg.Pipelines.Path = "./pipelines"
+ cfg.Pipelines.Path = filepath.Join(basePath, "pipelines")
cfg.Pipelines.ErrorRecovery.MinDelay = time.Second
cfg.Pipelines.ErrorRecovery.MaxDelay = 10 * time.Minute
cfg.Pipelines.ErrorRecovery.BackoffFactor = 2
@@ -263,7 +273,7 @@ func (c Config) Validate() error {
}
// check if folder exists
_, err = os.Stat(c.Pipelines.Path)
- if c.Pipelines.Path != "./pipelines" && os.IsNotExist(err) {
+ if c.Pipelines.Path != "pipelines" && os.IsNotExist(err) {
return invalidConfigFieldErr("pipelines.path")
}
diff --git a/pkg/conduit/entrypoint.go b/pkg/conduit/entrypoint.go
index e175b3374..a3bf84b4b 100644
--- a/pkg/conduit/entrypoint.go
+++ b/pkg/conduit/entrypoint.go
@@ -27,10 +27,8 @@ import (
"github.com/peterbourgon/ff/v3/ffyaml"
)
-// Serve is a shortcut for Entrypoint.Serve.
-func Serve(cfg Config) {
- e := &Entrypoint{}
- e.Serve(cfg)
+var DeprecatedFlags = map[string]bool{
+ "pipelines.exit-on-error": true,
}
const (
@@ -38,6 +36,12 @@ const (
exitCodeInterrupt = 2
)
+// Serve is a shortcut for Entrypoint.Serve.
+func Serve(cfg Config) {
+ e := &Entrypoint{}
+ e.Serve(cfg)
+}
+
// Entrypoint provides methods related to the Conduit entrypoint (parsing
// config, managing interrupt signals etc.).
type Entrypoint struct{}
@@ -52,8 +56,9 @@ type Entrypoint struct{}
// - environment variables
// - config file (lowest priority)
func (e *Entrypoint) Serve(cfg Config) {
- flags := e.Flags(&cfg)
+ flags := Flags(&cfg)
e.ParseConfig(flags)
+
if cfg.Log.Format == "cli" {
_, _ = fmt.Fprintf(os.Stdout, "%s\n", e.Splash())
}
@@ -73,7 +78,7 @@ func (e *Entrypoint) Serve(cfg Config) {
// Flags returns a flag set that, when parsed, stores the values in the provided
// config struct.
-func (*Entrypoint) Flags(cfg *Config) *flag.FlagSet {
+func Flags(cfg *Config) *flag.FlagSet {
// TODO extract flags from config struct rather than defining flags manually
flags := flag.NewFlagSet("conduit", flag.ExitOnError)
@@ -156,22 +161,19 @@ func (*Entrypoint) Flags(cfg *Config) *flag.FlagSet {
flags.StringVar(&cfg.SchemaRegistry.Type, "schema-registry.type", cfg.SchemaRegistry.Type, "schema registry type; accepts builtin,confluent")
flags.StringVar(&cfg.SchemaRegistry.Confluent.ConnectionString, "schema-registry.confluent.connection-string", cfg.SchemaRegistry.Confluent.ConnectionString, "confluent schema registry connection string")
+ flags.BoolVar(&cfg.Preview.PipelineArchV2, "preview.pipeline-arch-v2", cfg.Preview.PipelineArchV2, "enables experimental pipeline architecture v2 (note that the new architecture currently supports only 1 source and 1 destination per pipeline)")
+
// NB: flags with prefix dev.* are hidden from help output by default, they only show up using '-dev -help'
showDevHelp := flags.Bool("dev", false, "used together with the dev flag it shows dev flags")
flags.StringVar(&cfg.dev.cpuprofile, "dev.cpuprofile", "", "write cpu profile to file")
flags.StringVar(&cfg.dev.memprofile, "dev.memprofile", "", "write memory profile to file")
flags.StringVar(&cfg.dev.blockprofile, "dev.blockprofile", "", "write block profile to file")
- // Deprecated flags that are hidden from help output
- deprecatedFlags := map[string]bool{
- "pipelines.exit-on-error": true,
- }
-
// show user or dev flags
flags.Usage = func() {
tmpFlags := flag.NewFlagSet("conduit", flag.ExitOnError)
flags.VisitAll(func(f *flag.Flag) {
- if f.Name == "dev" || strings.HasPrefix(f.Name, "dev.") != *showDevHelp || deprecatedFlags[f.Name] {
+ if f.Name == "dev" || strings.HasPrefix(f.Name, "dev.") != *showDevHelp || DeprecatedFlags[f.Name] {
return // hide flag from output
}
// reset value to its default, to ensure default is shown correctly
diff --git a/pkg/conduit/runtime.go b/pkg/conduit/runtime.go
index f1fcc7de6..74df8dcb2 100644
--- a/pkg/conduit/runtime.go
+++ b/pkg/conduit/runtime.go
@@ -46,6 +46,7 @@ import (
"github.com/conduitio/conduit/pkg/foundation/metrics/measure"
"github.com/conduitio/conduit/pkg/foundation/metrics/prometheus"
"github.com/conduitio/conduit/pkg/lifecycle"
+ lifecycle_v2 "github.com/conduitio/conduit/pkg/lifecycle-poc"
"github.com/conduitio/conduit/pkg/orchestrator"
"github.com/conduitio/conduit/pkg/pipeline"
conn_plugin "github.com/conduitio/conduit/pkg/plugin/connector"
@@ -77,7 +78,7 @@ import (
)
const (
- exitTimeout = 10 * time.Second
+ exitTimeout = 30 * time.Second
)
// Runtime sets up all services for serving and monitoring a Conduit instance.
@@ -95,7 +96,7 @@ type Runtime struct {
pipelineService *pipeline.Service
connectorService *connector.Service
processorService *processor.Service
- lifecycleService *lifecycle.Service
+ lifecycleService lifecycleService
connectorPluginService *conn_plugin.PluginService
processorPluginService *proc_plugin.PluginService
@@ -107,6 +108,14 @@ type Runtime struct {
logger log.CtxLogger
}
+// lifecycleService is an interface that we use temporarily to allow for
+// both the old and new lifecycle services to be used interchangeably.
+type lifecycleService interface {
+ Start(ctx context.Context, pipelineID string) error
+ Stop(ctx context.Context, pipelineID string, force bool) error
+ Init(ctx context.Context) error
+}
+
// NewRuntime sets up a Runtime instance and primes it for start.
func NewRuntime(cfg Config) (*Runtime, error) {
if err := cfg.Validate(); err != nil {
@@ -203,21 +212,28 @@ func createServices(r *Runtime) error {
tokenService,
)
- // Error recovery configuration
- errRecoveryCfg := &lifecycle.ErrRecoveryCfg{
- MinDelay: r.Config.Pipelines.ErrorRecovery.MinDelay,
- MaxDelay: r.Config.Pipelines.ErrorRecovery.MaxDelay,
- BackoffFactor: r.Config.Pipelines.ErrorRecovery.BackoffFactor,
- MaxRetries: r.Config.Pipelines.ErrorRecovery.MaxRetries,
- MaxRetriesWindow: r.Config.Pipelines.ErrorRecovery.MaxRetriesWindow,
- }
-
plService := pipeline.NewService(r.logger, r.DB)
connService := connector.NewService(r.logger, r.DB, r.connectorPersister)
procService := processor.NewService(r.logger, r.DB, procPluginService)
- lifecycleService := lifecycle.NewService(r.logger, errRecoveryCfg, connService, procService, connPluginService, plService)
- provisionService := provisioning.NewService(r.DB, r.logger, plService, connService, procService, connPluginService, lifecycleService, r.Config.Pipelines.Path)
+ var lifecycleService lifecycleService
+ if r.Config.Preview.PipelineArchV2 {
+ r.logger.Info(context.Background()).Msg("using lifecycle service v2")
+ lifecycleService = lifecycle_v2.NewService(r.logger, connService, procService, connPluginService, plService)
+ } else {
+ // Error recovery configuration
+ errRecoveryCfg := &lifecycle.ErrRecoveryCfg{
+ MinDelay: r.Config.Pipelines.ErrorRecovery.MinDelay,
+ MaxDelay: r.Config.Pipelines.ErrorRecovery.MaxDelay,
+ BackoffFactor: r.Config.Pipelines.ErrorRecovery.BackoffFactor,
+ MaxRetries: r.Config.Pipelines.ErrorRecovery.MaxRetries,
+ MaxRetriesWindow: r.Config.Pipelines.ErrorRecovery.MaxRetriesWindow,
+ }
+
+ lifecycleService = lifecycle.NewService(r.logger, errRecoveryCfg, connService, procService, connPluginService, plService)
+ }
+
+ provisionService := provisioning.NewService(r.DB, r.logger, plService, connService, procService, connPluginService, lifecycleService, r.Config.Pipelines.Path)
orc := orchestrator.NewOrchestrator(r.DB, r.logger, plService, connService, procService, connPluginService, procPluginService, lifecycleService)
r.Orchestrator = orc
@@ -415,6 +431,15 @@ func (r *Runtime) initProfiling(ctx context.Context) (deferred func(), err error
}
func (r *Runtime) registerCleanup(t *tomb.Tomb) {
+ if r.Config.Preview.PipelineArchV2 {
+ r.registerCleanupV2(t)
+ } else {
+ r.registerCleanupV1(t)
+ }
+}
+
+func (r *Runtime) registerCleanupV1(t *tomb.Tomb) {
+ ls := r.lifecycleService.(*lifecycle.Service)
t.Go(func() error {
<-t.Dying()
// start cleanup with a fresh context
@@ -423,12 +448,12 @@ func (r *Runtime) registerCleanup(t *tomb.Tomb) {
// t.Err() can be nil, when we had a call: t.Kill(nil)
// t.Err() will be context.Canceled, if the tomb's context was canceled
if t.Err() == nil || cerrors.Is(t.Err(), context.Canceled) {
- r.lifecycleService.StopAll(ctx, pipeline.ErrGracefulShutdown)
+ ls.StopAll(ctx, pipeline.ErrGracefulShutdown)
} else {
// tomb died due to a real error
- r.lifecycleService.StopAll(ctx, cerrors.Errorf("conduit experienced an error: %w", t.Err()))
+ ls.StopAll(ctx, cerrors.Errorf("conduit experienced an error: %w", t.Err()))
}
- err := r.lifecycleService.Wait(exitTimeout)
+ err := ls.Wait(exitTimeout)
t.Go(func() error {
r.connectorPersister.Wait()
return r.DB.Close()
@@ -437,6 +462,62 @@ func (r *Runtime) registerCleanup(t *tomb.Tomb) {
})
}
+func (r *Runtime) registerCleanupV2(t *tomb.Tomb) {
+ ls := r.lifecycleService.(*lifecycle_v2.Service)
+ t.Go(func() error {
+ <-t.Dying()
+ // start cleanup with a fresh context
+ ctx := context.Background()
+
+ err := ls.StopAll(ctx, false)
+ if err != nil {
+ r.logger.Err(ctx, err).Msg("some pipelines stopped with an error")
+ }
+
+ // Wait for the pipelines to stop
+ const (
+ count = 6
+ interval = exitTimeout / count
+ )
+
+ pipelinesStopped := make(chan struct{})
+ go func() {
+ for i := count; i > 0; i-- {
+ if i == 1 {
+ // on last try, stop forcefully
+ _ = ls.StopAll(ctx, true)
+ }
+
+ r.logger.Info(ctx).Msgf("waiting for pipelines to stop running (time left: %s)", time.Duration(i)*interval)
+ select {
+ case <-time.After(interval):
+ case <-pipelinesStopped:
+ return
+ }
+ }
+ }()
+
+ err = ls.Wait(exitTimeout)
+ switch {
+ case err != nil && err != context.DeadlineExceeded:
+ r.logger.Warn(ctx).Err(err).Msg("some pipelines stopped with an error")
+ case err == context.DeadlineExceeded:
+ r.logger.Warn(ctx).Msg("some pipelines did not stop in time")
+ default:
+ r.logger.Info(ctx).Msg("all pipelines stopped gracefully")
+ }
+
+ pipelinesStopped <- struct{}{}
+
+ t.Go(func() error {
+ r.connectorPersister.Wait()
+ return r.DB.Close()
+ })
+
+ return nil
+ })
+}
+
func (r *Runtime) newHTTPMetricsHandler() http.Handler {
return promhttp.Handler()
}
@@ -770,13 +851,25 @@ func (r *Runtime) initServices(ctx context.Context, t *tomb.Tomb) error {
}
if r.Config.Pipelines.ExitOnDegraded {
- r.lifecycleService.OnFailure(func(e lifecycle.FailureEvent) {
- r.logger.Warn(ctx).
- Err(e.Error).
- Str(log.PipelineIDField, e.ID).
- Msg("Conduit will shut down due to a pipeline failure and 'exit-on-degraded' enabled")
- t.Kill(cerrors.Errorf("shut down due to 'exit-on-degraded' error: %w", e.Error))
- })
+ if r.Config.Preview.PipelineArchV2 {
+ ls := r.lifecycleService.(*lifecycle_v2.Service)
+ ls.OnFailure(func(e lifecycle_v2.FailureEvent) {
+ r.logger.Warn(ctx).
+ Err(e.Error).
+ Str(log.PipelineIDField, e.ID).
+ Msg("Conduit will shut down due to a pipeline failure and 'exit-on-degraded' enabled")
+ t.Kill(cerrors.Errorf("shut down due to 'exit-on-degraded' error: %w", e.Error))
+ })
+ } else {
+ ls := r.lifecycleService.(*lifecycle.Service)
+ ls.OnFailure(func(e lifecycle.FailureEvent) {
+ r.logger.Warn(ctx).
+ Err(e.Error).
+ Str(log.PipelineIDField, e.ID).
+ Msg("Conduit will shut down due to a pipeline failure and 'exit-on-degraded' enabled")
+ t.Kill(cerrors.Errorf("shut down due to 'exit-on-degraded' error: %w", e.Error))
+ })
+ }
}
err = r.pipelineService.Init(ctx)
if err != nil {
diff --git a/pkg/connector/source.go b/pkg/connector/source.go
index 3ce1820d1..1b84161dd 100644
--- a/pkg/connector/source.go
+++ b/pkg/connector/source.go
@@ -16,7 +16,9 @@ package connector
import (
"context"
+ "strconv"
"sync"
+ "time"
"github.com/conduitio/conduit-commons/opencdc"
"github.com/conduitio/conduit-connector-protocol/pconnector"
@@ -153,6 +155,7 @@ func (s *Source) Teardown(ctx context.Context) error {
return plugin.ErrPluginNotRunning
}
+ s.Instance.logger.Debug(ctx).Msg("closing stream")
// close stream
if s.stopStream != nil {
s.stopStream()
@@ -192,8 +195,9 @@ func (s *Source) Read(ctx context.Context) ([]opencdc.Record, error) {
return nil, err
}
+ now := strconv.FormatInt(time.Now().UnixNano(), 10)
for _, r := range resp.Records {
- s.sanitizeRecord(&r)
+ s.sanitizeRecord(&r, now)
}
s.Instance.inspector.Send(ctx, resp.Records)
@@ -375,7 +379,7 @@ func (s *Source) triggerLifecycleEvent(ctx context.Context, oldConfig, newConfig
}
}
-func (s *Source) sanitizeRecord(r *opencdc.Record) {
+func (s *Source) sanitizeRecord(r *opencdc.Record, now string) {
if r.Key == nil {
r.Key = opencdc.RawData{}
}
@@ -385,12 +389,19 @@ func (s *Source) sanitizeRecord(r *opencdc.Record) {
if r.Payload.After == nil {
r.Payload.After = opencdc.RawData{}
}
-
if r.Metadata == nil {
- r.Metadata = opencdc.Metadata{}
+ r.Metadata = opencdc.Metadata{
+ opencdc.MetadataReadAt: now,
+ opencdc.MetadataConduitSourceConnectorID: s.Instance.ID,
+ }
+ } else {
+ if r.Metadata[opencdc.MetadataReadAt] == "" {
+ r.Metadata[opencdc.MetadataReadAt] = now
+ }
+ if r.Metadata[opencdc.MetadataConduitSourceConnectorID] == "" {
+ r.Metadata[opencdc.MetadataConduitSourceConnectorID] = s.Instance.ID
+ }
}
- // source connector ID is added to all records
- r.Metadata.SetConduitSourceConnectorID(s.Instance.ID)
}
func (*Source) isEqual(cfg1, cfg2 map[string]string) bool {
diff --git a/pkg/foundation/metrics/metrics.go b/pkg/foundation/metrics/metrics.go
index 60cb0971c..1d9871b29 100644
--- a/pkg/foundation/metrics/metrics.go
+++ b/pkg/foundation/metrics/metrics.go
@@ -405,14 +405,18 @@ func (mt *labeledHistogram) WithValues(vs ...string) Histogram {
// RecordBytesHistogram wraps a histrogram metric and allows to observe record
// sizes in bytes.
type RecordBytesHistogram struct {
- h Histogram
+ H Histogram
}
func NewRecordBytesHistogram(h Histogram) RecordBytesHistogram {
- return RecordBytesHistogram{h}
+ return RecordBytesHistogram{H: h}
}
func (m RecordBytesHistogram) Observe(r opencdc.Record) {
+ m.H.Observe(m.SizeOf(r))
+}
+
+func (m RecordBytesHistogram) SizeOf(r opencdc.Record) float64 {
// TODO for now we call method Bytes() on key and payload to get the
// bytes representation. In case of a structured payload or key it
// is marshaled into JSON, which might not be the correct way to
@@ -429,5 +433,5 @@ func (m RecordBytesHistogram) Observe(r opencdc.Record) {
if r.Payload.After != nil {
bytes += len(r.Payload.After.Bytes())
}
- m.h.Observe(float64(bytes))
+ return float64(bytes)
}
diff --git a/pkg/lifecycle-poc/funnel/batch.go b/pkg/lifecycle-poc/funnel/batch.go
new file mode 100644
index 000000000..1b3e4ef1f
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/batch.go
@@ -0,0 +1,178 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:generate stringer -type=RecordFlag -linecomment
+
+package funnel
+
+import (
+ "fmt"
+ "slices"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+)
+
+// Batch represents a batch of records that are processed together. It keeps
+// track of the status of each record in the batch, and provides methods to
+// update the status of records.
+type Batch struct {
+ records []opencdc.Record
+ recordStatuses []RecordStatus
+ positions []opencdc.Position
+
+ // filterCount is updated any time a record is marked as filtered, to make it
+ // easier to construct the set of active records.
+ filterCount int
+
+ // If a batch is tainted it means that parts need to be either nacked or
+ // retried. Such a batch needs to be split into multiple batches, each
+ // containing only records with the same status (filtered counts as acked).
+ tainted bool
+}
+
+func NewBatch(records []opencdc.Record) *Batch {
+ // Store positions separately, as we need the original positions when acking
+ // records in the source, in case a processor tries to change the position.
+ positions := make([]opencdc.Position, len(records))
+ for i, r := range records {
+ positions[i] = r.Position
+ }
+
+ return &Batch{
+ records: records,
+ recordStatuses: make([]RecordStatus, len(records)),
+ positions: positions,
+ filterCount: 0,
+ tainted: false,
+ }
+}
+
+// Nack marks the record at index i as nacked. If multiple errors are provided,
+// they are assigned to the records starting at index i.
+func (b *Batch) Nack(i int, errs ...error) {
+ b.setFlagWithErr(RecordFlagNack, i, errs)
+ b.tainted = true
+}
+
+// Retry marks the record at index i to be retried. If a second index is
+// provided, all records between i and j are marked as acked. If multiple
+// indices are provided, the method panics.
+func (b *Batch) Retry(i int, j ...int) {
+ b.setFlagNoErr(RecordFlagRetry, i, j...)
+ b.tainted = true
+}
+
+// Filter marks the record at index i as filtered out. If a second index is
+// provided, all records between i and j are marked as filtered. If multiple
+// indices are provided, the method panics.
+func (b *Batch) Filter(i int, j ...int) {
+ b.setFlagNoErr(RecordFlagFilter, i, j...)
+ end := i + 1
+ if len(j) == 1 {
+ end = j[0]
+ }
+ b.filterCount += end - i
+}
+
+// SetRecords replaces the records in the batch starting at index i with the
+// provided records.
+func (b *Batch) SetRecords(i int, recs []opencdc.Record) {
+ copy(b.records[i:], recs)
+}
+
+func (b *Batch) setFlagNoErr(f RecordFlag, i int, j ...int) {
+ switch len(j) {
+ case 0:
+ b.recordStatuses[i].Flag = f
+ b.recordStatuses[i].Error = nil
+ case 1:
+ if i >= j[0] {
+ panic(fmt.Sprintf("invalid range (%d >= %d)", i, j[0]))
+ }
+ for k := i; k < j[0]; k++ {
+ b.recordStatuses[k].Flag = f
+ b.recordStatuses[k].Error = nil
+ }
+ default:
+ panic(fmt.Sprintf("too many arguments (%d)", len(j)))
+ }
+}
+
+func (b *Batch) setFlagWithErr(f RecordFlag, i int, errs []error) {
+ for k, err := range errs {
+ b.recordStatuses[i+k].Flag = f
+ b.recordStatuses[i+k].Error = err
+ }
+}
+
+func (b *Batch) clone() *Batch {
+ records := make([]opencdc.Record, len(b.records))
+ for i, r := range b.records {
+ records[i] = r.Clone()
+ }
+
+ return &Batch{
+ records: records,
+ recordStatuses: slices.Clone(b.recordStatuses),
+ positions: b.positions,
+ tainted: b.tainted,
+ filterCount: b.filterCount,
+ }
+}
+
+func (b *Batch) sub(from, to int) *Batch {
+ filterCount := 0
+ for _, status := range b.recordStatuses[from:to] {
+ if status.Flag == RecordFlagFilter {
+ filterCount++
+ }
+ }
+ return &Batch{
+ records: b.records[from:to],
+ recordStatuses: b.recordStatuses[from:to],
+ positions: b.positions[from:to],
+ filterCount: filterCount,
+ tainted: false,
+ }
+}
+
+// ActiveRecords returns the records that are not filtered.
+func (b *Batch) ActiveRecords() []opencdc.Record {
+ if b.filterCount == 0 {
+ return b.records
+ }
+ active := make([]opencdc.Record, 0, len(b.records)-b.filterCount)
+ for i, r := range b.records {
+ if b.recordStatuses[i].Flag != RecordFlagFilter {
+ active = append(active, r)
+ }
+ }
+ return active
+}
+
+// RecordStatus holds the status of a record in a batch. The flag indicates the
+// status of the record, and the error is set if the record was nacked.
+type RecordStatus struct {
+ Flag RecordFlag
+ Error error
+}
+
+type RecordFlag int
+
+const (
+ RecordFlagAck RecordFlag = iota // ack
+ RecordFlagNack // nack
+ RecordFlagRetry // retry
+ RecordFlagFilter // filter
+)
diff --git a/pkg/lifecycle-poc/funnel/destination.go b/pkg/lifecycle-poc/funnel/destination.go
new file mode 100644
index 000000000..4c0932ff9
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/destination.go
@@ -0,0 +1,145 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:generate mockgen -typed -destination=mock/destination.go -package=mock -mock_names=Destination=Destination . Destination
+
+package funnel
+
+import (
+ "bytes"
+ "context"
+ "time"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit/pkg/connector"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics"
+)
+
+type DestinationTask struct {
+ id string
+ destination Destination
+ logger log.CtxLogger
+
+ timer metrics.Timer
+ histogram metrics.RecordBytesHistogram
+}
+
+type Destination interface {
+ ID() string
+ Open(context.Context) error
+ Write(context.Context, []opencdc.Record) error
+ Ack(context.Context) ([]connector.DestinationAck, error)
+ Teardown(context.Context) error
+ // TODO figure out if we want to handle these errors. This returns errors
+ // coming from the persister, which persists the connector asynchronously.
+ // Are we even interested in these errors in the pipeline? Sounds like
+ // something we could surface and handle globally in the runtime instead.
+ Errors() <-chan error
+}
+
+func NewDestinationTask(
+ id string,
+ destination Destination,
+ logger log.CtxLogger,
+ timer metrics.Timer,
+ histogram metrics.Histogram,
+) *DestinationTask {
+ logger = logger.WithComponent("task:destination")
+ logger.Logger = logger.With().Str(log.ConnectorIDField, id).Logger()
+ return &DestinationTask{
+ id: id,
+ destination: destination,
+ logger: logger,
+ timer: timer,
+ histogram: metrics.NewRecordBytesHistogram(histogram),
+ }
+}
+
+func (t *DestinationTask) ID() string {
+ return t.id
+}
+
+func (t *DestinationTask) Open(ctx context.Context) error {
+ t.logger.Debug(ctx).Msg("opening destination")
+ err := t.destination.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to open destination connector: %w", err)
+ }
+ t.logger.Debug(ctx).Msg("destination open")
+ return nil
+}
+
+func (t *DestinationTask) Close(ctx context.Context) error {
+ return t.destination.Teardown(ctx)
+}
+
+func (t *DestinationTask) Do(ctx context.Context, batch *Batch) error {
+ records := batch.ActiveRecords()
+ positions := make([]opencdc.Position, len(records))
+ for i, rec := range records {
+ positions[i] = rec.Position
+ }
+
+ start := time.Now()
+ err := t.destination.Write(ctx, records)
+ if err != nil {
+ return cerrors.Errorf("failed to write %d records to destination: %w", len(positions), err)
+ }
+
+ acks := make([]connector.DestinationAck, 0, len(positions))
+ for range len(positions) {
+ acksResp, err := t.destination.Ack(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to receive acks for %d records from destination: %w", len(positions), err)
+ }
+ t.observeMetrics(records[len(acks):len(acks)+len(acksResp)], start)
+ acks = append(acks, acksResp...)
+ if len(acks) >= len(positions) {
+ break
+ }
+ }
+ if len(acks) != len(positions) {
+ return cerrors.Errorf("received %d acks, but expected %d", len(acks), len(positions))
+ }
+
+ for i, ack := range acks {
+ if !bytes.Equal(positions[i], ack.Position) {
+ // TODO is this a fatal error? Looks like a bug in the connector
+ return cerrors.Errorf("received unexpected ack, expected position %q but got %q", positions[i], ack.Position)
+ }
+ if ack.Error != nil {
+ batch.Nack(i, ack.Error)
+ }
+ }
+
+ return nil
+}
+
+func (t *DestinationTask) observeMetrics(records []opencdc.Record, start time.Time) {
+ // Precalculate sizes so that we don't need to hold a reference to records
+ // and observations can happen in a goroutine.
+ sizes := make([]float64, len(records))
+ for i, rec := range records {
+ sizes[i] = t.histogram.SizeOf(rec)
+ }
+ tookPerRecord := time.Since(start) / time.Duration(len(sizes))
+ go func() {
+ for i := range len(sizes) {
+ t.timer.Update(tookPerRecord)
+ t.histogram.H.Observe(sizes[i])
+ }
+ }()
+}
diff --git a/pkg/lifecycle-poc/funnel/dlq.go b/pkg/lifecycle-poc/funnel/dlq.go
new file mode 100644
index 000000000..31d101783
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/dlq.go
@@ -0,0 +1,244 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package funnel
+
+import (
+ "context"
+ "sync"
+ "time"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics"
+)
+
+type DLQ struct {
+ task *DestinationTask
+
+ windowSize int
+ windowNackThreshold int
+
+ // window keeps track of the last N acks and nacks
+ window *dlqWindow
+ m sync.Mutex
+}
+
+func NewDLQ(
+ id string,
+ destination Destination,
+ logger log.CtxLogger,
+ timer metrics.Timer,
+ histogram metrics.Histogram,
+
+ windowSize int,
+ windowNackThreshold int,
+) *DLQ {
+ return &DLQ{
+ task: NewDestinationTask(id, destination, logger, timer, histogram),
+ windowSize: windowSize,
+ windowNackThreshold: windowNackThreshold,
+
+ window: newDLQWindow(windowSize, windowNackThreshold),
+ }
+}
+
+func (d *DLQ) ID() string {
+ return d.task.id
+}
+
+func (d *DLQ) Open(ctx context.Context) error {
+ return d.task.Open(ctx)
+}
+
+func (d *DLQ) Close(ctx context.Context) error {
+ return d.task.Close(ctx)
+}
+
+func (d *DLQ) Ack(_ context.Context, batch *Batch) {
+ if len(batch.records) == 0 {
+ return
+ }
+
+ d.m.Lock()
+ defer d.m.Unlock()
+
+ d.window.Ack(len(batch.records))
+}
+
+func (d *DLQ) Nack(ctx context.Context, batch *Batch, taskID string) (int, error) {
+ if len(batch.records) == 0 {
+ return 0, nil
+ }
+
+ d.m.Lock()
+ defer d.m.Unlock()
+
+ nacked := d.window.Nack(len(batch.records))
+
+ if nacked > 0 {
+ b := batch
+ if nacked < len(batch.records) {
+ b = batch.sub(0, nacked)
+ }
+
+ // The window successfully accepted nacks (at least some of them), send
+ // them to the DLQ.
+ successCount, err := d.sendToDLQ(ctx, b, taskID)
+ if err != nil {
+ // The DLQ write failed, we need to stop the pipeline, as recovering
+ // could lead to an endless loop of restarts.
+ return successCount, cerrors.FatalError(err)
+ }
+ }
+ if nacked < len(batch.records) {
+ // Not all records were nacked, we need to return an error.
+ if d.windowNackThreshold > 0 {
+ // If the threshold is greater than 0 the DLQ is enabled and we
+ // need to respect the threshold by stopping the pipeline with a
+ // fatal error.
+ return nacked, cerrors.FatalError(
+ cerrors.Errorf(
+ "DLQ nack threshold exceeded (%d/%d), original error: %w",
+ d.windowNackThreshold, d.windowSize, batch.recordStatuses[nacked].Error,
+ ),
+ )
+ }
+ // DLQ is disabled, we don't need to wrap the error message
+ return nacked, batch.recordStatuses[nacked].Error
+ }
+
+ return nacked, nil
+}
+
+func (d *DLQ) sendToDLQ(ctx context.Context, batch *Batch, taskID string) (int, error) {
+ // Create a new batch with the DLQ records and write it to the destination.
+ dlqRecords := make([]opencdc.Record, len(batch.records))
+ for i, req := range batch.records {
+ dlqRecords[i] = d.dlqRecord(req, batch.recordStatuses[i], taskID)
+ }
+ dlqBatch := NewBatch(dlqRecords)
+
+ err := d.task.Do(ctx, dlqBatch)
+ if err != nil {
+ return 0, cerrors.Errorf("failed to write %d records to the DLQ: %w", len(dlqRecords), err)
+ }
+
+ ackCount := 0
+ for ackCount < len(dlqRecords) && dlqBatch.recordStatuses[ackCount].Flag == RecordFlagAck {
+ ackCount++
+ }
+ if ackCount < len(dlqRecords) {
+ // Not all records were acked, we need to return an error.
+ return ackCount, cerrors.Errorf("failed to write record %d to the DLQ: %w", ackCount, dlqBatch.recordStatuses[ackCount].Error)
+ }
+
+ return ackCount, nil
+}
+
+func (d *DLQ) dlqRecord(r opencdc.Record, status RecordStatus, taskID string) opencdc.Record {
+ out := opencdc.Record{
+ Position: r.Position,
+ Operation: opencdc.OperationCreate,
+ Metadata: opencdc.Metadata{},
+ Key: nil,
+ Payload: opencdc.Change{
+ Before: nil,
+ After: opencdc.StructuredData(r.Map()), // failed record is stored here
+ },
+ }
+
+ connID, _ := r.Metadata.GetConduitSourceConnectorID()
+
+ out.Metadata.SetCreatedAt(time.Now())
+ out.Metadata.SetConduitSourceConnectorID(connID)
+ out.Metadata.SetConduitDLQNackError(status.Error.Error())
+ out.Metadata.SetConduitDLQNackNodeID(taskID) // TODO rename to DLQNackTaskID
+ return out
+}
+
+// dlqWindow is responsible for tracking the last N nacks/acks and enforce a
+// threshold of nacks that should not be exceeded.
+type dlqWindow struct {
+ // window acts as a ring buffer for storing acks/nacks (true = nack).
+ // When initialized it contains only acks.
+ window []bool
+ // cursor is the index pointing to the last message in the window.
+ cursor int
+ // nackThreshold represents the number of tolerated nacks, if the threshold
+ // is exceeded the window is frozen and returns an error for all further
+ // nacks.
+ nackThreshold int
+
+ ackCount int
+ nackCount int
+}
+
+func newDLQWindow(size, threshold int) *dlqWindow {
+ if size > 0 && threshold == 0 {
+ // optimization - if threshold is 0 the window size does not matter,
+ // setting it to 1 ensures we don't use more memory than needed
+ size = 1
+ }
+ return &dlqWindow{
+ window: make([]bool, size),
+ cursor: 0,
+ nackThreshold: threshold,
+
+ ackCount: size,
+ nackCount: 0,
+ }
+}
+
+// Ack stores an ack in the window.
+func (w *dlqWindow) Ack(count int) {
+ _ = w.store(count, false)
+}
+
+// Nack stores a nack in the window and returns true (ok). If the nack threshold
+// gets exceeded the window will be frozen and will return false for all further
+// calls to Nack.
+func (w *dlqWindow) Nack(count int) int {
+ return w.store(count, true)
+}
+
+func (w *dlqWindow) store(count int, nacked bool) int {
+ if len(w.window) == 0 || w.nackThreshold < w.nackCount {
+ return 0 // window disabled or threshold already reached
+ }
+
+ for i := range count {
+ // move cursor before updating the window
+ w.cursor = (w.cursor + 1) % len(w.window)
+ if w.window[w.cursor] == nacked {
+ continue // the old message has the same status, nothing changes
+ }
+
+ w.window[w.cursor] = nacked
+ switch nacked {
+ case false:
+ w.nackCount--
+ w.ackCount++
+ case true:
+ w.nackCount++
+ w.ackCount--
+ if w.nackThreshold < w.nackCount {
+ return i
+ }
+ }
+ }
+
+ return count
+}
diff --git a/pkg/lifecycle-poc/funnel/funnel_test.go b/pkg/lifecycle-poc/funnel/funnel_test.go
new file mode 100644
index 000000000..f48183c89
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/funnel_test.go
@@ -0,0 +1,330 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package funnel
+
+import (
+ "context"
+ "fmt"
+ "strconv"
+ "testing"
+ "time"
+
+ "github.com/conduitio/conduit-commons/csync"
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit/pkg/connector"
+ "github.com/conduitio/conduit/pkg/foundation/ctxutil"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics/noop"
+ funnelmock "github.com/conduitio/conduit/pkg/lifecycle-poc/funnel/mock"
+ "github.com/rs/zerolog"
+ "go.uber.org/mock/gomock"
+)
+
+func Example_simpleStream() {
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+
+ logger := newLogger()
+ ctrl := gomockCtrl(logger)
+
+ batchCount := 10
+ batchSize := 1
+
+ dlq := NewDLQ(
+ "dlq",
+ noopDLQDestination(ctrl),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ 1,
+ 0,
+ )
+ srcTask := NewSourceTask(
+ "generator",
+ generatorSource(ctrl, logger, "generator", batchSize, batchCount),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ )
+ destTask := NewDestinationTask(
+ "printer",
+ printerDestination(ctrl, logger, "printer", batchSize),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ )
+
+ w, err := NewWorker(
+ []Task{srcTask, destTask},
+ [][]int{{1}, {}},
+ dlq,
+ logger,
+ noop.Timer{},
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ err = w.Open(ctx)
+ if err != nil {
+ panic(err)
+ }
+
+ var wg csync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ err := w.Do(ctx)
+ if err != nil {
+ panic(err)
+ }
+ }()
+
+ // stop node after 150ms, which should be enough to process the 10 messages
+ time.AfterFunc(150*time.Millisecond, func() { _ = w.Stop(ctx) })
+
+ if err := wg.WaitTimeout(ctx, time.Second); err != nil {
+ killAll()
+ } else {
+ logger.Info(ctx).Msg("finished successfully")
+ }
+
+ err = w.Close(ctx)
+ if err != nil {
+ panic(err)
+ }
+
+ // Output:
+ // DBG opening source component=task:source connector_id=generator
+ // DBG source open component=task:source connector_id=generator
+ // DBG opening destination component=task:destination connector_id=printer
+ // DBG destination open component=task:destination connector_id=printer
+ // DBG opening destination component=task:destination connector_id=dlq
+ // DBG destination open component=task:destination connector_id=dlq
+ // DBG got record node_id=printer position=generator/0
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/1
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/2
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/3
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/4
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/5
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/6
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/7
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/8
+ // DBG received ack node_id=generator
+ // DBG got record node_id=printer position=generator/9
+ // DBG received ack node_id=generator
+ // INF finished successfully
+}
+
+func BenchmarkStreamNew(b *testing.B) {
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+
+ logger := log.Nop()
+ ctrl := gomockCtrl(logger)
+
+ b.ReportAllocs()
+ b.StopTimer()
+ for i := 0; i < b.N; i++ {
+ batchCount := 100
+ batchSize := 1000
+
+ dlq := NewDLQ(
+ "dlq",
+ noopDLQDestination(ctrl),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ 1,
+ 0,
+ )
+ srcTask := NewSourceTask(
+ "generator",
+ generatorSource(ctrl, logger, "generator", batchSize, batchCount),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ )
+ destTask := NewDestinationTask(
+ "printer",
+ printerDestination(ctrl, logger, "printer", batchSize),
+ logger,
+ noop.Timer{},
+ noop.Histogram{},
+ )
+
+ w, err := NewWorker(
+ []Task{srcTask, destTask},
+ [][]int{{1}, {}},
+ dlq,
+ logger,
+ noop.Timer{},
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ b.StartTimer()
+
+ var wg csync.WaitGroup
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ err := w.Do(ctx)
+ if err != nil {
+ panic(err)
+ }
+ }()
+
+ // stop node after 150ms, which should be enough to process the 10 messages
+ time.AfterFunc(150*time.Millisecond, func() { _ = w.Stop(ctx) })
+
+ if err := wg.WaitTimeout(ctx, time.Second); err != nil {
+ killAll()
+ }
+
+ err = w.Close(ctx)
+ if err != nil {
+ panic(err)
+ }
+
+ b.StopTimer()
+ }
+}
+
+func newLogger() log.CtxLogger {
+ w := zerolog.NewConsoleWriter()
+ w.NoColor = true
+ w.PartsExclude = []string{zerolog.TimestampFieldName}
+
+ zlogger := zerolog.New(w)
+ zlogger = zlogger.Level(zerolog.DebugLevel)
+ logger := log.New(zlogger)
+ logger.Logger = logger.Hook(ctxutil.MessageIDLogCtxHook{})
+
+ return logger
+}
+
+func generatorSource(ctrl *gomock.Controller, logger log.CtxLogger, nodeID string, batchSize, batchCount int) Source {
+ position := 0
+
+ teardown := make(chan struct{})
+ source := funnelmock.NewSource(ctrl)
+ source.EXPECT().ID().Return(nodeID).AnyTimes()
+ source.EXPECT().Open(gomock.Any()).Return(nil)
+ source.EXPECT().Teardown(gomock.Any()).DoAndReturn(func(context.Context) error {
+ close(teardown)
+ return nil
+ })
+ source.EXPECT().Ack(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, p []opencdc.Position) error {
+ logger.Debug(ctx).Str("node_id", nodeID).Msg("received ack")
+ return nil
+ }).Times(batchCount * batchSize)
+ source.EXPECT().Read(gomock.Any()).DoAndReturn(func(ctx context.Context) ([]opencdc.Record, error) {
+ if position == batchCount*batchSize {
+ // block until Teardown is called
+ <-teardown
+ return nil, context.Canceled
+ }
+
+ recs := make([]opencdc.Record, batchSize)
+ for i := 0; i < batchSize; i++ {
+ recs[i] = opencdc.Record{
+ Metadata: opencdc.Metadata{
+ opencdc.MetadataConduitSourceConnectorID: nodeID,
+ },
+ Position: opencdc.Position(strconv.Itoa(position)),
+ }
+ position++
+ }
+
+ return recs, nil
+ }).MinTimes(batchCount + 1)
+ source.EXPECT().Errors().Return(make(chan error))
+
+ return source
+}
+
+func printerDestination(ctrl *gomock.Controller, logger log.CtxLogger, nodeID string, batchSize int) Destination {
+ var lastPosition opencdc.Position
+ _ = lastPosition
+ rchan := make(chan opencdc.Record, batchSize)
+ destination := funnelmock.NewDestination(ctrl)
+ destination.EXPECT().Open(gomock.Any()).Return(nil)
+ destination.EXPECT().Write(gomock.Any(), gomock.Any()).DoAndReturn(func(ctx context.Context, recs []opencdc.Record) error {
+ for _, r := range recs {
+ connID, _ := r.Metadata.GetConduitSourceConnectorID()
+ logger.Debug(ctx).
+ Str("position", fmt.Sprintf("%s/%s", connID, r.Position)).
+ Str("node_id", nodeID).
+ Msg("got record")
+ lastPosition = r.Position
+ rchan <- r
+ }
+ return nil
+ }).AnyTimes()
+ destination.EXPECT().Ack(gomock.Any()).DoAndReturn(func(ctx context.Context) ([]connector.DestinationAck, error) {
+ acks := make([]connector.DestinationAck, 0, batchSize)
+ for {
+ select {
+ case <-ctx.Done():
+ return nil, ctx.Err()
+ case r, ok := <-rchan:
+ if !ok {
+ return nil, nil
+ }
+ acks = append(acks, connector.DestinationAck{Position: r.Position})
+ default:
+ return acks, nil
+ }
+ }
+ }).AnyTimes()
+ destination.EXPECT().Teardown(gomock.Any()).DoAndReturn(func(ctx context.Context) error {
+ close(rchan)
+ return nil
+ })
+ destination.EXPECT().Errors().Return(make(chan error))
+
+ return destination
+}
+
+func noopDLQDestination(ctrl *gomock.Controller) Destination {
+ destination := funnelmock.NewDestination(ctrl)
+ destination.EXPECT().Open(gomock.Any()).Return(nil)
+ destination.EXPECT().Teardown(gomock.Any()).Return(nil)
+ return destination
+}
+
+func gomockCtrl(logger log.CtxLogger) *gomock.Controller {
+ return gomock.NewController(gomockLogger(logger))
+}
+
+type gomockLogger log.CtxLogger
+
+func (g gomockLogger) Errorf(format string, args ...interface{}) {
+ g.Error().Msgf(format, args...)
+}
+
+func (g gomockLogger) Fatalf(format string, args ...interface{}) {
+ g.Fatal().Msgf(format, args...)
+}
diff --git a/pkg/lifecycle-poc/funnel/mock/destination.go b/pkg/lifecycle-poc/funnel/mock/destination.go
new file mode 100644
index 000000000..11ab25c6a
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/mock/destination.go
@@ -0,0 +1,272 @@
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/conduitio/conduit/pkg/lifecycle-poc/funnel (interfaces: Destination)
+//
+// Generated by this command:
+//
+// mockgen -typed -destination=mock/destination.go -package=mock -mock_names=Destination=Destination . Destination
+//
+
+// Package mock is a generated GoMock package.
+package mock
+
+import (
+ context "context"
+ reflect "reflect"
+
+ opencdc "github.com/conduitio/conduit-commons/opencdc"
+ connector "github.com/conduitio/conduit/pkg/connector"
+ gomock "go.uber.org/mock/gomock"
+)
+
+// Destination is a mock of Destination interface.
+type Destination struct {
+ ctrl *gomock.Controller
+ recorder *DestinationMockRecorder
+ isgomock struct{}
+}
+
+// DestinationMockRecorder is the mock recorder for Destination.
+type DestinationMockRecorder struct {
+ mock *Destination
+}
+
+// NewDestination creates a new mock instance.
+func NewDestination(ctrl *gomock.Controller) *Destination {
+ mock := &Destination{ctrl: ctrl}
+ mock.recorder = &DestinationMockRecorder{mock}
+ return mock
+}
+
+// EXPECT returns an object that allows the caller to indicate expected use.
+func (m *Destination) EXPECT() *DestinationMockRecorder {
+ return m.recorder
+}
+
+// Ack mocks base method.
+func (m *Destination) Ack(arg0 context.Context) ([]connector.DestinationAck, error) {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Ack", arg0)
+ ret0, _ := ret[0].([]connector.DestinationAck)
+ ret1, _ := ret[1].(error)
+ return ret0, ret1
+}
+
+// Ack indicates an expected call of Ack.
+func (mr *DestinationMockRecorder) Ack(arg0 any) *DestinationAckCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Ack", reflect.TypeOf((*Destination)(nil).Ack), arg0)
+ return &DestinationAckCall{Call: call}
+}
+
+// DestinationAckCall wrap *gomock.Call
+type DestinationAckCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationAckCall) Return(arg0 []connector.DestinationAck, arg1 error) *DestinationAckCall {
+ c.Call = c.Call.Return(arg0, arg1)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationAckCall) Do(f func(context.Context) ([]connector.DestinationAck, error)) *DestinationAckCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationAckCall) DoAndReturn(f func(context.Context) ([]connector.DestinationAck, error)) *DestinationAckCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Errors mocks base method.
+func (m *Destination) Errors() <-chan error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Errors")
+ ret0, _ := ret[0].(<-chan error)
+ return ret0
+}
+
+// Errors indicates an expected call of Errors.
+func (mr *DestinationMockRecorder) Errors() *DestinationErrorsCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Errors", reflect.TypeOf((*Destination)(nil).Errors))
+ return &DestinationErrorsCall{Call: call}
+}
+
+// DestinationErrorsCall wrap *gomock.Call
+type DestinationErrorsCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationErrorsCall) Return(arg0 <-chan error) *DestinationErrorsCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationErrorsCall) Do(f func() <-chan error) *DestinationErrorsCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationErrorsCall) DoAndReturn(f func() <-chan error) *DestinationErrorsCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// ID mocks base method.
+func (m *Destination) ID() string {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "ID")
+ ret0, _ := ret[0].(string)
+ return ret0
+}
+
+// ID indicates an expected call of ID.
+func (mr *DestinationMockRecorder) ID() *DestinationIDCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*Destination)(nil).ID))
+ return &DestinationIDCall{Call: call}
+}
+
+// DestinationIDCall wrap *gomock.Call
+type DestinationIDCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationIDCall) Return(arg0 string) *DestinationIDCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationIDCall) Do(f func() string) *DestinationIDCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationIDCall) DoAndReturn(f func() string) *DestinationIDCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Open mocks base method.
+func (m *Destination) Open(arg0 context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Open", arg0)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Open indicates an expected call of Open.
+func (mr *DestinationMockRecorder) Open(arg0 any) *DestinationOpenCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Open", reflect.TypeOf((*Destination)(nil).Open), arg0)
+ return &DestinationOpenCall{Call: call}
+}
+
+// DestinationOpenCall wrap *gomock.Call
+type DestinationOpenCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationOpenCall) Return(arg0 error) *DestinationOpenCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationOpenCall) Do(f func(context.Context) error) *DestinationOpenCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationOpenCall) DoAndReturn(f func(context.Context) error) *DestinationOpenCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Teardown mocks base method.
+func (m *Destination) Teardown(arg0 context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Teardown", arg0)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Teardown indicates an expected call of Teardown.
+func (mr *DestinationMockRecorder) Teardown(arg0 any) *DestinationTeardownCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Teardown", reflect.TypeOf((*Destination)(nil).Teardown), arg0)
+ return &DestinationTeardownCall{Call: call}
+}
+
+// DestinationTeardownCall wrap *gomock.Call
+type DestinationTeardownCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationTeardownCall) Return(arg0 error) *DestinationTeardownCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationTeardownCall) Do(f func(context.Context) error) *DestinationTeardownCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationTeardownCall) DoAndReturn(f func(context.Context) error) *DestinationTeardownCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Write mocks base method.
+func (m *Destination) Write(arg0 context.Context, arg1 []opencdc.Record) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Write", arg0, arg1)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Write indicates an expected call of Write.
+func (mr *DestinationMockRecorder) Write(arg0, arg1 any) *DestinationWriteCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Write", reflect.TypeOf((*Destination)(nil).Write), arg0, arg1)
+ return &DestinationWriteCall{Call: call}
+}
+
+// DestinationWriteCall wrap *gomock.Call
+type DestinationWriteCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *DestinationWriteCall) Return(arg0 error) *DestinationWriteCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *DestinationWriteCall) Do(f func(context.Context, []opencdc.Record) error) *DestinationWriteCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *DestinationWriteCall) DoAndReturn(f func(context.Context, []opencdc.Record) error) *DestinationWriteCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
diff --git a/pkg/lifecycle-poc/funnel/mock/processor.go b/pkg/lifecycle-poc/funnel/mock/processor.go
new file mode 100644
index 000000000..f6c404300
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/mock/processor.go
@@ -0,0 +1,157 @@
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/conduitio/conduit/pkg/lifecycle-poc/funnel (interfaces: Processor)
+//
+// Generated by this command:
+//
+// mockgen -typed -destination=mock/processor.go -package=mock -mock_names=Processor=Processor . Processor
+//
+
+// Package mock is a generated GoMock package.
+package mock
+
+import (
+ context "context"
+ reflect "reflect"
+
+ opencdc "github.com/conduitio/conduit-commons/opencdc"
+ sdk "github.com/conduitio/conduit-processor-sdk"
+ gomock "go.uber.org/mock/gomock"
+)
+
+// Processor is a mock of Processor interface.
+type Processor struct {
+ ctrl *gomock.Controller
+ recorder *ProcessorMockRecorder
+ isgomock struct{}
+}
+
+// ProcessorMockRecorder is the mock recorder for Processor.
+type ProcessorMockRecorder struct {
+ mock *Processor
+}
+
+// NewProcessor creates a new mock instance.
+func NewProcessor(ctrl *gomock.Controller) *Processor {
+ mock := &Processor{ctrl: ctrl}
+ mock.recorder = &ProcessorMockRecorder{mock}
+ return mock
+}
+
+// EXPECT returns an object that allows the caller to indicate expected use.
+func (m *Processor) EXPECT() *ProcessorMockRecorder {
+ return m.recorder
+}
+
+// Open mocks base method.
+func (m *Processor) Open(ctx context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Open", ctx)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Open indicates an expected call of Open.
+func (mr *ProcessorMockRecorder) Open(ctx any) *ProcessorOpenCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Open", reflect.TypeOf((*Processor)(nil).Open), ctx)
+ return &ProcessorOpenCall{Call: call}
+}
+
+// ProcessorOpenCall wrap *gomock.Call
+type ProcessorOpenCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *ProcessorOpenCall) Return(arg0 error) *ProcessorOpenCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *ProcessorOpenCall) Do(f func(context.Context) error) *ProcessorOpenCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *ProcessorOpenCall) DoAndReturn(f func(context.Context) error) *ProcessorOpenCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Process mocks base method.
+func (m *Processor) Process(arg0 context.Context, arg1 []opencdc.Record) []sdk.ProcessedRecord {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Process", arg0, arg1)
+ ret0, _ := ret[0].([]sdk.ProcessedRecord)
+ return ret0
+}
+
+// Process indicates an expected call of Process.
+func (mr *ProcessorMockRecorder) Process(arg0, arg1 any) *ProcessorProcessCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Process", reflect.TypeOf((*Processor)(nil).Process), arg0, arg1)
+ return &ProcessorProcessCall{Call: call}
+}
+
+// ProcessorProcessCall wrap *gomock.Call
+type ProcessorProcessCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *ProcessorProcessCall) Return(arg0 []sdk.ProcessedRecord) *ProcessorProcessCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *ProcessorProcessCall) Do(f func(context.Context, []opencdc.Record) []sdk.ProcessedRecord) *ProcessorProcessCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *ProcessorProcessCall) DoAndReturn(f func(context.Context, []opencdc.Record) []sdk.ProcessedRecord) *ProcessorProcessCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Teardown mocks base method.
+func (m *Processor) Teardown(arg0 context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Teardown", arg0)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Teardown indicates an expected call of Teardown.
+func (mr *ProcessorMockRecorder) Teardown(arg0 any) *ProcessorTeardownCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Teardown", reflect.TypeOf((*Processor)(nil).Teardown), arg0)
+ return &ProcessorTeardownCall{Call: call}
+}
+
+// ProcessorTeardownCall wrap *gomock.Call
+type ProcessorTeardownCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *ProcessorTeardownCall) Return(arg0 error) *ProcessorTeardownCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *ProcessorTeardownCall) Do(f func(context.Context) error) *ProcessorTeardownCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *ProcessorTeardownCall) DoAndReturn(f func(context.Context) error) *ProcessorTeardownCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
diff --git a/pkg/lifecycle-poc/funnel/mock/source.go b/pkg/lifecycle-poc/funnel/mock/source.go
new file mode 100644
index 000000000..30225260f
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/mock/source.go
@@ -0,0 +1,271 @@
+// Code generated by MockGen. DO NOT EDIT.
+// Source: github.com/conduitio/conduit/pkg/lifecycle-poc/funnel (interfaces: Source)
+//
+// Generated by this command:
+//
+// mockgen -typed -destination=mock/source.go -package=mock -mock_names=Source=Source . Source
+//
+
+// Package mock is a generated GoMock package.
+package mock
+
+import (
+ context "context"
+ reflect "reflect"
+
+ opencdc "github.com/conduitio/conduit-commons/opencdc"
+ gomock "go.uber.org/mock/gomock"
+)
+
+// Source is a mock of Source interface.
+type Source struct {
+ ctrl *gomock.Controller
+ recorder *SourceMockRecorder
+ isgomock struct{}
+}
+
+// SourceMockRecorder is the mock recorder for Source.
+type SourceMockRecorder struct {
+ mock *Source
+}
+
+// NewSource creates a new mock instance.
+func NewSource(ctrl *gomock.Controller) *Source {
+ mock := &Source{ctrl: ctrl}
+ mock.recorder = &SourceMockRecorder{mock}
+ return mock
+}
+
+// EXPECT returns an object that allows the caller to indicate expected use.
+func (m *Source) EXPECT() *SourceMockRecorder {
+ return m.recorder
+}
+
+// Ack mocks base method.
+func (m *Source) Ack(arg0 context.Context, arg1 []opencdc.Position) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Ack", arg0, arg1)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Ack indicates an expected call of Ack.
+func (mr *SourceMockRecorder) Ack(arg0, arg1 any) *SourceAckCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Ack", reflect.TypeOf((*Source)(nil).Ack), arg0, arg1)
+ return &SourceAckCall{Call: call}
+}
+
+// SourceAckCall wrap *gomock.Call
+type SourceAckCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceAckCall) Return(arg0 error) *SourceAckCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceAckCall) Do(f func(context.Context, []opencdc.Position) error) *SourceAckCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceAckCall) DoAndReturn(f func(context.Context, []opencdc.Position) error) *SourceAckCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Errors mocks base method.
+func (m *Source) Errors() <-chan error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Errors")
+ ret0, _ := ret[0].(<-chan error)
+ return ret0
+}
+
+// Errors indicates an expected call of Errors.
+func (mr *SourceMockRecorder) Errors() *SourceErrorsCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Errors", reflect.TypeOf((*Source)(nil).Errors))
+ return &SourceErrorsCall{Call: call}
+}
+
+// SourceErrorsCall wrap *gomock.Call
+type SourceErrorsCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceErrorsCall) Return(arg0 <-chan error) *SourceErrorsCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceErrorsCall) Do(f func() <-chan error) *SourceErrorsCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceErrorsCall) DoAndReturn(f func() <-chan error) *SourceErrorsCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// ID mocks base method.
+func (m *Source) ID() string {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "ID")
+ ret0, _ := ret[0].(string)
+ return ret0
+}
+
+// ID indicates an expected call of ID.
+func (mr *SourceMockRecorder) ID() *SourceIDCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ID", reflect.TypeOf((*Source)(nil).ID))
+ return &SourceIDCall{Call: call}
+}
+
+// SourceIDCall wrap *gomock.Call
+type SourceIDCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceIDCall) Return(arg0 string) *SourceIDCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceIDCall) Do(f func() string) *SourceIDCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceIDCall) DoAndReturn(f func() string) *SourceIDCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Open mocks base method.
+func (m *Source) Open(arg0 context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Open", arg0)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Open indicates an expected call of Open.
+func (mr *SourceMockRecorder) Open(arg0 any) *SourceOpenCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Open", reflect.TypeOf((*Source)(nil).Open), arg0)
+ return &SourceOpenCall{Call: call}
+}
+
+// SourceOpenCall wrap *gomock.Call
+type SourceOpenCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceOpenCall) Return(arg0 error) *SourceOpenCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceOpenCall) Do(f func(context.Context) error) *SourceOpenCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceOpenCall) DoAndReturn(f func(context.Context) error) *SourceOpenCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Read mocks base method.
+func (m *Source) Read(arg0 context.Context) ([]opencdc.Record, error) {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Read", arg0)
+ ret0, _ := ret[0].([]opencdc.Record)
+ ret1, _ := ret[1].(error)
+ return ret0, ret1
+}
+
+// Read indicates an expected call of Read.
+func (mr *SourceMockRecorder) Read(arg0 any) *SourceReadCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Read", reflect.TypeOf((*Source)(nil).Read), arg0)
+ return &SourceReadCall{Call: call}
+}
+
+// SourceReadCall wrap *gomock.Call
+type SourceReadCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceReadCall) Return(arg0 []opencdc.Record, arg1 error) *SourceReadCall {
+ c.Call = c.Call.Return(arg0, arg1)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceReadCall) Do(f func(context.Context) ([]opencdc.Record, error)) *SourceReadCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceReadCall) DoAndReturn(f func(context.Context) ([]opencdc.Record, error)) *SourceReadCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
+
+// Teardown mocks base method.
+func (m *Source) Teardown(arg0 context.Context) error {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "Teardown", arg0)
+ ret0, _ := ret[0].(error)
+ return ret0
+}
+
+// Teardown indicates an expected call of Teardown.
+func (mr *SourceMockRecorder) Teardown(arg0 any) *SourceTeardownCall {
+ mr.mock.ctrl.T.Helper()
+ call := mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Teardown", reflect.TypeOf((*Source)(nil).Teardown), arg0)
+ return &SourceTeardownCall{Call: call}
+}
+
+// SourceTeardownCall wrap *gomock.Call
+type SourceTeardownCall struct {
+ *gomock.Call
+}
+
+// Return rewrite *gomock.Call.Return
+func (c *SourceTeardownCall) Return(arg0 error) *SourceTeardownCall {
+ c.Call = c.Call.Return(arg0)
+ return c
+}
+
+// Do rewrite *gomock.Call.Do
+func (c *SourceTeardownCall) Do(f func(context.Context) error) *SourceTeardownCall {
+ c.Call = c.Call.Do(f)
+ return c
+}
+
+// DoAndReturn rewrite *gomock.Call.DoAndReturn
+func (c *SourceTeardownCall) DoAndReturn(f func(context.Context) error) *SourceTeardownCall {
+ c.Call = c.Call.DoAndReturn(f)
+ return c
+}
diff --git a/pkg/lifecycle-poc/funnel/processor.go b/pkg/lifecycle-poc/funnel/processor.go
new file mode 100644
index 000000000..d03695cce
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/processor.go
@@ -0,0 +1,163 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:generate mockgen -typed -destination=mock/processor.go -package=mock -mock_names=Processor=Processor . Processor
+
+package funnel
+
+import (
+ "context"
+ "time"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+ sdk "github.com/conduitio/conduit-processor-sdk"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics"
+)
+
+type ProcessorTask struct {
+ id string
+ processor Processor
+ logger log.CtxLogger
+ timer metrics.Timer
+}
+
+type Processor interface {
+ // Open configures and opens a processor plugin
+ Open(ctx context.Context) error
+ Process(context.Context, []opencdc.Record) []sdk.ProcessedRecord
+ // Teardown tears down a processor plugin.
+ // In case of standalone plugins, that means stopping the WASM module.
+ Teardown(context.Context) error
+}
+
+func NewProcessorTask(
+ id string,
+ processor Processor,
+ logger log.CtxLogger,
+ timer metrics.Timer,
+) *ProcessorTask {
+ logger = logger.WithComponent("task:processor")
+ logger.Logger = logger.With().Str(log.ProcessorIDField, id).Logger()
+ return &ProcessorTask{
+ id: id,
+ processor: processor,
+ logger: logger,
+ timer: timer,
+ }
+}
+
+func (t *ProcessorTask) ID() string {
+ return t.id
+}
+
+func (t *ProcessorTask) Open(ctx context.Context) error {
+ t.logger.Debug(ctx).Msg("opening processor")
+ err := t.processor.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to open processor: %w", err)
+ }
+ t.logger.Debug(ctx).Msg("processor open")
+ return nil
+}
+
+func (t *ProcessorTask) Close(ctx context.Context) error {
+ t.logger.Debug(ctx).Msg("tearing down processor")
+ return t.processor.Teardown(ctx)
+}
+
+func (t *ProcessorTask) Do(ctx context.Context, b *Batch) error {
+ start := time.Now()
+ recsIn := b.ActiveRecords()
+ recsOut := t.processor.Process(ctx, recsIn)
+
+ if len(recsOut) == 0 {
+ return cerrors.Errorf("processor didn't return any records")
+ }
+ t.observeMetrics(len(recsOut), start)
+
+ // Mark records in the batch as processed, filtered or errored.
+ // We do this a bit smarter, by collecting ranges of records that are
+ // processed, filtered or errored, and then marking them in one go.
+
+ from := 0 // Start of the current range of records with the same type
+ rangeType := 0 // 0 = SingleRecord, 1 = FilterRecord, 2 = ErrorRecord
+
+ for i, rec := range recsOut {
+ var currentType int
+ switch rec.(type) {
+ case sdk.SingleRecord:
+ currentType = 0
+ case sdk.FilterRecord:
+ currentType = 1
+ case sdk.ErrorRecord:
+ currentType = 2
+ default:
+ err := cerrors.Errorf("processor returned unknown record type: %T", rec)
+ return cerrors.FatalError(err)
+ }
+
+ if currentType == rangeType {
+ continue
+ }
+
+ t.markBatchRecords(b, from, recsOut[from:i])
+ from, rangeType = i, currentType
+ }
+
+ // Mark the last range of records.
+ t.markBatchRecords(b, from, recsOut[from:])
+
+ if len(recsIn) > len(recsOut) {
+ // Processor skipped some records, mark them to be retried.
+ b.Retry(len(recsOut), len(recsIn))
+ }
+
+ return nil
+}
+
+// markBatchRecords marks a range of records in a batch as processed, filtered or
+// errored, based on the type of records returned by the processor. The worker
+// can then use this information to continue processing the batch.
+func (t *ProcessorTask) markBatchRecords(b *Batch, from int, records []sdk.ProcessedRecord) {
+ if len(records) == 0 {
+ return // This can happen if the first record is not a SingleRecord.
+ }
+ switch records[0].(type) {
+ case sdk.SingleRecord:
+ recs := make([]opencdc.Record, len(records))
+ for i, rec := range records {
+ recs[i] = opencdc.Record(rec.(sdk.SingleRecord))
+ }
+ b.SetRecords(from, recs)
+ case sdk.FilterRecord:
+ b.Filter(from, len(records))
+ case sdk.ErrorRecord:
+ errs := make([]error, len(records))
+ for i, rec := range records {
+ errs[i] = rec.(sdk.ErrorRecord).Error
+ }
+ b.Nack(from, errs...)
+ }
+}
+
+func (t *ProcessorTask) observeMetrics(n int, start time.Time) {
+ tookPerRecord := time.Since(start) / time.Duration(n)
+ go func() {
+ for range n {
+ t.timer.Update(tookPerRecord)
+ }
+ }()
+}
diff --git a/pkg/lifecycle-poc/funnel/recordflag_string.go b/pkg/lifecycle-poc/funnel/recordflag_string.go
new file mode 100644
index 000000000..f183e9d24
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/recordflag_string.go
@@ -0,0 +1,26 @@
+// Code generated by "stringer -type=RecordFlag -linecomment"; DO NOT EDIT.
+
+package funnel
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[RecordFlagAck-0]
+ _ = x[RecordFlagNack-1]
+ _ = x[RecordFlagRetry-2]
+ _ = x[RecordFlagFilter-3]
+}
+
+const _RecordFlag_name = "acknackretryfilter"
+
+var _RecordFlag_index = [...]uint8{0, 3, 7, 12, 18}
+
+func (i RecordFlag) String() string {
+ if i < 0 || i >= RecordFlag(len(_RecordFlag_index)-1) {
+ return "RecordFlag(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _RecordFlag_name[_RecordFlag_index[i]:_RecordFlag_index[i+1]]
+}
diff --git a/pkg/lifecycle-poc/funnel/source.go b/pkg/lifecycle-poc/funnel/source.go
new file mode 100644
index 000000000..13ff88a5b
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/source.go
@@ -0,0 +1,121 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:generate mockgen -typed -destination=mock/source.go -package=mock -mock_names=Source=Source . Source
+
+package funnel
+
+import (
+ "context"
+ "time"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics"
+)
+
+type SourceTask struct {
+ id string
+ source Source
+ logger log.CtxLogger
+
+ timer metrics.Timer
+ histogram metrics.RecordBytesHistogram
+}
+
+type Source interface {
+ ID() string
+ Open(context.Context) error
+ Read(context.Context) ([]opencdc.Record, error)
+ Ack(context.Context, []opencdc.Position) error
+ Teardown(context.Context) error
+ // TODO figure out if we want to handle these errors. This returns errors
+ // coming from the persister, which persists the connector asynchronously.
+ // Are we even interested in these errors in the pipeline? Sounds like
+ // something we could surface and handle globally in the runtime instead.
+ Errors() <-chan error
+}
+
+func NewSourceTask(
+ id string,
+ source Source,
+ logger log.CtxLogger,
+ timer metrics.Timer,
+ histogram metrics.Histogram,
+) *SourceTask {
+ logger = logger.WithComponent("task:source")
+ logger.Logger = logger.With().Str(log.ConnectorIDField, id).Logger()
+ return &SourceTask{
+ id: id,
+ source: source,
+ logger: logger,
+ timer: timer,
+ histogram: metrics.NewRecordBytesHistogram(histogram),
+ }
+}
+
+func (t *SourceTask) ID() string {
+ return t.id
+}
+
+func (t *SourceTask) Open(ctx context.Context) error {
+ t.logger.Debug(ctx).Msg("opening source")
+ err := t.source.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to open source connector: %w", err)
+ }
+ t.logger.Debug(ctx).Msg("source open")
+ return nil
+}
+
+func (t *SourceTask) Close(context.Context) error {
+ // source is torn down in the worker on stop
+ return nil
+}
+
+func (t *SourceTask) Do(ctx context.Context, b *Batch) error {
+ start := time.Now()
+
+ recs, err := t.source.Read(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to read from source: %w", err)
+ }
+
+ t.observeMetrics(recs, start)
+
+ // Overwrite the batch with the new records.
+ *b = *NewBatch(recs)
+ return nil
+}
+
+func (t *SourceTask) observeMetrics(records []opencdc.Record, start time.Time) {
+ // Precalculate sizes so that we don't need to hold a reference to records
+ // and observations can happen in a goroutine.
+ sizes := make([]float64, len(records))
+ for i, rec := range records {
+ sizes[i] = t.histogram.SizeOf(rec)
+ }
+ tookPerRecord := time.Since(start) / time.Duration(len(sizes))
+ go func() {
+ for i := range len(sizes) {
+ t.timer.Update(tookPerRecord)
+ t.histogram.H.Observe(sizes[i])
+ }
+ }()
+}
+
+func (t *SourceTask) GetSource() Source {
+ return t.source
+}
diff --git a/pkg/lifecycle-poc/funnel/worker.go b/pkg/lifecycle-poc/funnel/worker.go
new file mode 100644
index 000000000..08fb5bd24
--- /dev/null
+++ b/pkg/lifecycle-poc/funnel/worker.go
@@ -0,0 +1,576 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package funnel
+
+import (
+ "context"
+ "sync/atomic"
+ "time"
+
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit-commons/rollback"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics"
+ "github.com/conduitio/conduit/pkg/plugin"
+ "github.com/sourcegraph/conc/pool"
+)
+
+// Task is a unit of work that can be executed by a Worker. Each Task in a
+// pipeline is executed sequentially, except for tasks related to different
+// destinations, which can be executed in parallel.
+type Task interface {
+ // ID returns the identifier of this Task. Each Task in a pipeline must be
+ // uniquely identified by the ID.
+ ID() string
+
+ // Open opens the Task for processing. It is called once before the worker
+ // starts processing records.
+ Open(context.Context) error
+ // Close closes the Task. It is called once after the worker has stopped
+ // processing records.
+ Close(context.Context) error
+ // Do processes the given batch of records. It is called for each batch of
+ // records that the worker processes.
+ Do(context.Context, *Batch) error
+}
+
+// Worker collects the tasks that need to be executed in a pipeline for a
+// specific source. It processes records from the source through the tasks until
+// it is stopped. The worker is responsible for coordinating tasks and
+// acking/nacking records.
+//
+// Batches are processed in the following way:
+// - The first task is always a source task which reads a batch of records
+// from the source. The batch is then passed to the next task.
+// - Any task between the source and the destination can process the batch by
+// updating the records or their status (see [RecordStatus]). If a record in
+// the batch is marked as filtered, the next task will skip processing it
+// and consider it as already processed. If a record is marked as nacked,
+// the record will be sent to the DLQ. If a record is marked as retry, the
+// record will be reprocessed by the same task (relevant if a task processed
+// only part of the batch, experienced an error and skipped the rest).
+// - The last task is always a destination task which writes the batch of
+// records to the destination. The batch is then acked.
+//
+// Note that if a task marks a record in the middle of a batch as nacked, the
+// batch is split into sub-batches. The records that were successfully processed
+// continue to the next task (and ideally to the end of the pipeline), because
+// Conduit provides ordering guarantees. Only once the records before the nacked
+// record are end-to-end processed, will the nacked record be sent to the DLQ.
+// The rest of the records are processed as a sub-batch, and the same rules
+// apply to them.
+type Worker struct {
+ Source Source
+ Tasks []Task
+ // Order defines the next task to be executed. Multiple indices are used to
+ // show parallel execution of tasks.
+ //
+ // Example:
+ // [[1], [2], [3,5], [4], [], []]
+ //
+ // /-> 3 -> 4
+ // 0 -> 1 -> 2
+ // \-> 5
+ Order Order
+ DLQ *DLQ
+
+ lastReadAt time.Time
+ timer metrics.Timer
+
+ // processingLock is a lock in form of a channel with a buffer size of 1 to
+ // be able to acquire the lock with a context timeout.
+ processingLock chan struct{}
+ // stop stores the information if a graceful stop was triggered.
+ stop atomic.Bool
+
+ logger log.CtxLogger
+}
+
+func NewWorker(
+ tasks []Task,
+ order Order,
+ dlq *DLQ,
+ logger log.CtxLogger,
+ timer metrics.Timer,
+) (*Worker, error) {
+ err := validateTaskOrder(tasks, order)
+ if err != nil {
+ return nil, cerrors.Errorf("invalid task order: %w", err)
+ }
+
+ st, ok := tasks[0].(interface{ GetSource() Source })
+ if !ok {
+ return nil, cerrors.Errorf("first task must be a source task, got %T", tasks[0])
+ }
+
+ return &Worker{
+ Source: st.GetSource(),
+ Tasks: tasks,
+ Order: order,
+ DLQ: dlq,
+ logger: logger.WithComponent("funnel.Worker"),
+ timer: timer,
+
+ processingLock: make(chan struct{}, 1),
+ }, nil
+}
+
+func validateTaskOrder(tasks []Task, order Order) error {
+ // Traverse the tasks according to the order and validate that each task
+ // is included exactly once.
+ if len(order) != len(tasks) {
+ return cerrors.Errorf("order length (%d) does not match tasks length (%d)", len(order), len(tasks))
+ }
+ seenCount := make([]int, len(tasks))
+ var traverse func(i int) error
+ traverse = func(i int) error {
+ if i < 0 || i >= len(tasks) {
+ return cerrors.Errorf("invalid index (%d), expected a number between 0 and %d", i, len(tasks)-1)
+ }
+ seenCount[i]++
+ if seenCount[i] > 1 {
+ return cerrors.Errorf("task %d included multiple times in order", i)
+ }
+ for _, nextIdx := range order[i] {
+ if nextIdx == i {
+ return cerrors.Errorf("task %d cannot call itself as next task", i)
+ }
+ err := traverse(nextIdx)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+ }
+ err := traverse(0)
+ if err != nil {
+ return err
+ }
+ for i, count := range seenCount {
+ if count == 0 {
+ return cerrors.Errorf("task %d not included in order", i)
+ }
+ }
+ return nil
+}
+
+// Open opens the worker for processing. It opens all tasks and the DLQ. If any
+// task fails to open, the worker is not opened and the error is returned.
+// Once a worker is opened, it can start processing records. The worker should
+// be closed using Close after it is no longer needed.
+func (w *Worker) Open(ctx context.Context) (err error) {
+ var r rollback.R
+ defer func() {
+ rollbackErr := r.Execute()
+ err = cerrors.LogOrReplace(err, rollbackErr, func() {
+ w.logger.Err(ctx, rollbackErr).Msg("failed to execute rollback")
+ })
+ }()
+
+ for _, task := range w.Tasks {
+ err = task.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("task %s failed to open: %w", task.ID(), err)
+ }
+
+ r.Append(func() error {
+ return task.Close(ctx)
+ })
+ }
+
+ err = w.DLQ.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to open DLQ: %w", err)
+ }
+
+ r.Skip()
+ return nil
+}
+
+// Stop stops the worker from processing more records. It does not stop the
+// current batch from being processed. If a batch is currently being processed,
+// the method will block and trigger the stop after the batch is processed.
+func (w *Worker) Stop(ctx context.Context) error {
+ // The lock is locked every time a batch is being processed. We lock it
+ // to be sure no batch is currently being processed.
+ release, err := w.acquireProcessingLock(ctx)
+ if err != nil {
+ return err
+ }
+ defer release()
+
+ // Lock acquired, teardown the source and set stop to true to signal the
+ // worker it should stop processing, since it won't be able to deliver
+ // any acks.
+ err = w.Source.Teardown(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to tear down source: %w", err)
+ }
+ w.stop.Store(true)
+ return nil
+}
+
+// acquireProcessingLock tries to acquire the processing lock. It returns a
+// release function that should be called to release the lock. If the context is
+// canceled before the lock is acquired, it returns the context error.
+func (w *Worker) acquireProcessingLock(ctx context.Context) (release func(), err error) {
+ select {
+ case w.processingLock <- struct{}{}:
+ return func() { <-w.processingLock }, nil
+ case <-ctx.Done():
+ // lock not acquired
+ return func() {}, ctx.Err()
+ }
+}
+
+func (w *Worker) Close(ctx context.Context) error {
+ var errs []error
+
+ for _, task := range w.Tasks {
+ err := task.Close(ctx)
+ if err != nil {
+ errs = append(errs, cerrors.Errorf("task %s failed to close: %w", task.ID(), err))
+ }
+ }
+
+ err := w.DLQ.Close(ctx)
+ if err != nil {
+ errs = append(errs, cerrors.Errorf("failed to close DLQ: %w", err))
+ }
+
+ return cerrors.Join(errs...)
+}
+
+// Do processes records from the source until the worker is stopped. It returns
+// no error if the worker is stopped gracefully.
+func (w *Worker) Do(ctx context.Context) error {
+ for !w.stop.Load() {
+ w.logger.Trace(ctx).Msg("starting next batch")
+ if err := w.doTask(ctx, 0, &Batch{}, w); err != nil {
+ return err
+ }
+ w.logger.Trace(ctx).Msg("batch done")
+ }
+ return nil
+}
+
+//nolint:gocyclo // TODO: refactor
+func (w *Worker) doTask(ctx context.Context, currentIndex int, b *Batch, acker ackNacker) error {
+ t := w.Tasks[currentIndex]
+
+ w.logger.Trace(ctx).
+ Str("task_id", t.ID()).
+ Int("batch_size", len(b.records)).
+ Msg("executing task")
+
+ err := t.Do(ctx, b)
+
+ w.logger.Trace(ctx).
+ Err(err).
+ Str("task_id", t.ID()).
+ Int("batch_size", len(b.records)).
+ Msg("task done")
+
+ if err != nil {
+ // Canceled error can be returned if the worker is stopped while reading
+ // the next batch from the source (graceful stop).
+ // ErrPluginNotRunning can be returned if the plugin is stopped before
+ // trying to read the next batch.
+ // Both are considered as graceful stop, just return the context error, if any.
+ if currentIndex == 0 && (cerrors.Is(err, context.Canceled) ||
+ (cerrors.Is(err, plugin.ErrPluginNotRunning) && w.stop.Load())) {
+ return ctx.Err()
+ }
+ return cerrors.Errorf("task %s: %w", t.ID(), err)
+ }
+
+ if currentIndex == 0 {
+ // The first task has some specifics:
+ // - Store last time we read a batch from the source for metrics.
+ // - It locks the stop lock, so that no stop signal can be received while
+ // the batch is being processed.
+ // - It checks if the source was torn down after receiving the batch and
+ // before acquiring the lock.
+ w.lastReadAt = time.Now()
+
+ release, err := w.acquireProcessingLock(ctx)
+ if err != nil {
+ return err
+ }
+ // Unlock after the batch is end-to-end processed.
+ defer release()
+
+ if w.stop.Load() {
+ // The source was already torn down, we won't be able to deliver
+ // any acks so throw away the batch and gracefully return.
+ w.logger.Warn(ctx).
+ Str("task_id", t.ID()).
+ Int("batch_size", len(b.records)).
+ Msg("stop signal received just before starting to process next batch, gracefully stopping without flushing the batch")
+ return nil
+ }
+ }
+
+ if !b.tainted {
+ w.logger.Trace(ctx).
+ Str("task_id", t.ID()).
+ Msg("task returned clean batch")
+
+ // Shortcut.
+ if !w.hasNextTask(currentIndex) {
+ // This is the last task, the batch has made it end-to-end, let's ack!
+ return acker.Ack(ctx, b)
+ }
+ // There is at least one task after this one, let's continue.
+ return w.nextTask(ctx, currentIndex, b, acker)
+ }
+
+ w.logger.Trace(ctx).
+ Str("task_id", t.ID()).
+ Msg("task returned tainted batch, splitting into sub-batches")
+
+ // Batch is tainted, we need to go through all statuses and group them by
+ // status before further processing.
+ idx := 0
+ for {
+ subBatch := w.subBatchByFlag(b, idx)
+ if subBatch == nil {
+ w.logger.Trace(ctx).Msg("processed last batch")
+ break
+ }
+
+ w.logger.Trace(ctx).
+ Str("task_id", t.ID()).
+ Int("batch_size", len(b.records)).
+ Str("record_flag", b.recordStatuses[0].Flag.String()).
+ Msg("collected sub-batch")
+
+ switch subBatch.recordStatuses[0].Flag {
+ case RecordFlagAck, RecordFlagFilter:
+ if !w.hasNextTask(currentIndex) {
+ // This is the last task, the batch has made it end-to-end, let's ack!
+ // We need to ack all the records in the batch, not only active
+ // ones, filtered ones should also be acked.
+ err := acker.Ack(ctx, subBatch)
+ if err != nil {
+ return err
+ }
+ break // break switch
+ }
+ // There is at least one task after this one, let's continue.
+ err := w.nextTask(ctx, currentIndex, subBatch, acker)
+ if err != nil {
+ return err
+ }
+ case RecordFlagNack:
+ err := acker.Nack(ctx, subBatch, t.ID())
+ if err != nil {
+ return err
+ }
+ case RecordFlagRetry:
+ err := w.doTask(ctx, currentIndex, subBatch, acker)
+ if err != nil {
+ return err
+ }
+ }
+
+ idx += len(subBatch.positions)
+ }
+
+ return nil
+}
+
+// subBatchByFlag collects a sub-batch of records with the same status starting
+// from the given index. It returns nil if firstIndex is out of bounds.
+func (w *Worker) subBatchByFlag(b *Batch, firstIndex int) *Batch {
+ if firstIndex >= len(b.recordStatuses) {
+ return nil
+ }
+
+ flags := make([]RecordFlag, 0, 2)
+ flags = append(flags, b.recordStatuses[firstIndex].Flag)
+ // Collect Filters and Acks together in the same batch.
+ if flags[0] == RecordFlagFilter {
+ flags = append(flags, RecordFlagAck)
+ } else if flags[0] == RecordFlagAck {
+ flags = append(flags, RecordFlagFilter)
+ }
+
+ lastIndex := firstIndex
+OUTER:
+ for _, status := range b.recordStatuses[firstIndex:] {
+ for _, f := range flags {
+ if status.Flag == f {
+ lastIndex++
+ // Record has matching status, let's continue.
+ continue OUTER
+ }
+ }
+ // Record has a different status, we're done.
+ break
+ }
+
+ return b.sub(firstIndex, lastIndex)
+}
+
+func (w *Worker) hasNextTask(currentIndex int) bool {
+ return len(w.Order[currentIndex]) > 0
+}
+
+func (w *Worker) nextTask(ctx context.Context, currentIndex int, b *Batch, acker ackNacker) error {
+ nextIndices := w.Order[currentIndex]
+ switch len(nextIndices) {
+ case 0:
+ // no next task, we're done
+ return nil
+ case 1:
+ // single next task, let's pass the batch to it
+ return w.doTask(ctx, nextIndices[0], b, acker)
+ default:
+ // TODO(multi-connector): remove error
+ return cerrors.Errorf("multiple next tasks not supported yet")
+
+ // multiple next tasks, let's clone the batch and pass it to them
+ // concurrently
+ //nolint:govet // TODO implement multi ack nacker
+ multiAcker := newMultiAckNacker(acker, len(nextIndices))
+ p := pool.New().WithErrors() // TODO WithContext?
+ for _, i := range nextIndices {
+ b := b.clone()
+ p.Go(func() error {
+ return w.doTask(ctx, i, b, multiAcker)
+ })
+ }
+ err := p.Wait()
+ if err != nil {
+ return err // no need to wrap, it already contains the task ID
+ }
+
+ // TODO merge batch statuses?
+ return nil
+ }
+}
+
+func (w *Worker) Ack(ctx context.Context, batch *Batch) error {
+ err := w.Source.Ack(ctx, batch.positions)
+ if err != nil {
+ return cerrors.Errorf("failed to ack %d records in source: %w", len(batch.records), err)
+ }
+
+ w.DLQ.Ack(ctx, batch)
+ w.updateTimer(batch.records)
+ return nil
+}
+
+func (w *Worker) Nack(ctx context.Context, batch *Batch, taskID string) error {
+ n, err := w.DLQ.Nack(ctx, batch, taskID)
+ if n > 0 {
+ // Successfully nacked n records, let's ack them, as they reached
+ // the end of the pipeline (in this case the DLQ).
+ err := w.Source.Ack(ctx, batch.positions[:n])
+ if err != nil {
+ return cerrors.Errorf("task %s failed to ack %d records in source: %w", n, err)
+ }
+
+ w.updateTimer(batch.records[:n])
+ }
+
+ if err != nil {
+ return cerrors.Errorf("failed to nack %d records: %w", len(batch.records)-n, err)
+ }
+ return nil
+}
+
+func (w *Worker) updateTimer(records []opencdc.Record) {
+ for _, rec := range records {
+ readAt, err := rec.Metadata.GetReadAt()
+ if err != nil {
+ // If the record metadata has changed and does not include ReadAt
+ // fallback to the time the worker received the record.
+ readAt = w.lastReadAt
+ }
+ w.timer.UpdateSince(readAt)
+ }
+}
+
+// Order represents the order of tasks in a pipeline. Each index in the slice
+// represents a task, and the value at that index is a slice of indices of the
+// next tasks to be executed. If the slice is empty, the task is the last one in
+// the pipeline.
+type Order [][]int
+
+// AppendSingle appends a single element to the current order.
+func (o Order) AppendSingle(next []int) Order {
+ if len(o) == 0 {
+ return Order{next}
+ }
+ o[len(o)-1] = append(o[len(o)-1], len(o))
+ return append(o, next)
+}
+
+// AppendOrder appends the next order to the current order. The next order indices
+// are adjusted to match the new order length.
+func (o Order) AppendOrder(next Order) Order {
+ if len(o) == 0 {
+ return next
+ } else if len(next) == 0 {
+ return o
+ }
+
+ next.Increase(len(o))
+ o[len(o)-1] = append(o[len(o)-1], len(o))
+ return append(o, next...)
+}
+
+// Increase increases all indices in the order by the given increment.
+func (o Order) Increase(incr int) Order {
+ for _, v := range o {
+ for i := range v {
+ v[i] += incr
+ }
+ }
+ return o
+}
+
+type ackNacker interface {
+ Ack(context.Context, *Batch) error
+ Nack(context.Context, *Batch, string) error
+}
+
+// multiAckNacker is an ackNacker that expects multiple acks/nacks for the same
+// batch. It keeps track of the number of acks/nacks and only acks/nacks the
+// batch when all expected acks/nacks are received.
+type multiAckNacker struct {
+ parent ackNacker
+ count *atomic.Int32
+}
+
+func newMultiAckNacker(parent ackNacker, count int) *multiAckNacker {
+ c := atomic.Int32{}
+ c.Add(int32(count)) //nolint:gosec // no risk of overflow
+ return &multiAckNacker{
+ parent: parent,
+ count: &c,
+ }
+}
+
+func (m *multiAckNacker) Ack(ctx context.Context, batch *Batch) error {
+ panic("not implemented")
+}
+
+func (m *multiAckNacker) Nack(ctx context.Context, batch *Batch, taskID string) error {
+ panic("not implemented")
+}
diff --git a/pkg/lifecycle-poc/service.go b/pkg/lifecycle-poc/service.go
new file mode 100644
index 000000000..f386625fd
--- /dev/null
+++ b/pkg/lifecycle-poc/service.go
@@ -0,0 +1,668 @@
+// Copyright © 2024 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lifecycle contains the logic to manage the lifecycle of pipelines.
+// It is responsible for starting, stopping and managing pipelines.
+package lifecycle
+
+import (
+ "context"
+ "fmt"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "github.com/conduitio/conduit-commons/csync"
+ "github.com/conduitio/conduit/pkg/connector"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/foundation/metrics/measure"
+ "github.com/conduitio/conduit/pkg/lifecycle-poc/funnel"
+ "github.com/conduitio/conduit/pkg/pipeline"
+ connectorPlugin "github.com/conduitio/conduit/pkg/plugin/connector"
+ "github.com/conduitio/conduit/pkg/processor"
+ "gopkg.in/tomb.v2"
+)
+
+type FailureEvent struct {
+ // ID is the ID of the pipeline which failed.
+ ID string
+ Error error
+}
+
+type FailureHandler func(FailureEvent)
+
+// Service manages pipelines.
+type Service struct {
+ logger log.CtxLogger
+
+ pipelines PipelineService
+ connectors ConnectorService
+
+ processors ProcessorService
+ connectorPlugins ConnectorPluginService
+
+ handlers []FailureHandler
+ runningPipelines *csync.Map[string, *runnablePipeline]
+
+ isGracefulShutdown atomic.Bool
+}
+
+// NewService initializes and returns a lifecycle.Service.
+func NewService(
+ logger log.CtxLogger,
+ connectors ConnectorService,
+ processors ProcessorService,
+ connectorPlugins ConnectorPluginService,
+ pipelines PipelineService,
+) *Service {
+ return &Service{
+ logger: logger.WithComponent("lifecycle.Service"),
+ connectors: connectors,
+ processors: processors,
+ connectorPlugins: connectorPlugins,
+ pipelines: pipelines,
+ runningPipelines: csync.NewMap[string, *runnablePipeline](),
+ }
+}
+
+type runnablePipeline struct {
+ pipeline *pipeline.Instance
+ w *funnel.Worker
+ t *tomb.Tomb
+}
+
+// ConnectorService can fetch and create a connector instance.
+type ConnectorService interface {
+ Get(ctx context.Context, id string) (*connector.Instance, error)
+ Create(ctx context.Context, id string, t connector.Type, plugin string, pipelineID string, cfg connector.Config, p connector.ProvisionType) (*connector.Instance, error)
+}
+
+// ProcessorService can fetch a processor instance and make a runnable processor from it.
+type ProcessorService interface {
+ Get(ctx context.Context, id string) (*processor.Instance, error)
+ MakeRunnableProcessor(ctx context.Context, i *processor.Instance) (*processor.RunnableProcessor, error)
+}
+
+// ConnectorPluginService can create a connector plugin dispenser.
+type ConnectorPluginService interface {
+ NewDispenser(logger log.CtxLogger, name string, connectorID string) (connectorPlugin.Dispenser, error)
+}
+
+// PipelineService can fetch, list and update the status of a pipeline instance.
+type PipelineService interface {
+ Get(ctx context.Context, pipelineID string) (*pipeline.Instance, error)
+ List(ctx context.Context) map[string]*pipeline.Instance
+ UpdateStatus(ctx context.Context, pipelineID string, status pipeline.Status, errMsg string) error
+}
+
+// OnFailure registers a handler for a lifecycle.FailureEvent.
+// Only errors which happen after a pipeline has been started
+// are being sent.
+func (s *Service) OnFailure(handler FailureHandler) {
+ s.handlers = append(s.handlers, handler)
+}
+
+// Init starts all pipelines that have the StatusSystemStopped.
+func (s *Service) Init(
+ ctx context.Context,
+) error {
+ var errs []error
+ s.logger.Debug(ctx).Msg("initializing pipelines statuses")
+
+ instances := s.pipelines.List(ctx)
+ for _, instance := range instances {
+ if instance.GetStatus() == pipeline.StatusSystemStopped {
+ err := s.Start(ctx, instance.ID)
+ if err != nil {
+ // try to start remaining pipelines and gather errors
+ errs = append(errs, err)
+ }
+ }
+ }
+
+ return cerrors.Join(errs...)
+}
+
+// Start builds and starts a pipeline with the given ID.
+// If the pipeline is already running, Start returns ErrPipelineRunning.
+func (s *Service) Start(
+ ctx context.Context,
+ pipelineID string,
+) error {
+ pl, err := s.pipelines.Get(ctx, pipelineID)
+ if err != nil {
+ return err
+ }
+
+ if pl.GetStatus() == pipeline.StatusRunning {
+ return cerrors.Errorf("can't start pipeline %s: %w", pl.ID, pipeline.ErrPipelineRunning)
+ }
+
+ s.logger.Debug(ctx).Str(log.PipelineIDField, pl.ID).Msg("starting pipeline")
+ s.logger.Trace(ctx).Str(log.PipelineIDField, pl.ID).Msg("building tasks")
+
+ rp, err := s.buildRunnablePipeline(ctx, pl)
+ if err != nil {
+ return cerrors.Errorf("could not build tasks for pipeline %s: %w", pl.ID, err)
+ }
+
+ s.logger.Trace(ctx).Str(log.PipelineIDField, pl.ID).Msg("running pipeline")
+
+ if err := s.runPipeline(rp); err != nil {
+ return cerrors.Errorf("failed to run pipeline %s: %w", pl.ID, err)
+ }
+ s.logger.Info(ctx).Str(log.PipelineIDField, pl.ID).Msg("pipeline started")
+
+ s.runningPipelines.Set(pl.ID, rp)
+
+ return nil
+}
+
+// Stop will attempt to gracefully stop a given pipeline by calling each worker's
+// Stop method. If the force flag is set to true, the pipeline will be stopped
+// forcefully by cancelling the context.
+func (s *Service) Stop(ctx context.Context, pipelineID string, force bool) error {
+ rp, ok := s.runningPipelines.Get(pipelineID)
+
+ if !ok {
+ return cerrors.Errorf("pipeline %s is not running: %w", pipelineID, pipeline.ErrPipelineNotRunning)
+ }
+
+ if rp.pipeline.GetStatus() != pipeline.StatusRunning && rp.pipeline.GetStatus() != pipeline.StatusRecovering {
+ return cerrors.Errorf("can't stop pipeline with status %q: %w", rp.pipeline.GetStatus(), pipeline.ErrPipelineNotRunning)
+ }
+
+ return s.stopRunnablePipeline(ctx, rp, force)
+}
+
+// StopAll will ask all the running pipelines to stop gracefully
+// (i.e. that existing messages get processed but not new messages get produced).
+func (s *Service) StopAll(ctx context.Context, force bool) error {
+ // Set graceful shutdown flag to true, so pipelines know the system triggered the stop.
+ s.isGracefulShutdown.Store(true)
+
+ l := s.runningPipelines.Len()
+ if l == 0 {
+ return nil
+ }
+
+ switch force {
+ case false:
+ s.logger.Info(ctx).Msgf("stopping %d pipelines gracefully", l)
+ case true:
+ s.logger.Info(ctx).Msgf("stopping %d pipelines forcefully", l)
+ }
+
+ var errs []error
+ for _, rp := range s.runningPipelines.All() {
+ if rp.pipeline.GetStatus() != pipeline.StatusRunning && rp.pipeline.GetStatus() != pipeline.StatusRecovering {
+ continue
+ }
+ errs = append(errs, s.stopRunnablePipeline(ctx, rp, force))
+ }
+ return cerrors.Join(errs...)
+}
+
+func (s *Service) stopRunnablePipeline(ctx context.Context, rp *runnablePipeline, force bool) error {
+ switch force {
+ case false:
+ s.logger.Info(ctx).
+ Str(log.PipelineIDField, rp.pipeline.ID).
+ Any(log.PipelineStatusField, rp.pipeline.GetStatus()).
+ Msg("gracefully stopping pipeline")
+ return rp.w.Stop(ctx)
+ case true:
+ s.logger.Info(ctx).
+ Str(log.PipelineIDField, rp.pipeline.ID).
+ Any(log.PipelineStatusField, rp.pipeline.GetStatus()).
+ Msg("force stopping pipeline")
+ rp.t.Kill(pipeline.ErrForceStop)
+ return nil
+ }
+ panic("unreachable")
+}
+
+// Wait blocks until all pipelines are stopped or until the timeout is reached.
+// Returns:
+//
+// (1) nil if all the pipelines are gracefully stopped,
+//
+// (2) an error, if the pipelines could not have been gracefully stopped,
+//
+// (3) context.DeadlineExceeded if the pipelines were not stopped within the given timeout.
+func (s *Service) Wait(timeout time.Duration) error {
+ gracefullyStopped := make(chan struct{})
+ var err error
+ go func() {
+ defer close(gracefullyStopped)
+ err = s.waitInternal()
+ }()
+
+ select {
+ case <-gracefullyStopped:
+ return err
+ case <-time.After(timeout):
+ return context.DeadlineExceeded
+ }
+}
+
+// waitInternal blocks until all pipelines are stopped and returns an error if any of
+// the pipelines failed to stop gracefully.
+func (s *Service) waitInternal() error {
+ var errs []error
+
+ // copy pipelines to keep the map unlocked while we iterate it
+ pipelines := s.runningPipelines.Copy()
+
+ for _, rp := range pipelines.All() {
+ if rp.t == nil {
+ continue
+ }
+ err := rp.t.Wait()
+ if err != nil {
+ errs = append(errs, cerrors.Errorf("pipeline %s: %w", rp.pipeline.ID, err))
+ }
+ }
+ return cerrors.Join(errs...)
+}
+
+// WaitPipeline blocks until the pipeline with the given ID is stopped.
+func (s *Service) WaitPipeline(id string) error {
+ p, ok := s.runningPipelines.Get(id)
+ if !ok || p.t == nil {
+ return nil
+ }
+ return p.t.Wait()
+}
+
+// buildRunnablePipeline will build and connect all tasks configured in the pipeline.
+func (s *Service) buildRunnablePipeline(
+ ctx context.Context,
+ pl *pipeline.Instance,
+) (*runnablePipeline, error) {
+ pipelineLogger := s.logger
+ pipelineLogger.Logger = pipelineLogger.Logger.With().Str(log.PipelineIDField, pl.ID).Logger()
+
+ srcTasks, srcOrder, err := s.buildSourceTasks(ctx, pl, pipelineLogger)
+ if err != nil {
+ return nil, cerrors.Errorf("failed to build source tasks: %w", err)
+ }
+ if len(srcTasks) == 0 {
+ return nil, cerrors.New("can't build pipeline without any source connectors")
+ }
+
+ destTasks, destOrder, err := s.buildDestinationTasks(ctx, pl, pipelineLogger)
+ if err != nil {
+ return nil, cerrors.Errorf("failed to build destination tasks: %w", err)
+ }
+ if len(destTasks) == 0 {
+ return nil, cerrors.New("can't build pipeline without any destination connectors")
+ }
+
+ procTasks, procOrder, err := s.buildProcessorTasks(ctx, pl, pl.ProcessorIDs, pipelineLogger)
+ if err != nil {
+ return nil, cerrors.Errorf("failed to build pipeline processor tasks: %w", err)
+ }
+
+ dlq, err := s.buildDLQ(ctx, pl, pipelineLogger)
+ if err != nil {
+ return nil, cerrors.Errorf("failed to build DLQ: %w", err)
+ }
+
+ tasks, order := s.combineTasksAndOrders(srcTasks, destTasks, procTasks, srcOrder, destOrder, procOrder)
+
+ // log the tasks and order for debugging purposes
+ taskTypes := make([]string, len(tasks))
+ for i, task := range tasks {
+ taskTypes[i] = fmt.Sprintf("%s(%T)", task.ID(), task)
+ }
+ pipelineLogger.Info(ctx).Any("tasks", taskTypes).Any("order", order).Msg("pipeline tasks and order")
+
+ worker, err := funnel.NewWorker(
+ tasks,
+ order,
+ dlq,
+ pipelineLogger,
+ measure.PipelineExecutionDurationTimer.WithValues(pl.Config.Name),
+ )
+ if err != nil {
+ return nil, cerrors.Errorf("failed to create worker: %w", err)
+ }
+ return &runnablePipeline{
+ pipeline: pl,
+ w: worker,
+ }, nil
+}
+
+func (s *Service) combineTasksAndOrders(
+ srcTasks, destTasks, procTasks []funnel.Task,
+ srcOrder, destOrder, procOrder funnel.Order,
+) ([]funnel.Task, funnel.Order) {
+ tasks := make([]funnel.Task, 0, len(srcTasks)+len(procTasks)+len(destTasks))
+ tasks = append(tasks, srcTasks...)
+ tasks = append(tasks, procTasks...)
+ tasks = append(tasks, destTasks...)
+
+ // TODO(multi-connector): when we have multiple connectors this will not be as straight forward
+ order := srcOrder.AppendOrder(procOrder).AppendOrder(destOrder)
+ return tasks, order
+}
+
+func (s *Service) buildSourceTasks(
+ ctx context.Context,
+ pl *pipeline.Instance,
+ logger log.CtxLogger,
+) ([]funnel.Task, funnel.Order, error) {
+ var tasks []funnel.Task
+ var order funnel.Order
+
+ for _, connID := range pl.ConnectorIDs {
+ instance, err := s.connectors.Get(ctx, connID)
+ if err != nil {
+ return nil, nil, cerrors.Errorf("could not fetch connector: %w", err)
+ }
+
+ if instance.Type != connector.TypeSource {
+ continue // skip any connector that's not a source
+ }
+
+ if len(tasks) > 1 {
+ // TODO(multi-connector): remove check
+ return nil, nil, cerrors.New("pipelines with multiple source connectors currently not supported, please disable the experimental feature flag")
+ }
+
+ src, err := instance.Connector(ctx, s.connectorPlugins)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ srcTask := funnel.NewSourceTask(
+ instance.ID,
+ src.(*connector.Source),
+ logger,
+ measure.ConnectorExecutionDurationTimer.WithValues(
+ pl.Config.Name,
+ instance.Plugin,
+ strings.ToLower(instance.Type.String()),
+ ),
+ measure.ConnectorBytesHistogram.WithValues(
+ pl.Config.Name,
+ instance.Plugin,
+ strings.ToLower(instance.Type.String()),
+ ),
+ )
+
+ // Add processor tasks
+ processorTasks, processorOrder, err := s.buildProcessorTasks(ctx, pl, instance.ProcessorIDs, logger)
+ if err != nil {
+ return nil, nil, cerrors.Errorf("failed to build source processor tasks: %w", err)
+ }
+
+ // Adjust order to include new task and the processor order
+ tasks = append(tasks, srcTask)
+ tasks = append(tasks, processorTasks...)
+
+ order = append(order, nil) // Add new task to order without attaching to previous tasks
+ order = order.AppendOrder(processorOrder)
+ }
+
+ return tasks, order, nil
+}
+
+func (s *Service) buildDestinationTasks(
+ ctx context.Context,
+ pl *pipeline.Instance,
+ logger log.CtxLogger,
+) ([]funnel.Task, funnel.Order, error) {
+ var tasks []funnel.Task
+ var order funnel.Order
+
+ for _, connID := range pl.ConnectorIDs {
+ instance, err := s.connectors.Get(ctx, connID)
+ if err != nil {
+ return nil, nil, cerrors.Errorf("could not fetch connector: %w", err)
+ }
+
+ if instance.Type != connector.TypeDestination {
+ continue // skip any connector that's not a destination
+ }
+
+ if len(tasks) > 1 {
+ // TODO(multi-connector): remove check
+ return nil, nil, cerrors.New("pipelines with multiple destination connectors currently not supported, please disable the experimental feature flag")
+ }
+
+ dest, err := instance.Connector(ctx, s.connectorPlugins)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ destTask := funnel.NewDestinationTask(
+ instance.ID,
+ dest.(*connector.Destination),
+ logger,
+ measure.ConnectorExecutionDurationTimer.WithValues(
+ pl.Config.Name,
+ instance.Plugin,
+ strings.ToLower(instance.Type.String()),
+ ),
+ measure.ConnectorBytesHistogram.WithValues(
+ pl.Config.Name,
+ instance.Plugin,
+ strings.ToLower(instance.Type.String()),
+ ),
+ )
+
+ // Add processor tasks
+ processorTasks, processorOrder, err := s.buildProcessorTasks(ctx, pl, instance.ProcessorIDs, logger)
+ if err != nil {
+ return nil, nil, cerrors.Errorf("failed to build destination processor tasks: %w", err)
+ }
+
+ // Adjust order to include new task and the processor order
+ tasks = append(tasks, processorTasks...)
+ tasks = append(tasks, destTask)
+
+ order = append(order, processorOrder.Increase(len(order))...) // Add processor task order without attaching to previous tasks
+ order = order.AppendSingle(nil)
+ }
+
+ return tasks, order, nil
+}
+
+func (s *Service) buildProcessorTasks(
+ ctx context.Context,
+ pl *pipeline.Instance,
+ processorIDs []string,
+ logger log.CtxLogger,
+) ([]funnel.Task, funnel.Order, error) {
+ var tasks []funnel.Task
+ var order funnel.Order
+
+ for _, procID := range processorIDs {
+ instance, err := s.processors.Get(ctx, procID)
+ if err != nil {
+ return nil, nil, cerrors.Errorf("could not fetch processor: %w", err)
+ }
+
+ runnableProc, err := s.processors.MakeRunnableProcessor(ctx, instance)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ tasks = append(
+ tasks,
+ funnel.NewProcessorTask(
+ instance.ID,
+ runnableProc,
+ logger,
+ measure.ProcessorExecutionDurationTimer.WithValues(pl.Config.Name, instance.Plugin),
+ ),
+ )
+ order = order.AppendSingle(nil)
+ }
+
+ return tasks, order, nil
+}
+
+func (s *Service) buildDLQ(
+ ctx context.Context,
+ pl *pipeline.Instance,
+ logger log.CtxLogger,
+) (*funnel.DLQ, error) {
+ conn, err := s.connectors.Create(
+ ctx,
+ pl.ID+"-dlq",
+ connector.TypeDestination,
+ pl.DLQ.Plugin,
+ pl.ID,
+ connector.Config{
+ Name: pl.ID + "-dlq",
+ Settings: pl.DLQ.Settings,
+ },
+ connector.ProvisionTypeDLQ, // the provision type ensures the connector won't be persisted
+ )
+ if err != nil {
+ return nil, cerrors.Errorf("failed to create DLQ destination: %w", err)
+ }
+
+ dest, err := conn.Connector(ctx, s.connectorPlugins)
+ if err != nil {
+ return nil, err
+ }
+
+ return funnel.NewDLQ(
+ "dlq",
+ dest.(*connector.Destination),
+ logger,
+ measure.DLQExecutionDurationTimer.WithValues(pl.Config.Name, conn.Plugin),
+ measure.DLQBytesHistogram.WithValues(pl.Config.Name, conn.Plugin),
+ pl.DLQ.WindowSize,
+ pl.DLQ.WindowNackThreshold,
+ ), nil
+}
+
+func (s *Service) runPipeline(rp *runnablePipeline) error {
+ if rp.t != nil && rp.t.Alive() {
+ return pipeline.ErrPipelineRunning
+ }
+
+ // the tomb is responsible for running goroutines related to the pipeline
+ rp.t = &tomb.Tomb{}
+ ctx := rp.t.Context(nil) //nolint:staticcheck // this is the correct usage of tomb
+
+ err := rp.w.Open(ctx)
+ if err != nil {
+ return cerrors.Errorf("failed to open worker: %w", err)
+ }
+
+ var workersWg sync.WaitGroup
+
+ // TODO(multi-connector): when we have multiple connectors spawn a worker for each source
+ workersWg.Add(1)
+ rp.t.Go(func() error {
+ defer workersWg.Done()
+
+ doErr := rp.w.Do(ctx)
+ s.logger.Err(ctx, doErr).Str(log.PipelineIDField, rp.pipeline.ID).Msg("pipeline worker stopped")
+
+ closeErr := rp.w.Close(context.Background())
+ err := cerrors.Join(doErr, closeErr)
+ if err != nil {
+ return cerrors.Errorf("worker stopped with error: %w", err)
+ }
+
+ return nil
+ })
+ rp.t.Go(func() error {
+ // Use fresh context for cleanup function, otherwise the updated status
+ // will potentially fail to be stored.
+ ctx := context.Background()
+
+ workersWg.Wait()
+ err := rp.t.Err()
+
+ switch err {
+ case tomb.ErrStillAlive:
+ // not an actual error, the pipeline stopped gracefully
+ err = nil
+ var status pipeline.Status
+ if s.isGracefulShutdown.Load() {
+ // it was triggered by a graceful shutdown of Conduit
+ status = pipeline.StatusSystemStopped
+ } else {
+ // it was manually triggered by a user
+ status = pipeline.StatusUserStopped
+ }
+ if err := s.pipelines.UpdateStatus(ctx, rp.pipeline.ID, status, ""); err != nil {
+ return err
+ }
+ default:
+ if cerrors.IsFatalError(err) {
+ // we use %+v to get the stack trace too
+ if err := s.pipelines.UpdateStatus(ctx, rp.pipeline.ID, pipeline.StatusDegraded, fmt.Sprintf("%+v", err)); err != nil {
+ return err
+ }
+ } else { //nolint:staticcheck // TODO: implement recovery
+ // // try to recover the pipeline
+ // if recoveryErr := s.recoverPipeline(ctx, rp); recoveryErr != nil {
+ // s.logger.
+ // Err(ctx, err).
+ // Str(log.PipelineIDField, rp.pipeline.ID).
+ // Msg("pipeline recovery failed")
+ //
+ // if updateErr := s.pipelines.UpdateStatus(ctx, rp.pipeline.ID, pipeline.StatusDegraded, fmt.Sprintf("%+v", recoveryErr)); updateErr != nil {
+ // return updateErr
+ // }
+ //
+ // // we assign it to err so it's returned and notified by the cleanup function
+ // err = recoveryErr
+ // } else {
+ // // recovery was triggered didn't error, so no cleanup
+ // // this is why we return nil to skip the cleanup below.
+ // return nil
+ // }
+ }
+ }
+
+ s.logger.
+ Err(ctx, err).
+ Str(log.PipelineIDField, rp.pipeline.ID).
+ Msg("pipeline stopped")
+
+ // confirmed that all nodes stopped, we can now remove the pipeline from the running pipelines
+ s.runningPipelines.Delete(rp.pipeline.ID)
+
+ s.notify(rp.pipeline.ID, err)
+ return err
+ })
+
+ return s.pipelines.UpdateStatus(ctx, rp.pipeline.ID, pipeline.StatusRunning, "")
+}
+
+// notify notifies all registered FailureHandlers about an error.
+func (s *Service) notify(pipelineID string, err error) {
+ if err == nil {
+ return
+ }
+ e := FailureEvent{
+ ID: pipelineID,
+ Error: err,
+ }
+ for _, handler := range s.handlers {
+ handler(e)
+ }
+}
diff --git a/pkg/lifecycle-poc/service_test.go b/pkg/lifecycle-poc/service_test.go
new file mode 100644
index 000000000..4c5201248
--- /dev/null
+++ b/pkg/lifecycle-poc/service_test.go
@@ -0,0 +1,596 @@
+// Copyright © 2022 Meroxa, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package lifecycle
+
+import (
+ "context"
+ "fmt"
+ "reflect"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/conduitio/conduit-commons/cchan"
+ "github.com/conduitio/conduit-commons/database/inmemory"
+ "github.com/conduitio/conduit-commons/opencdc"
+ "github.com/conduitio/conduit/pkg/connector"
+ "github.com/conduitio/conduit/pkg/foundation/cerrors"
+ "github.com/conduitio/conduit/pkg/foundation/log"
+ "github.com/conduitio/conduit/pkg/lifecycle-poc/funnel"
+ "github.com/conduitio/conduit/pkg/pipeline"
+ "github.com/conduitio/conduit/pkg/plugin"
+ connectorPlugin "github.com/conduitio/conduit/pkg/plugin/connector"
+ pmock "github.com/conduitio/conduit/pkg/plugin/connector/mock"
+ "github.com/conduitio/conduit/pkg/processor"
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+ "github.com/google/uuid"
+ "github.com/matryer/is"
+ "github.com/rs/zerolog"
+ "go.uber.org/mock/gomock"
+)
+
+const testDLQID = "test-dlq"
+
+func TestServiceLifecycle_buildRunnablePipeline(t *testing.T) {
+ is := is.New(t)
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+ ctrl := gomock.NewController(t)
+ logger := log.New(zerolog.Nop())
+ db := &inmemory.DB{}
+ persister := connector.NewPersister(logger, db, time.Second, 3)
+
+ source := dummySource(persister)
+ destination := dummyDestination(persister)
+ dlq := dummyDestination(persister)
+ pl := &pipeline.Instance{
+ ID: uuid.NewString(),
+ Config: pipeline.Config{Name: "test-pipeline"},
+ DLQ: pipeline.DLQ{
+ Plugin: dlq.Plugin,
+ Settings: map[string]string{},
+ WindowSize: 3,
+ WindowNackThreshold: 2,
+ },
+ ConnectorIDs: []string{source.ID, destination.ID},
+ }
+ pl.SetStatus(pipeline.StatusUserStopped)
+
+ ls := NewService(
+ logger,
+ testConnectorService{
+ source.ID: source,
+ destination.ID: destination,
+ testDLQID: dlq,
+ },
+ testProcessorService{},
+ testConnectorPluginService{
+ source.Plugin: pmock.NewDispenser(ctrl),
+ destination.Plugin: pmock.NewDispenser(ctrl),
+ dlq.Plugin: pmock.NewDispenser(ctrl),
+ },
+ testPipelineService{},
+ )
+
+ got, err := ls.buildRunnablePipeline(
+ ctx,
+ pl,
+ )
+
+ is.NoErr(err)
+
+ is.Equal("", cmp.Diff(pl, got.pipeline, cmpopts.IgnoreUnexported(pipeline.Instance{})))
+
+ wantTasks := []funnel.Task{
+ &funnel.SourceTask{},
+ &funnel.DestinationTask{},
+ }
+ is.Equal(len(got.w.Tasks), len(wantTasks))
+ for i := range got.w.Tasks {
+ want := wantTasks[i]
+ got := got.w.Tasks[i]
+ is.Equal(reflect.TypeOf(want), reflect.TypeOf(got)) // unexpected task type
+ }
+ is.Equal(got.w.Order, funnel.Order{{1}, nil})
+ is.Equal(got.w.Source.(*connector.Source).Instance, source)
+}
+
+func TestService_buildRunnablePipeline_NoSourceNode(t *testing.T) {
+ is := is.New(t)
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+ ctrl := gomock.NewController(t)
+ logger := log.New(zerolog.Nop())
+ db := &inmemory.DB{}
+ persister := connector.NewPersister(logger, db, time.Second, 3)
+
+ destination := dummyDestination(persister)
+ dlq := dummyDestination(persister)
+ pl := &pipeline.Instance{
+ ID: uuid.NewString(),
+ Config: pipeline.Config{Name: "test-pipeline"},
+ DLQ: pipeline.DLQ{
+ Plugin: dlq.Plugin,
+ Settings: map[string]string{},
+ WindowSize: 3,
+ WindowNackThreshold: 2,
+ },
+ ConnectorIDs: []string{destination.ID},
+ }
+ pl.SetStatus(pipeline.StatusUserStopped)
+
+ ls := NewService(logger,
+ testConnectorService{
+ destination.ID: destination,
+ testDLQID: dlq,
+ },
+ testProcessorService{},
+ testConnectorPluginService{
+ destination.Plugin: pmock.NewDispenser(ctrl),
+ dlq.Plugin: pmock.NewDispenser(ctrl),
+ },
+ testPipelineService{},
+ )
+
+ wantErr := "can't build pipeline without any source connectors"
+
+ got, err := ls.buildRunnablePipeline(
+ ctx,
+ pl,
+ )
+
+ is.True(err != nil)
+ is.Equal(err.Error(), wantErr)
+ is.Equal(got, nil)
+}
+
+func TestService_buildRunnablePipeline_NoDestinationNode(t *testing.T) {
+ is := is.New(t)
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+ ctrl := gomock.NewController(t)
+ logger := log.New(zerolog.Nop())
+ db := &inmemory.DB{}
+ persister := connector.NewPersister(logger, db, time.Second, 3)
+
+ source := dummySource(persister)
+ dlq := dummyDestination(persister)
+
+ ls := NewService(logger,
+ testConnectorService{
+ source.ID: source,
+ testDLQID: dlq,
+ },
+ testProcessorService{},
+ testConnectorPluginService{
+ source.Plugin: pmock.NewDispenser(ctrl),
+ dlq.Plugin: pmock.NewDispenser(ctrl),
+ },
+ testPipelineService{},
+ )
+
+ wantErr := "can't build pipeline without any destination connectors"
+
+ pl := &pipeline.Instance{
+ ID: uuid.NewString(),
+ Config: pipeline.Config{Name: "test-pipeline"},
+ DLQ: pipeline.DLQ{
+ Plugin: dlq.Plugin,
+ Settings: map[string]string{},
+ WindowSize: 3,
+ WindowNackThreshold: 2,
+ },
+ ConnectorIDs: []string{source.ID},
+ }
+ pl.SetStatus(pipeline.StatusUserStopped)
+
+ got, err := ls.buildRunnablePipeline(
+ ctx,
+ pl,
+ )
+
+ is.True(err != nil)
+ is.Equal(err.Error(), wantErr)
+ is.Equal(got, nil)
+}
+
+func TestServiceLifecycle_PipelineSuccess(t *testing.T) {
+ is := is.New(t)
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+ logger := log.New(zerolog.Nop())
+ db := &inmemory.DB{}
+ persister := connector.NewPersister(logger, db, time.Second, 3)
+ defer persister.Wait()
+
+ ps := pipeline.NewService(logger, db)
+
+ // create a host pipeline
+ pl, err := ps.Create(ctx, uuid.NewString(), pipeline.Config{Name: "test pipeline"}, pipeline.ProvisionTypeAPI)
+ is.NoErr(err)
+
+ // create mocked connectors
+ ctrl := gomock.NewController(t)
+ wantRecords := generateRecords(10)
+ source, sourceDispenser := generatorSource(ctrl, persister, wantRecords, nil, false)
+ destination, destDispenser := asserterDestination(ctrl, persister, wantRecords, false)
+ dlq, dlqDispenser := asserterDestination(ctrl, persister, nil, false)
+ pl.DLQ.Plugin = dlq.Plugin
+
+ pl, err = ps.AddConnector(ctx, pl.ID, source.ID)
+ is.NoErr(err)
+ pl, err = ps.AddConnector(ctx, pl.ID, destination.ID)
+ is.NoErr(err)
+
+ ls := NewService(logger,
+ testConnectorService{
+ source.ID: source,
+ destination.ID: destination,
+ testDLQID: dlq,
+ },
+ testProcessorService{},
+ testConnectorPluginService{
+ source.Plugin: sourceDispenser,
+ destination.Plugin: destDispenser,
+ dlq.Plugin: dlqDispenser,
+ },
+ ps,
+ )
+
+ // start the pipeline now that everything is set up
+ err = ls.Start(
+ ctx,
+ pl.ID,
+ )
+ is.NoErr(err)
+
+ // wait for pipeline to finish consuming records from the source
+ time.Sleep(100 * time.Millisecond)
+
+ is.Equal(pipeline.StatusRunning, pl.GetStatus())
+ is.Equal("", pl.Error)
+
+ // stop pipeline before ending test
+ err = ls.Stop(ctx, pl.ID, false)
+ is.NoErr(err)
+
+ is.NoErr(ls.WaitPipeline(pl.ID))
+}
+
+func TestServiceLifecycle_PipelineError(t *testing.T) {
+ t.Skipf("this test fails, see github.com/ConduitIO/conduit/issues/1659")
+
+ is := is.New(t)
+ ctx, killAll := context.WithCancel(context.Background())
+ defer killAll()
+ logger := log.Test(t)
+ db := &inmemory.DB{}
+ persister := connector.NewPersister(logger, db, time.Second, 3)
+
+ ps := pipeline.NewService(logger, db)
+
+ // create a host pipeline
+ pl, err := ps.Create(ctx, uuid.NewString(), pipeline.Config{Name: "test pipeline"}, pipeline.ProvisionTypeAPI)
+ is.NoErr(err)
+
+ // create mocked connectors
+ wantErr := cerrors.New("source connector error")
+ ctrl := gomock.NewController(t)
+ wantRecords := generateRecords(10)
+ source, sourceDispenser := generatorSource(ctrl, persister, wantRecords, wantErr, false)
+ destination, destDispenser := asserterDestination(ctrl, persister, wantRecords, false)
+ dlq, dlqDispenser := asserterDestination(ctrl, persister, nil, false)
+ pl.DLQ.Plugin = dlq.Plugin
+
+ pl, err = ps.AddConnector(ctx, pl.ID, source.ID)
+ is.NoErr(err)
+ pl, err = ps.AddConnector(ctx, pl.ID, destination.ID)
+ is.NoErr(err)
+
+ ls := NewService(logger,
+ testConnectorService{
+ source.ID: source,
+ destination.ID: destination,
+ testDLQID: dlq,
+ },
+ testProcessorService{},
+ testConnectorPluginService{
+ source.Plugin: sourceDispenser,
+ destination.Plugin: destDispenser,
+ dlq.Plugin: dlqDispenser,
+ },
+ ps,
+ )
+
+ events := make(chan FailureEvent, 1)
+ ls.OnFailure(func(e FailureEvent) {
+ events <- e
+ })
+
+ // start the pipeline now that everything is set up
+ err = ls.Start(
+ ctx,
+ pl.ID,
+ )
+ is.NoErr(err)
+
+ // wait for pipeline to finish
+ err = ls.WaitPipeline(pl.ID)
+ is.True(err != nil)
+
+ is.Equal(pipeline.StatusDegraded, pl.GetStatus())
+ // pipeline errors contain only string messages, so we can only compare the errors by the messages
+ t.Log(pl.Error)
+
+ event, eventReceived, err := cchan.Chan[FailureEvent](events).RecvTimeout(ctx, 200*time.Millisecond)
+ is.NoErr(err)
+ is.True(eventReceived)
+ is.Equal(pl.ID, event.ID)
+
+ // These conditions are NOT met
+ is.True( // expected error message to have "node