Skip to content

Commit

Permalink
fix(backend): parallelFor resolve upstream inputs. Fixes #11520 (#11627)
Browse files Browse the repository at this point in the history
Signed-off-by: zazulam <m.zazula@gmail.com>
  • Loading branch information
zazulam authored Feb 21, 2025
1 parent d2c0376 commit f7c0616
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 49 deletions.
2 changes: 1 addition & 1 deletion backend/src/v2/compiler/argocompiler/dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ func (c *workflowCompiler) iteratorTask(name string, task *pipelinespec.Pipeline

tasks = []wfapi.DAGTask{
{
Name: name + "-loop",
Name: name,
Template: loopTmplName,
Depends: depends(task.GetDependentTasks()),
Arguments: wfapi.Arguments{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,13 @@ spec:
- name: components-e7a1060777c9ef84e36a4d54f25d3102abbcbd62d4c8bbb5883c3a2cbcfb5c6d
value: '{"executorLabel":"exec-print-op","inputDefinitions":{"parameters":{"s":{"parameterType":"STRING"}}}}'
- name: implementations-e7a1060777c9ef84e36a4d54f25d3102abbcbd62d4c8bbb5883c3a2cbcfb5c6d
value: '{"args":["--executor_input","{{$}}","--function_to_execute","print_op"],"command":["sh","-c","\nif
! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m
ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1
python3 -m pip install --quiet --no-warn-script-location ''kfp==2.7.0'' ''--no-deps''
''typing-extensions\u003e=3.7.4,\u003c5; python_version\u003c\"3.9\"'' \u0026\u0026
\"$0\" \"$@\"\n","sh","-ec","program_path=$(mktemp -d)\n\nprintf \"%s\" \"$0\"
\u003e \"$program_path/ephemeral_component.py\"\n_KFP_RUNTIME=true python3 -m
kfp.dsl.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n","\nimport
kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef
print_op(s: str):\n print(s)\n\n"],"image":"python:3.7"}'
value: '{"args":["--executor_input","{{$}}","--function_to_execute","print_op"],"command":["sh","-c","\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location ''kfp==2.7.0'' ''--no-deps'' ''typing-extensions\u003e=3.7.4,\u003c5; python_version\u003c\"3.9\"'' \u0026\u0026 \"$0\" \"$@\"\n","sh","-ec","program_path=$(mktemp -d)\n\nprintf \"%s\" \"$0\" \u003e \"$program_path/ephemeral_component.py\"\n_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n","\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef print_op(s: str):\n print(s)\n\n"],"image":"python:3.7"}'
- name: components-comp-for-loop-2
value: '{"dag":{"tasks":{"print-op":{"cachingOptions":{"enableCache":true},"componentRef":{"name":"comp-print-op"},"inputs":{"parameters":{"s":{"componentInputParameter":"pipelinechannel--loop-item-param-1","parameterExpressionSelector":"parseJson(string_value)[\"A_a\"]"}}},"taskInfo":{"name":"print-op"}},"print-op-2":{"cachingOptions":{"enableCache":true},"componentRef":{"name":"comp-print-op-2"},"inputs":{"parameters":{"s":{"componentInputParameter":"pipelinechannel--loop-item-param-1","parameterExpressionSelector":"parseJson(string_value)[\"B_b\"]"}}},"taskInfo":{"name":"print-op-2"}}}},"inputDefinitions":{"parameters":{"pipelinechannel--loop-item-param-1":{"parameterType":"STRUCT"}}}}'
- name: components-comp-for-loop-4
value: '{"dag":{"tasks":{"print-op-3":{"cachingOptions":{"enableCache":true},"componentRef":{"name":"comp-print-op-3"},"inputs":{"parameters":{"s":{"componentInputParameter":"pipelinechannel--loop-item-param-3","parameterExpressionSelector":"parseJson(string_value)[\"A_a\"]"}}},"taskInfo":{"name":"print-op-3"}},"print-op-4":{"cachingOptions":{"enableCache":true},"componentRef":{"name":"comp-print-op-4"},"inputs":{"parameters":{"s":{"componentInputParameter":"pipelinechannel--loop-item-param-3","parameterExpressionSelector":"parseJson(string_value)[\"B_b\"]"}}},"taskInfo":{"name":"print-op-4"}}}},"inputDefinitions":{"parameters":{"pipelinechannel--loop-item-param-3":{"parameterType":"STRUCT"}}}}'
- name: components-root
value: '{"dag":{"tasks":{"for-loop-2":{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\",
\"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\":
\"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"},
{\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\":
\"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}},"for-loop-4":{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\",
\"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\":
\"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"},
{\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\":
\"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}}}}'
value: '{"dag":{"tasks":{"for-loop-2":{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}},"for-loop-4":{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}}}}'
entrypoint: entrypoint
podMetadata:
annotations:
Expand Down Expand Up @@ -383,12 +364,7 @@ spec:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
- name: task
value: '{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\":
\"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\":
\"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\":
\"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\":
\"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}}'
value: '{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}}'
name: iteration-item-driver
template: system-dag-driver
- arguments:
Expand Down Expand Up @@ -416,12 +392,7 @@ spec:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
- name: task
value: '{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\":
\"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\":
\"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\":
\"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\":
\"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}}'
value: '{"componentRef":{"name":"comp-for-loop-2"},"iteratorPolicy":{"parallelismLimit":2},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-1","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"foo"}}'
name: iteration-driver
template: system-dag-driver
- arguments:
Expand Down Expand Up @@ -453,12 +424,7 @@ spec:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
- name: task
value: '{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\":
\"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\":
\"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\":
\"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\":
\"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}'
value: '{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}'
name: iteration-item-driver
template: system-dag-driver
- arguments:
Expand Down Expand Up @@ -486,12 +452,7 @@ spec:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
- name: task
value: '{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\":
\"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\":
\"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\":
\"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\":
\"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\":
\"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}'
value: '{"componentRef":{"name":"comp-for-loop-4"},"iteratorPolicy":{"parallelismLimit":4},"parameterIterator":{"itemInput":"pipelinechannel--loop-item-param-3","items":{"raw":"[{\"A_a\": \"1\", \"B_b\": \"10\"}, {\"A_a\": \"2\", \"B_b\": \"20\"}, {\"A_a\": \"3\", \"B_b\": \"30\"}, {\"A_a\": \"4\", \"B_b\": \"40\"}, {\"A_a\": \"5\", \"B_b\": \"50\"}, {\"A_a\": \"6\", \"B_b\": \"60\"}, {\"A_a\": \"7\", \"B_b\": \"70\"}, {\"A_a\": \"8\", \"B_b\": \"80\"}, {\"A_a\": \"9\", \"B_b\": \"90\"}, {\"A_a\": \"10\", \"B_b\": \"100\"}]"}},"taskInfo":{"name":"bar"}}'
name: iteration-driver
template: system-dag-driver
- arguments:
Expand Down Expand Up @@ -519,14 +480,14 @@ spec:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
depends: ""
name: for-loop-2-loop
name: for-loop-2
template: comp-for-loop-2-for-loop-2-iterator
- arguments:
parameters:
- name: parent-dag-id
value: '{{inputs.parameters.parent-dag-id}}'
depends: ""
name: for-loop-4-loop
name: for-loop-4
template: comp-for-loop-4-for-loop-4-iterator
inputs:
parameters:
Expand Down
2 changes: 1 addition & 1 deletion backend/src/v2/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1662,7 +1662,7 @@ func resolveUpstreamArtifacts(cfg resolveUpstreamOutputsConfig) error {
if taskOutput.GetOutputArtifactKey() == "" {
cfg.err(fmt.Errorf("output artifact key is empty"))
}
tasks, err := cfg.mlmd.GetExecutionsInDAG(cfg.ctx, cfg.dag, cfg.pipeline, false)
tasks, err := getDAGTasks(cfg.ctx, cfg.dag, cfg.pipeline, cfg.mlmd, nil)
if err != nil {
cfg.err(err)
}
Expand Down
21 changes: 21 additions & 0 deletions samples/v2/parallel_after_dependency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from kfp import Client, dsl


@dsl.component
def print_op(message: str) -> str:
print(message)
return message


@dsl.pipeline()
def loop_with_after_dependency_set():
with dsl.ParallelFor([1, 2, 3]):
one = print_op(message='foo')
# Ensure that the dependecy is set downstream for all loop iterations
two = print_op(message='bar').after(one)
three = print_op(message='baz').after(one)


if __name__ == '__main__':
client = Client()
run = client.create_run_from_pipeline_func(loop_with_after_dependency_set)
39 changes: 39 additions & 0 deletions samples/v2/parallel_consume_upstream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from kfp import Client
from kfp import dsl
from kfp.dsl import Artifact, Input, Output


@dsl.component
def split_input(input: str) -> list:
return input.split(',')


@dsl.component
def create_file(file: Output[Artifact], content: str):
with open(file.path, 'w') as f:
f.write(content)


@dsl.component
def read_file(file: Input[Artifact]) -> str:
with open(file.path, 'r') as f:
print(f.read())
return file.path


@dsl.pipeline()
def loop_consume_upstream():
model_ids_split_op = split_input(input='component1,component2,component3')
model_ids_split_op.set_caching_options(False)

with dsl.ParallelFor(model_ids_split_op.output) as model_id:
create_file_op = create_file(content=model_id)
create_file_op.set_caching_options(False)
# Consume the output from a op in the loop iteration DAG context
read_file_op = read_file(file=create_file_op.outputs['file'])
read_file_op.set_caching_options(False)


if __name__ == '__main__':
client = Client()
run = client.create_run_from_pipeline_func(loop_consume_upstream)
4 changes: 4 additions & 0 deletions samples/v2/sample_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import pipeline_with_env
import producer_consumer_param
import subdagio
import parallel_consume_upstream
import parallel_after_dependency
import two_step_pipeline_containerized
import pipeline_with_placeholders
from modelcar_import import modelcar_import
Expand Down Expand Up @@ -78,6 +80,8 @@ def test(self):
pipeline_func=subdagio.multiple_artifacts_namedtuple.crust),
TestCase(pipeline_func=pipeline_with_placeholders.pipeline_with_placeholders),
TestCase(pipeline_func=modelcar_import.pipeline_modelcar_import),
TestCase(pipeline_func=parallel_consume_upstream.loop_consume_upstream),
TestCase(pipeline_func=parallel_after_dependency.loop_with_after_dependency_set),
]

with ThreadPoolExecutor() as executor:
Expand Down

0 comments on commit f7c0616

Please sign in to comment.