From 44a8b202cf0ab550cbdedce33891e1db88ad96e2 Mon Sep 17 00:00:00 2001
From: Jin Chi He <hejinchi@cn.ibm.com>
Date: Wed, 23 Oct 2019 10:20:07 +0800
Subject: [PATCH] debug mnist ci/cd testing problem.

---
 .../components/t2t/containers/base/Dockerfile |   2 +-
 .../components/t2t/datacopy_component.yaml    |  13 +--
 .../components/t2t/t2t-train/train_model.py   |  76 ++++++++------
 .../components/t2t/train_component.yaml       |  25 +++--
 .../pipelines/example_pipelines/gh_summ.py    |  47 ++++-----
 .../example_pipelines/gh_summ.py.tar.gz       | Bin 1700 -> 2148 bytes
 .../pipelines-notebook.ipynb                  |  99 ++++++++----------
 mnist/testing/tfjob_test.py                   |   8 ++
 8 files changed, 139 insertions(+), 131 deletions(-)

diff --git a/github_issue_summarization/pipelines/components/t2t/containers/base/Dockerfile b/github_issue_summarization/pipelines/components/t2t/containers/base/Dockerfile
index a5c26d158..718a3d7bc 100644
--- a/github_issue_summarization/pipelines/components/t2t/containers/base/Dockerfile
+++ b/github_issue_summarization/pipelines/components/t2t/containers/base/Dockerfile
@@ -26,7 +26,7 @@ RUN pip install tensorflow-probability==0.5
 RUN pip install tensor2tensor==1.11.0
 RUN pip install tensorflow_hub==0.1.1
 RUN pip install pyyaml==3.12 six==1.11.0
-RUN pip install google-cloud-storage
+RUN pip install google-cloud-storage pathlib2
 
 RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \
     unzip -qq google-cloud-sdk.zip -d /tools && \
diff --git a/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml b/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml
index 5d1e97ef0..30c00f916 100644
--- a/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml
+++ b/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml
@@ -20,9 +20,6 @@ metadata:
   labels:
     add-pod-env: 'true'
 inputs:
-  - name: working_dir
-    description: '...'
-    type: GCSPath
   - name: data_dir
     description: '...'
     type: GCSPath
@@ -35,15 +32,19 @@ inputs:
   - name: action
     description: '...'
     type: String
+outputs:
+  - name: copy_output_path
+    description: '...'
+    type: GCSPath
 implementation:
   container:
-    image: gcr.io/google-samples/ml-pipeline-t2ttrain:v2ap
+    image: gcr.io/google-samples/ml-pipeline-t2ttrain:v3ap
     args: [
       --data-dir, {inputValue: data_dir},
       --checkpoint-dir, {inputValue: checkpoint_dir},
       --action, {inputValue: action},
-      --working-dir, {inputValue: working_dir},
-      --model-dir, {inputValue: model_dir}
+      --model-dir, {inputValue: model_dir},
+      --copy-output-path, {outputPath: copy_output_path}
     ]
     env:
       KFP_POD_NAME: "{{pod.name}}"
diff --git a/github_issue_summarization/pipelines/components/t2t/t2t-train/train_model.py b/github_issue_summarization/pipelines/components/t2t/t2t-train/train_model.py
index e54120308..9e0daabd4 100644
--- a/github_issue_summarization/pipelines/components/t2t/t2t-train/train_model.py
+++ b/github_issue_summarization/pipelines/components/t2t/t2t-train/train_model.py
@@ -22,18 +22,15 @@
 from urlparse import urlparse
 
 from google.cloud import storage
+import pathlib2
 
 
-# location of the model checkpoint from which we'll start our training
-SOURCE_BUCKET = 'aju-dev-demos-codelabs'
-PREFIX = 'kubecon/model_output_tbase.bak2019000/'
 COPY_ACTION = 'copy_data'
 TRAIN_ACTION = 'train'
 PROBLEM = 'gh_problem'
 OUTPUT_PATH = '/tmp/output'
 
 
-
 def copy_blob(storage_client, source_bucket, source_blob, target_bucket_name, new_blob_name,
     new_blob_prefix, prefix):
   """Copies a blob from one bucket to another with a new name."""
@@ -49,17 +46,26 @@ def copy_blob(storage_client, source_bucket, source_blob, target_bucket_name, ne
       str(source_blob.name), str(source_bucket.name), str(new_blob.name), str(target_bucket.name))
 
 
-def copy_checkpoint(new_blob_prefix, target_bucket):
+def copy_checkpoint(checkpoint_dir, model_dir):
   """Copy an existing model checkpoint directory to the working directory for the workflow,
   so that the training can start from that point.
   """
 
   storage_client = storage.Client()
-  source_bucket = storage_client.bucket(SOURCE_BUCKET)
   retries = 10
 
+  source_bucket_string = urlparse(checkpoint_dir).netloc
+  source_prefix = checkpoint_dir.replace('gs://' + source_bucket_string + '/', '')
+  logging.info("source bucket %s and prefix %s", source_bucket_string, source_prefix)
+  source_bucket = storage_client.bucket(source_bucket_string)
+
+  target_bucket = urlparse(model_dir).netloc
+  logging.info("target bucket: %s", target_bucket)
+  new_blob_prefix = model_dir.replace('gs://' + target_bucket + '/', '')
+  logging.info("new_blob_prefix: %s", new_blob_prefix)
+
   # Lists objects with the given prefix.
-  blob_list = list(source_bucket.list_blobs(prefix=PREFIX))
+  blob_list = list(source_bucket.list_blobs(prefix=source_prefix))
   logging.info('Copying files:')
   for blob in blob_list:
     sleeptime = 0.1
@@ -68,7 +74,7 @@ def copy_checkpoint(new_blob_prefix, target_bucket):
       logging.info('copying %s; retry %s', blob.name, num_retries)
       try:
         copy_blob(storage_client, source_bucket, blob, target_bucket, blob.name, new_blob_prefix,
-            PREFIX)
+            source_prefix)
         break
       except Exception as e:  #pylint: disable=broad-except
         logging.warning(e)
@@ -97,7 +103,6 @@ def run_training(args, data_dir, model_dir, problem):
   # print(result2)
 
   # then export the model...
-
   model_export_command = ['t2t-exporter', '--model', 'transformer',
       '--hparams_set', 'transformer_prepend',
       '--problem', problem,
@@ -124,17 +129,21 @@ def main():
       help='...',
       required=True)
   parser.add_argument(
-      '--working-dir',
+      '--data-dir',
       help='...',
       required=True)
   parser.add_argument(
-      '--data-dir',
+      '--copy-output-path',
       help='...',
-      required=True)
+      )
   parser.add_argument(
+      '--train-output-path',
+      help='...',
+      )
+  parser.add_argument(  # used for the copy step only
       '--checkpoint-dir',
       help='...',
-      required=True)
+      required=False)
   parser.add_argument(
       '--train-steps',
       help='...')
@@ -145,34 +154,37 @@ def main():
 
   args = parser.parse_args()
 
-  # Create metadata.json file for visualization.
-  metadata = {
-    'outputs' : [{
-      'type': 'tensorboard',
-      'source': args.model_dir,
-    }]
-  }
-  with open('/mlpipeline-ui-metadata.json', 'w') as f:
-    json.dump(metadata, f)
-
   data_dir = args.data_dir
   logging.info("data dir: %s", data_dir)
-
-  # model_startpoint = args.checkpoint_dir
-  logging.info("model_startpoint: %s", args.checkpoint_dir)
   model_dir = args.model_dir
   logging.info("model_dir: %s", model_dir)
 
   if args.action.lower() == COPY_ACTION:
-    # copy over the checkpoint directory
-    target_bucket = urlparse(args.working_dir).netloc
-    logging.info("target bucket: %s", target_bucket)
-    new_blob_prefix = model_dir.replace('gs://' + target_bucket + '/', '')
-    logging.info("new_blob_prefix: %s", new_blob_prefix)
-    copy_checkpoint(new_blob_prefix, target_bucket)
+    logging.info("model starting checkpoint: %s", args.checkpoint_dir)
+    copy_checkpoint(args.checkpoint_dir, model_dir)
+    # write the model dir path as an output param
+    logging.info("copy_output_path: %s", args.copy_output_path)
+    pathlib2.Path(args.copy_output_path).parent.mkdir(parents=True)
+    pathlib2.Path(args.copy_output_path).write_text(model_dir.decode('utf-8'))
+
   elif args.action.lower() == TRAIN_ACTION:
     # launch the training job
     run_training(args, data_dir, model_dir, PROBLEM)
+    # write the model export path as an output param
+    logging.info("train_output_path: %s", args.train_output_path)
+    pathlib2.Path(args.train_output_path).parent.mkdir(parents=True)
+    export_dir = '%s/export' % model_dir
+    pathlib2.Path(args.train_output_path).write_text(export_dir.decode('utf-8'))
+    # Create metadata.json file for Tensorboard 'artifact'
+    metadata = {
+      'outputs' : [{
+        'type': 'tensorboard',
+        'source': model_dir,
+      }]
+    }
+    with open('/mlpipeline-ui-metadata.json', 'w') as f:
+      json.dump(metadata, f)
+
   else:
     logging.warning("Error: unknown action mode %s", args.action)
 
diff --git a/github_issue_summarization/pipelines/components/t2t/train_component.yaml b/github_issue_summarization/pipelines/components/t2t/train_component.yaml
index 197e493bd..1017847d9 100644
--- a/github_issue_summarization/pipelines/components/t2t/train_component.yaml
+++ b/github_issue_summarization/pipelines/components/t2t/train_component.yaml
@@ -23,16 +23,10 @@ inputs:
   - name: train_steps
     description: '...'
     type: Integer
-    default: '2019300'
-  - name: working_dir
-    description: '...'
-    type: GCSPath
+    default: 2019300
   - name: data_dir
     description: '...'
     type: GCSPath
-  - name: checkpoint_dir
-    description: '...'
-    type: GCSPath
   - name: model_dir
     description: '...'
     type: GCSPath
@@ -43,22 +37,27 @@ inputs:
     description: '...'
     type: String
 outputs:
-  - name: output
+  - name: launch_server
     description: '...'
     type: String
+  - name: train_output_path
+    description: '...'
+    type: GCSPath
+  - name: MLPipeline UI metadata
+    type: UI metadata
 implementation:
   container:
-    image: gcr.io/google-samples/ml-pipeline-t2ttrain:v2ap
+    image: gcr.io/google-samples/ml-pipeline-t2ttrain:v3ap
     args: [
       --data-dir, {inputValue: data_dir},
-      --checkpoint-dir, {inputValue: checkpoint_dir},
       --action, {inputValue: action},
-      --working-dir, {inputValue: working_dir},
       --model-dir, {inputValue: model_dir},
       --train-steps, {inputValue: train_steps},
-      --deploy-webapp, {inputValue: deploy_webapp}
+      --deploy-webapp, {inputValue: deploy_webapp},
+      --train-output-path, {outputPath: train_output_path}
     ]
     env:
       KFP_POD_NAME: "{{pod.name}}"
     fileOutputs:
-      output: /tmp/output
+      launch_server: /tmp/output
+      MLPipeline UI metadata: /mlpipeline-ui-metadata.json
diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py
index bf0bde1a1..b0f849180 100644
--- a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py
+++ b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py
@@ -16,6 +16,7 @@
 import kfp.dsl as dsl
 import kfp.gcp as gcp
 import kfp.components as comp
+from kfp.dsl.types import GCSPath, String
 
 
 COPY_ACTION = 'copy_data'
@@ -25,11 +26,11 @@
 MODEL = 'model'
 
 copydata_op = comp.load_component_from_url(
-  'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml'  # pylint: disable=line-too-long
+  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/ghpl_update/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml'  # pylint: disable=line-too-long
   )
 
 train_op = comp.load_component_from_url(
-  'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long
+  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/ghpl_update/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long
   )
 
 metadata_log_op = comp.load_component_from_url(
@@ -41,37 +42,34 @@
   description='Demonstrate Tensor2Tensor-based training and TF-Serving'
 )
 def gh_summ(  #pylint: disable=unused-argument
-  train_steps=2019300,
-  project='YOUR_PROJECT_HERE',
-  github_token='YOUR_GITHUB_TOKEN_HERE',
-  working_dir='YOUR_GCS_DIR_HERE',
-  checkpoint_dir='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000',
-  deploy_webapp='true',
-  data_dir='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
+  train_steps: 'Integer' = 2019300,
+  project: String = 'YOUR_PROJECT_HERE',
+  github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
+  working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
+  checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
+  deploy_webapp: String = 'true',
+  data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
   ):
 
 
   copydata = copydata_op(
-    working_dir=working_dir,
     data_dir=data_dir,
     checkpoint_dir=checkpoint_dir,
-    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),
-    action=COPY_ACTION
+    model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
+    action=COPY_ACTION,
     ).apply(gcp.use_gcp_secret('user-gcp-sa'))
 
 
   log_dataset = metadata_log_op(
     log_type=DATASET,
     workspace_name=WORKSPACE_NAME,
-    run_name='{{workflow.name}}',
+    run_name=dsl.RUN_ID_PLACEHOLDER,
     data_uri=data_dir
     )
 
   train = train_op(
-    working_dir=working_dir,
     data_dir=data_dir,
-    checkpoint_dir=checkpoint_dir,
-    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),
+    model_dir=copydata.outputs['copy_output_path'],
     action=TRAIN_ACTION, train_steps=train_steps,
     deploy_webapp=deploy_webapp
     ).apply(gcp.use_gcp_secret('user-gcp-sa'))
@@ -80,29 +78,28 @@ def gh_summ(  #pylint: disable=unused-argument
   log_model = metadata_log_op(
     log_type=MODEL,
     workspace_name=WORKSPACE_NAME,
-    run_name='{{workflow.name}}',
-    model_uri='%s/%s/model_output' % (working_dir, '{{workflow.name}}')
+    run_name=dsl.RUN_ID_PLACEHOLDER,
+    model_uri=train.outputs['train_output_path']
     )
 
   serve = dsl.ContainerOp(
       name='serve',
       image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',
-      arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
-          "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}')
+      arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),
+          "--model_path", train.outputs['train_output_path']
           ]
-      )
+      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
+
   log_dataset.after(copydata)
-  train.after(copydata)
   log_model.after(train)
-  serve.after(train)
   train.set_gpu_limit(1)
   train.set_memory_limit('48G')
 
-  with dsl.Condition(train.output == 'true'):
+  with dsl.Condition(train.outputs['launch_server'] == 'true'):
     webapp = dsl.ContainerOp(
         name='webapp',
         image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap',
-        arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
+        arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),
             "--github_token", github_token]
 
         )
diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz
index b285e71f412e579056a99d16c0c05b517d72446b..5998f78cfc8d6c7a8d89f7aca799e746e94160d7 100644
GIT binary patch
literal 2148
zcmV-q2%GmGiwFp06t!If|7U1lb9HTPE^v7+bYXG;?HgTh+c>h%`4xm;{8CwQ+AHo1
z>_aYbQ(x0KNSr+!wuQh*jI2gqkf;Q?ApiY_q$HCPCCf@0<aRYc;fUgVa5$V9j;65}
z6W<HT<N*i%yI%wQ0G~d7<i7xw-w)GIfBR63iSg<5)5lNm(EhtufGnjrLFh#mDKGB-
z1aR#APLkA%!WqJe8^uZVbK*tj-4y%r8c+9bz0jGVe@DsfjUV0b1%zS;Q#{*42!~-r
zF$LU|Jt7q6#*|KOvuncUPol&%i(xB`i9JK($B{!)JMm(MI+~%;F$sVMO)w>BMZz>n
z4#bafjZ@-q+6cjWFZ4ne_&R9y*|;Rh9XyWSqmdWJ8BKxIAB^@5&V0(~9_&y5@nL_T
z%?NQo*bpaTrIf_!=zUIjL?2@QauHJEl4SJs9w~JGv%I>n&M%h#njfvKFY}AJOp$4f
z*tCLFkmjf-i`AE_e_E^M*ZElu-}UG^yS8X_OTrpwH=UWLpmB6*9WO3&^n!dr<R111
zbYVGOqNjm>W`;xV;9n4>h8;P?$JeRJ_Q{SyGhhQ&lu^*5MVZzo*Z7v{4{X`2^Rw5)
zzKtU<q*|6V$%wp0hs1vLVBM2z9LEis>LMmR0DfS7mb<pF@0$t_HqM3lQ4{xs>lk@y
znh}&{L4XtQzua<1Ph*(ul8`X#JA?T%BzDB$vkhgovByoA*?!_?0SReg`8WYo3MWZC
z8z|(YxVahJ+;W-ju%E$;ac-MqWqNK)>eQO`DJZ$H+<Y{6HPcg&bF(o(Z7Qzb?iAdO
z2{fQT%j6VR!h8&2J{3JOQ&W}5456P$Rr3N0u*Jy(UuBzJz{He!BbyxBc9exw7*gT*
zj3f)ez+e_7LF|K}aWgSMRJX8yiqqS)crdE=C7^0$#pFgyP*wH#DE#DPLuVo*Jw1&}
zyxcxyjc=q^UI?0nKy?ut#mmho3LTH1ttnRtJX1I`Bq{Jg6A~@MsBEvHUso|}vnGbC
zwb=#=>Rgo$w)0&V19lWYaKn+NaP!{IMLK~`Pv+y}eI95hndiWOd`Ogq07pX%8!@Kq
z@|8)0*c2~PlgpK#FgDASWV9o&Ul!W`_>qs0@UFUNUq7E)=gVX3?C{%suWn9p<V@H^
zSvdmnWVt*!om+?J=ckLK!_{JWW*uG3kLPEr#o_5?m6VwhJ0a9`?bv`n9I$&2`>C11
zp)%NDiYGr)N#TOY(dqK)`11Ixb$GS<BGI=#FM#**_xZ)e;&}c-%$im4If(-KJl=T3
zcP_}yOgV&n^c*%}hKd}D>rTUlvA8@xJbF<+X^icHnHOM}ok%-jsh%4}t`80k!{H{W
z8Tdx&x4<Ft2sXR>fJ?_4`QVwpF)t^b%YZ!#whaodxR%OWLsI`vZl&Q!YePX(*pri5
z?sBS$ZA)?s=O~UsSVd!Ydx*Z~2<SYgA?;X+k7&dl$tKU;ER8P&-GBign2WA6`<Bq}
zAz)yTbWM`)VVR@x#79kt6>%n$i8gI|zsZ9k=CiGYL`|Z(jZh^e)RL<bkwvbM6P~X#
zr3$`GSU78$%q30O#Q_pU;FIq?j6aSb8z6ql|HO`CfPXW<E0!7UlcJDP)T6dtV|;Ue
z+`Jt+s#WK$)x(d{^9s>73zy_qvZNyFLi{L1R>EQtA##~FQJ8u+pvn6AYopW@r`Un!
zuA*zSYB~S?_-j4tPC^MkSjqRdc{P?NGUvGADxO8YsLp=N=^<p~B!pGk9+8+CJPRh$
zv$+327!mnUB?hRuUp2?ny}e(P5JVk4K5~UjD)cA$w#iGVOfh_%h4$Lul0wckl63Lg
zZ}~wVKS$Bph_qeRP5EX^JljsRo(8vH|F%<964r5TL$|q=>K1RG-)j$EL{Uvf-ffja
z5I=H_BE)DEpn2VEy;UO@&pOAaDx&u(sxj!IxOQ(K>%utLqO5YlUsheJtKDcx9`EPB
z`bAWordmnq^tK!CS8<ZM;!_8jGM1ijy*9X=tc3H@o~p^(HkPHE@hh)sI?cCaGtGK!
zig)3ba&NbS6mv@~F)xSMa#BL2DU30{fE&t{|2}1vM#fT_n`EInYh0}gjjPqLEb;nO
z!-Q*6kjJl59r{bwQp(5$_Tqvy-A#8oK*||;GX)D+OJVLPN)WfGQo8S9{o7o(%Ri8{
z_+3|XY2W4QU!FUQzw1E_jo>`y*fM23o$>=1BhaDl0%uQ8wo>TWfv%{VY$x2K_My`-
z+o_UF&tgO+3GU-^<&gE-4tI-7Z4Tp|(!;?Mzco+X?eZ|V=)pz*IbAfrdUtTrVws9J
zaM#TD8dd*VLsC9zKG%b4{N<nL^JHhIS2!4h-pCb#H}*BJn{TZaKwhB~|4Z8vD(wV(
z(Yv=2^sSvR7P>og_W}f8zxJ}eOKoH$$JWLM(Zx4(HVmB&FWK3UkTl8?TP~sbUf`*<
z(|`m~@-Ram|9&EkhIgLhVK{<;>BiX%P2Xs<M4!^`Gpck|xAJC^VlEiMYPa$Ckb7EP
zPF!SndDo_rx3wpGO2ctut8urSyS-Oqhot*?%@v!LitXukYksYC1(%Y@M4!>AvEj3l
z;wgjG!Ae|%I`w8qS04W5*b-lkpS$>42-7P-*^~nJ+i79n=xR|ODtRyE;oeOmx_W=t
zwvfOtU2W#ozkueQ7W!GbAl3R6zqzYqukUw1O)E%i($PJo)l1&3AEozly0*Kttnn=*
z*}blm@_(>sPfYdQE}e4<;mGn0W_6_l{30#Vd1!q%zyJdbFu(u<3^2d|0}L?000Rs#
azyJdbFu(u<3^2d|+u(mvVbpH`Pyhf}jXxj&

literal 1700
zcmV;V23z?biwFo<U6ow||7U1lb9HTPE^v7+bYXG;?OI!J+cp%=bAE;37r&HNoMy#l
zpbuW`q*>G0aGVZ9Q3#C0$f_k#AStI_kpDiCl5FZ?TXvjaNqK-G6?HD+Ip23kN{+k;
z`Cf=xa~Svs&m%`%KAfD$zg!A`kAG{o-)CdVc)R`K<mkXSI(PwzDTBl?_A*Iza<4>S
z<b6eidN}MDkhnM^_(#jb=By3<Xad`ZQ!jKn#^0Ds?|giJ7$626$YAHtFkl#B1{t@d
z;?gjp6il==jc<|AZ(-t^*|0?;WOt0_L&HJTCSD{=8y%z3Ljm`}2xQ0@qmW{9ocsZ|
zkRnI=HVl5>3%$_gaUElP2CfmA@yo_Lqv3^7%qVyDP58cpvCsI$@lpGa_eV$kej#4t
zUFxzC!T^a8>D!_VMO64>^GFUi9^V<N$HqT{n~^mf4gTt%j;&AqQNL02o_Q8GGVda(
z$Yz^8pI?kW-TY~d2cP>__1(K3o5Z&k!&4O2h;e#t^)5#B-QV-t^E_LQN7j0GX~#6-
zj~F;;#{UJF0vkKXhqu%ec(8G324cX%G2`{Jn5b^+7EVQ>IA@k+Db;KFH1-79(+GPZ
z+xS3?#Hf}}2Sq-fTleS|M$wb!$d37=af(>(#DczW)_VkDP@2$pMdMsn+3+ZhkwN1i
zfW-S(vaj*j<PlsHBEg_5uHP{dOaQ43a@ycf(opc1xN(3&ma!KSZpwH!NUnhqN>`E_
zKyo9U$PD^1ztKzy*37A=V9%?Tc#|_pA65dm!mh-MoN)?Iz>`;HtDrq+w!~4{^2tcf
zMM}^G5_A<ZC?J|op8&#cPgKMm_rQxw=5mtj5Wy9x=%qI5+BS|umOv<L+knI&hZfgj
zD2RLxF$oaBAJxqo27`1;vkOo}Rw6}wWy&itkWbaaL(=72OFu2u=<%^x;Why;PYsX-
z14+SZ2dFkzL!}^8i@Quz?ndH0n|nDMYcf$|HYuuc=y;OlZCN6I&A167SrA?sB_IF=
zxPj_4WLnFK3)@Nxt?-c7&HI6Nio#^T#?f2?Sy3sId_^2ZF&`vJ*mm~j!^3^L@o9<t
z@)4ExnR^>^GKpQ%x9{l)3TMTtem)yo!$Hrw>VD}TmZ2A6Ck@do7@iLX=a+q}I~-nK
zoOZ_-gDdNF)bI7L#uweo>!K@@BAXy)x^@Kke;iIi5Bk)MIm3Y%q_Fja;v%q4F9$ch
z>)vOpdo%u|__xk3c<jN~{%Ca3>+dPp>QtN&9H`CV&O^R4LU)}UVddx$vPs9twxcq4
z6=6h(>tXkFuW@Ju?JSrVKv!_qCSup@V(j|7yFyNXM9shlizdoRE7{wb9mB;cfPLO3
zzlfHrkTMY0N!S$$E;OIQU7)0YtH!f}&z0-as?}FL9|++q6!MjZ4;=?^tOLOib@|KM
zS2PFNK0nuAvUZR#k1pF8^X_>6D9t7C7nzSekUiDnF|gc(L0`1g7#%K`D4F$Y<0>dC
znp}-|Hr+l%YM8lygX^lCMR6@{rKQ|%x#gQF6mq279#W<Y<OT*Q*9A`n#^jgD;+kD`
z_kM!H{mezl(Z`R*k^wu`x01A*E4*qwUPJlKPRrYb^&9og9xIpg`rV>f*UkDn)nG4)
zT7~R-vrHbu$8NdTEfY|2d#CyCv68E1y`I17M8NTenJyGw=UnrhZ8=4C{Ec^xsuOe#
zpQ_cmRZM-gQTIqFZJxc)?V~5GLe~;WRT|H^XRIn`?ZdBHtW_BgH@pI?cQh6~M7)Yy
z42kJb#0tk!c^Uw@^T$0E?_fkoMVG{3UaMI;T{epc^O$(M9L$pqN6z?V&Z@uU$!@XE
zaKN}6PP6t_49bz@X~(D1`u;1NmbT}y+6P~p#T$6~I^IcXo^Q|d{k|!w*x+iiXOyyX
zSaK(V!=S{Ql9h<RKPH;!XN1XukhNi!eL4s1Zn?$Z6=cODgvQAK^P7j<+rF0X9m0Lp
zX1xA!Nmilq6tK-nPJegN-(7b4?m`g7F|pM%weJO9{^d!40!-!|<K*{qWi*_5jt5&D
zdZru29i#nrKIN=Cme)UfKW$r6^(x}+uUYGBx#3ewC9%?wEQrj7`m^nx)Zd@2Z{*eA
zm1=W2fhO=_9CCn@C8c-%X+`PR8*d+MNvBGG!c(v7B&{n;L2{M+Mo85>(4s|)7A;z|
uXwjlYixw?fv}n<yMT-_KTC`}<qD6}qEn2i_(V|7m%gTTGQ=)(XPyhgM_*&2a

diff --git a/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb b/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb
index 46386f206..8de017d09 100644
--- a/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb
+++ b/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb
@@ -29,7 +29,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Do some installations and imports, and set some variables.  Set the `WORKING_DIR` to a path under the Cloud Storage bucket you created earlier.  The Pipelines SDK is bundled with the notebook server image, but we'll make sure that we're using the most current version for this example."
+    "Do some installations and imports, and set some variables.  Set the `WORKING_DIR` to a path under the Cloud Storage bucket you created earlier.  The Pipelines SDK is bundled with the notebook server image, but we'll make sure that we're using the most current version for this example. You may need to restart your kernel after the SDK update."
    ]
   },
   {
@@ -52,6 +52,8 @@
     "import kfp.dsl as dsl\n",
     "import kfp.gcp as gcp\n",
     "import kfp.components as comp\n",
+    "from kfp.dsl.types import Integer, GCSPath, String\n",
+    "\n",
     "import kfp.notebook"
    ]
   },
@@ -139,15 +141,15 @@
     "MODEL = 'model'\n",
     "\n",
     "copydata_op = comp.load_component_from_url(\n",
-    "  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/preempt/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml'\n",
+    "  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/ghpl_update/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml'  # pylint: disable=line-too-long\n",
     "  )\n",
     "\n",
     "train_op = comp.load_component_from_url(\n",
-    "  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/preempt/github_issue_summarization/pipelines/components/t2t/train_component.yaml'\n",
+    "  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/ghpl_update/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long\n",
     "  )\n",
     "\n",
     "metadata_log_op = comp.load_component_from_url(\n",
-    "  'https://raw.githubusercontent.com/amygdala/kubeflow-examples/preempt/github_issue_summarization/pipelines/components/t2t/metadata_log_component.yaml'\n",
+    "  'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/metadata_log_component.yaml' # pylint: disable=line-too-long\n",
     "  )"
    ]
   },
@@ -170,37 +172,34 @@
     "  description='Demonstrate Tensor2Tensor-based training and TF-Serving'\n",
     ")\n",
     "def gh_summ(  #pylint: disable=unused-argument\n",
-    "  train_steps=2019300,\n",
-    "  project='YOUR_PROJECT_HERE',\n",
-    "  github_token='YOUR_GITHUB_TOKEN_HERE',\n",
-    "  working_dir='YOUR_GCS_DIR_HERE',\n",
-    "  checkpoint_dir='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000',\n",
-    "  deploy_webapp='true',\n",
-    "  data_dir='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
+    "  train_steps: Integer = 2019300,\n",
+    "  project: String = 'YOUR_PROJECT_HERE',\n",
+    "  github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
+    "  working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
+    "  checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
+    "  deploy_webapp: String = 'true',\n",
+    "  data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
     "  ):\n",
     "\n",
     "\n",
     "  copydata = copydata_op(\n",
-    "    working_dir=working_dir,\n",
     "    data_dir=data_dir,\n",
     "    checkpoint_dir=checkpoint_dir,\n",
-    "    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),\n",
-    "    action=COPY_ACTION\n",
+    "    model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),\n",
+    "    action=COPY_ACTION,\n",
     "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "\n",
     "  log_dataset = metadata_log_op(\n",
     "    log_type=DATASET,\n",
     "    workspace_name=WORKSPACE_NAME,\n",
-    "    run_name='{{workflow.name}}',\n",
+    "    run_name=dsl.RUN_ID_PLACEHOLDER,\n",
     "    data_uri=data_dir\n",
     "    )\n",
     "\n",
     "  train = train_op(\n",
-    "    working_dir=working_dir,\n",
     "    data_dir=data_dir,\n",
-    "    checkpoint_dir=checkpoint_dir,\n",
-    "    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),\n",
+    "    model_dir=copydata.outputs['copy_output_path'],\n",
     "    action=TRAIN_ACTION, train_steps=train_steps,\n",
     "    deploy_webapp=deploy_webapp\n",
     "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
@@ -209,29 +208,27 @@
     "  log_model = metadata_log_op(\n",
     "    log_type=MODEL,\n",
     "    workspace_name=WORKSPACE_NAME,\n",
-    "    run_name='{{workflow.name}}',\n",
-    "    model_uri='%s/%s/model_output' % (working_dir, '{{workflow.name}}')\n",
+    "    run_name=dsl.RUN_ID_PLACEHOLDER,\n",
+    "    model_uri=train.outputs['train_output_path']\n",
     "    )\n",
     "\n",
     "  serve = dsl.ContainerOp(\n",
     "      name='serve',\n",
     "      image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',\n",
-    "      arguments=[\"--model_name\", 'ghsumm-%s' % ('{{workflow.name}}',),\n",
-    "          \"--model_path\", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}')\n",
+    "      arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n",
+    "          \"--model_path\", train.outputs['train_output_path']\n",
     "          ]\n",
     "      )\n",
     "  log_dataset.after(copydata)\n",
-    "  train.after(copydata)\n",
     "  log_model.after(train)\n",
-    "  serve.after(train)\n",
-    "  train.set_gpu_limit(4)\n",
+    "  train.set_gpu_limit(1)\n",
     "  train.set_memory_limit('48G')\n",
     "\n",
-    "  with dsl.Condition(train.output == 'true'):\n",
+    "  with dsl.Condition(train.outputs['launch_server'] == 'true'):\n",
     "    webapp = dsl.ContainerOp(\n",
     "        name='webapp',\n",
     "        image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap',\n",
-    "        arguments=[\"--model_name\", 'ghsumm-%s' % ('{{workflow.name}}',),\n",
+    "        arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n",
     "            \"--github_token\", github_token]\n",
     "\n",
     "        )\n",
@@ -328,80 +325,74 @@
     "\n",
     "@dsl.pipeline(\n",
     "  name='Github issue summarization',\n",
-    "  description='Demonstrate TFT-based feature processing, TFMA, TFJob, CMLE OP, and TF-Serving'\n",
+    "  description='Demonstrate Tensor2Tensor-based training and TF-Serving'\n",
     ")\n",
     "def gh_summ2(\n",
-    "  train_steps = 2019300,\n",
-    "  project = PROJECT_NAME,\n",
-    "  github_token='YOUR_GITHUB_TOKEN_HERE',\n",
-    "  working_dir='YOUR_GCS_DIR_HERE',\n",
-    "  checkpoint_dir = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000',\n",
-    "  deploy_webapp = 'true',\n",
-    "  data_dir = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
+    "  train_steps: Integer = 2019300,\n",
+    "  project: String = 'YOUR_PROJECT_HERE',\n",
+    "  github_token: String = 'YOUR_GITHUB_TOKEN_HERE',\n",
+    "  working_dir: GCSPath = 'YOUR_GCS_DIR_HERE',\n",
+    "  checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n",
+    "  deploy_webapp: String = 'true',\n",
+    "  data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n",
     "  ):\n",
     "\n",
     "  # The new pre-processing op.\n",
     "  preproc = preproc_op(project=project,\n",
-    "      data_dir=('%s/%s/gh_data' % (working_dir, '{{workflow.name}}')))\n",
+    "      data_dir=('%s/%s/gh_data' % (working_dir, dsl.RUN_ID_PLACEHOLDER)))\n",
     "\n",
     "  copydata = copydata_op(\n",
-    "    working_dir=working_dir,\n",
     "    data_dir=data_dir,\n",
     "    checkpoint_dir=checkpoint_dir,\n",
-    "    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),\n",
-    "    action=COPY_ACTION\n",
+    "    model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),\n",
+    "    action=COPY_ACTION,\n",
     "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "\n",
     "  log_dataset = metadata_log_op(\n",
     "    log_type=DATASET,\n",
     "    workspace_name=WORKSPACE_NAME,\n",
-    "    run_name='{{workflow.name}}',\n",
+    "    run_name=dsl.RUN_ID_PLACEHOLDER,\n",
     "    data_uri=data_dir\n",
     "    )\n",
     "\n",
     "  train = train_op(\n",
-    "    working_dir=working_dir,\n",
     "    data_dir=data_dir,\n",
-    "    checkpoint_dir=checkpoint_dir,\n",
-    "    model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'),\n",
+    "    model_dir=copydata.outputs['copy_output_path'],\n",
     "    action=TRAIN_ACTION, train_steps=train_steps,\n",
     "    deploy_webapp=deploy_webapp\n",
     "    ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n",
     "\n",
     "  log_dataset.after(copydata)\n",
-    "  train.after(copydata)\n",
     "  train.after(preproc)\n",
     "\n",
     "  log_model = metadata_log_op(\n",
     "    log_type=MODEL,\n",
     "    workspace_name=WORKSPACE_NAME,\n",
-    "    run_name='{{workflow.name}}',\n",
-    "    model_uri='%s/%s/model_output' % (working_dir, '{{workflow.name}}')\n",
+    "    run_name=dsl.RUN_ID_PLACEHOLDER,\n",
+    "    model_uri=train.outputs['train_output_path']\n",
     "    )\n",
     "\n",
     "  serve = dsl.ContainerOp(\n",
     "      name='serve',\n",
     "      image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',\n",
-    "      arguments=[\"--model_name\", 'ghsumm-%s' % ('{{workflow.name}}',),\n",
-    "          \"--model_path\", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}')\n",
+    "      arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n",
+    "          \"--model_path\", train.outputs['train_output_path']\n",
     "          ]\n",
     "      )\n",
-    "\n",
     "  log_model.after(train)\n",
-    "  serve.after(train)\n",
-    "  train.set_gpu_limit(4)\n",
+    "  train.set_gpu_limit(1)\n",
     "  train.set_memory_limit('48G')\n",
     "\n",
-    "  with dsl.Condition(train.output == 'true'):\n",
+    "  with dsl.Condition(train.outputs['launch_server'] == 'true'):\n",
     "    webapp = dsl.ContainerOp(\n",
     "        name='webapp',\n",
     "        image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap',\n",
-    "        arguments=[\"--model_name\", 'ghsumm-%s' % ('{{workflow.name}}',),\n",
+    "        arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n",
     "            \"--github_token\", github_token]\n",
     "\n",
     "        )\n",
-    "    webapp.after(serve)  \n"
+    "    webapp.after(serve)\n"
    ]
   },
   {
diff --git a/mnist/testing/tfjob_test.py b/mnist/testing/tfjob_test.py
index cd82ee05e..3e921efb2 100644
--- a/mnist/testing/tfjob_test.py
+++ b/mnist/testing/tfjob_test.py
@@ -64,6 +64,14 @@ def test_train(self):
     util.run(['wget', '-O', '/usr/local/bin/kustomize', kusUrl], cwd=self.app_dir)
     util.run(['chmod', 'a+x', '/usr/local/bin/kustomize'], cwd=self.app_dir)
 
+    # TODO(jinchihe): kubectl was upgraded to 1.14.0 in kubeflow/testing/pull/500, but the
+    # test-worker image has not been refreshed yet (see kubeflow/testing/issues/501).
+    # Remove the kubectl download below once a refreshed test-worker image is released.
+    kusUrl = 'https://storage.googleapis.com/kubernetes-release/' \
+         'release/v1.14.0/bin/linux/amd64/kubectl'
+    util.run(['wget', '-O', '/usr/local/bin/kubectl', kusUrl], cwd=self.app_dir)
+    util.run(['chmod', 'a+x', '/usr/local/bin/kubectl'], cwd=self.app_dir)
+
     # Setup parameters for kustomize
     configmap = 'mnist-map-training'
     for pair in self.params.split(","):