Merge pull request rh-aiservices-bu#93 from rh-aiservices-bu/feature/process_claims_pipeline

Removed secrets and added text guide
guimou authored Jan 9, 2024
2 parents 8cf3184 + 73c7bd4 commit c8db10d
Showing 15 changed files with 89 additions and 167 deletions.
1 change: 1 addition & 0 deletions content/modules/ROOT/nav.adoc
@@ -32,6 +32,7 @@
** xref:05-02-web-app-deploy-gitops.adoc[5.2 Deploying your Web App via GitOps]
** xref:05-03-web-app-update.adoc[5.3 Updating your Web App via GitOps]
** xref:05-04-web-app-validating.adoc[5.4 Validating Claims processing]
** xref:05-05-process-claims.adoc[5.5 Processing Claims with a pipeline]
* 6. Productization and Extrapolations
** xref:06-01-potential-imp-ref.adoc[6.1 Potential improvements and refinements]
39 changes: 38 additions & 1 deletion content/modules/ROOT/pages/05-05-process-claims.adoc
@@ -1,6 +1,43 @@
= Pipeline for processing claims
include::_attributes.adoc[]

== What will the pipeline do?
Now that the web app is deployed, we need a way to process the claims it receives. For that, we will use a pipeline that can either run ad hoc or be scheduled, just like the sanity-check pipeline. +
This pipeline is also a good starting point for creating an ArgoCD or Tekton pipeline that can be triggered automatically.

== Look at the pipeline
Start by navigating to `lab-materials/05/` and opening the file `process_claims.pipeline`. +
Just like before, this is the Elyra version of the pipeline. Feel free to look at the different files and properties of the pipeline to see how it all fits together, but don't change anything just yet. +
These are the main files of the pipeline and what they do:

* *get_claims* - Connects to the database, fetches any unprocessed claims, and adds them to a list that is passed to the other tasks through a file, `claims.json`.
* The following tasks go through all the claims, use the full body of the text to extract an important feature, and push the result to the database:
** *get_location* - Finds the location of the accident.
** *get_accident_time* - Finds the time of the accident.
** *summarize_text* - Makes a short summary of the text.
** *get_sentiment* - Gets the sentiment of the text.
* *detect_objects* - Downloads the images of the claims and uses the served object-detection model to find damages in the image.
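The hand-off between *get_claims* and the downstream tasks can be sketched roughly as follows. This is an illustrative assumption, not the lab's actual code: the field names (`id`, `processed`) and the filtering logic are made up, and the real task queries the database instead of an in-memory list.

```python
import json

def collect_unprocessed(claims):
    # Hypothetical stand-in for the database query in get_claims:
    # keep only the claims that have not been processed yet.
    return [c for c in claims if not c.get("processed")]

def write_claims_file(claims, path):
    # Downstream tasks (get_location, get_accident_time, summarize_text,
    # get_sentiment) read this file instead of querying the database again.
    with open(path, "w") as f:
        json.dump(claims, f)

claims = [
    {"id": 1, "body": "Rear-ended at a stop light.", "processed": True},
    {"id": 2, "body": "Hail damage on the hood.", "processed": False},
]
todo = collect_unprocessed(claims)
write_claims_file(todo, "claims.json")
```

Passing the work list through a file rather than task-to-task memory is what lets each Elyra node run as an independent container.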

== Run the pipeline
Before we can run the pipeline, we need to tell it the endpoint of the object-detection model. +
Go to the properties of the pipeline, scroll down until you see an environment variable called `DETECTION_ENDPOINT`, and paste your object-detection endpoint as the Value.

image::05/05-open-pipeline-properties.png[open pipeline properties]

image::05/05-object-detection-endpoint.png[add the detection endpoint]

Now we can press the *Run* button to start the pipeline.

When starting the pipeline, we now have an extra option called `claim_id`, which lets you choose which claim to process. If it is set to 0 (the default), all unprocessed claims are processed automatically. +
Leave it at 0 and press OK.

image::05/05-run-settings.png[run settings]

Press `run-details` in the next popup to see how the run is progressing.

image::05/05-run-details.png[run details]
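The `claim_id` behaviour described above amounts to a simple filter; here is a hedged sketch where the function name and claim fields are assumptions, not the lab's code:

```python
def select_claims(unprocessed, claim_id=0):
    # claim_id == 0 is the default: process every pending claim.
    if claim_id == 0:
        return unprocessed
    # Otherwise, process only the claim with the matching id.
    return [c for c in unprocessed if c["id"] == claim_id]

pending = [{"id": 7}, {"id": 9}]
```

With this shape, `select_claims(pending)` returns both claims, while `select_claims(pending, claim_id=9)` narrows the run to a single one.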

== Check the results
After the pipeline has finished running, you can go to the app and take a look at the claims. +
Instead of just a long text body, you will now see a summary, a location field, an accident-time field, and a sentiment field. +
You can also see new images with bounding boxes drawn where the damage was detected.
84 changes: 12 additions & 72 deletions lab-materials/03/06/sanity_check.yaml
@@ -46,7 +46,7 @@ spec:
sh -c "mkdir -p ./jupyter-work-dir && cd ./jupyter-work-dir"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/bootstrapper.py' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/bootstrapper.py --output bootstrapper.py"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/requirements-elyra.txt' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/requirements-elyra.txt --output requirements-elyra.txt"
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-1215145442' --cos-dependencies-archive 'test_response_quality-d0510f0c-fc50-42fc-a598-97e26b34ed88.tar.gz' --file 'insurance-claim-processing/lab-materials/03/test_response_quality.py' --outputs 'quality_result.json' "
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-0109084010' --cos-dependencies-archive 'test_response_quality-d0510f0c-fc50-42fc-a598-97e26b34ed88.tar.gz' --file 'insurance-claim-processing/lab-materials/03/06/test_response_quality.py' --outputs 'quality_result.json' "
command:
- sh
- -c
@@ -71,21 +71,6 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.annotations['pipelines.kubeflow.org/run_name']
- name: LLM_ENDPOINT
valueFrom:
secretKeyRef:
key: LLM_ENDPOINT
name: llm-info
- name: MODEL_SHA
valueFrom:
secretKeyRef:
key: MODEL_SHA
name: llm-info
- name: SERVING_SHA
valueFrom:
secretKeyRef:
key: SERVING_SHA
name: llm-info
image: quay.io/rlundber/rh1/sanity_pipeline:1.7
stepTemplate:
volumeMounts:
@@ -103,11 +88,11 @@ spec:
elyra/node-name: test_response_quality
pipelines.kubeflow.org/cache_enabled: "true"
annotations:
elyra/node-file-name: insurance-claim-processing/lab-materials/03/test_response_quality.py
elyra/node-file-name: insurance-claim-processing/lab-materials/03/06/test_response_quality.py
elyra/pipeline-source: sanity_check.pipeline
pipelines.kubeflow.org/task_display_name: test_response_quality
pipelines.kubeflow.org/component_spec_digest: '{"name": "Run a file",
"outputs": [], "version": "Run a file@sha256=7e955de34869fba374b1d87fd591922910d2f2490d11e7fdd1312ae0749a0742"}'
"outputs": [], "version": "Run a file@sha256=5358a652664edc3aa96dcbf0002719b8c6b6b4dfa176a48c3c3786c22766358e"}'
- name: run-a-file-2
taskSpec:
steps:
@@ -117,7 +102,7 @@ spec:
sh -c "mkdir -p ./jupyter-work-dir && cd ./jupyter-work-dir"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/bootstrapper.py' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/bootstrapper.py --output bootstrapper.py"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/requirements-elyra.txt' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/requirements-elyra.txt --output requirements-elyra.txt"
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-1215145442' --cos-dependencies-archive 'test_responsetime-c50bb14f-a036-4af1-b5dc-21e48eb80f7f.tar.gz' --file 'insurance-claim-processing/lab-materials/03/test_responsetime.py' --outputs 'responsetime_result.json' "
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-0109084010' --cos-dependencies-archive 'test_responsetime-c50bb14f-a036-4af1-b5dc-21e48eb80f7f.tar.gz' --file 'insurance-claim-processing/lab-materials/03/06/test_responsetime.py' --outputs 'responsetime_result.json' "
command:
- sh
- -c
@@ -142,21 +127,6 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.annotations['pipelines.kubeflow.org/run_name']
- name: LLM_ENDPOINT
valueFrom:
secretKeyRef:
key: LLM_ENDPOINT
name: llm-info
- name: MODEL_SHA
valueFrom:
secretKeyRef:
key: MODEL_SHA
name: llm-info
- name: SERVING_SHA
valueFrom:
secretKeyRef:
key: SERVING_SHA
name: llm-info
image: quay.io/modh/runtime-images@sha256:7dd23e58291cad7a0ab4a8e04bda06492f2c027eb33b226358380db58dcdd60b
stepTemplate:
volumeMounts:
@@ -174,11 +144,11 @@ spec:
elyra/node-name: test_responsetime
pipelines.kubeflow.org/cache_enabled: "true"
annotations:
elyra/node-file-name: insurance-claim-processing/lab-materials/03/test_responsetime.py
elyra/node-file-name: insurance-claim-processing/lab-materials/03/06/test_responsetime.py
elyra/pipeline-source: sanity_check.pipeline
pipelines.kubeflow.org/task_display_name: test_responsetime
pipelines.kubeflow.org/component_spec_digest: '{"name": "Run a file",
"outputs": [], "version": "Run a file@sha256=afa090f686bb464016ab6640a960b8e31b88c415166d799c2c734f6e9ba58f6a"}'
"outputs": [], "version": "Run a file@sha256=3e58a6283faf2610ead4310f242aa11d207880bb328d65d9b80064eac003274c"}'
- name: run-a-file-3
taskSpec:
steps:
@@ -188,7 +158,7 @@ spec:
sh -c "mkdir -p ./jupyter-work-dir && cd ./jupyter-work-dir"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/bootstrapper.py' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/bootstrapper.py --output bootstrapper.py"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/requirements-elyra.txt' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/requirements-elyra.txt --output requirements-elyra.txt"
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-1215145442' --cos-dependencies-archive 'test_security-6b595dc7-afb8-46bb-bf52-7cd695ddafb8.tar.gz' --file 'insurance-claim-processing/lab-materials/03/test_security.py' --outputs 'security_result.json' "
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-0109084010' --cos-dependencies-archive 'test_security-6b595dc7-afb8-46bb-bf52-7cd695ddafb8.tar.gz' --file 'insurance-claim-processing/lab-materials/03/06/test_security.py' --outputs 'security_result.json' "
command:
- sh
- -c
@@ -213,21 +183,6 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.annotations['pipelines.kubeflow.org/run_name']
- name: LLM_ENDPOINT
valueFrom:
secretKeyRef:
key: LLM_ENDPOINT
name: llm-info
- name: MODEL_SHA
valueFrom:
secretKeyRef:
key: MODEL_SHA
name: llm-info
- name: SERVING_SHA
valueFrom:
secretKeyRef:
key: SERVING_SHA
name: llm-info
image: quay.io/modh/runtime-images@sha256:7dd23e58291cad7a0ab4a8e04bda06492f2c027eb33b226358380db58dcdd60b
stepTemplate:
volumeMounts:
@@ -245,11 +200,11 @@ spec:
elyra/node-name: test_security
pipelines.kubeflow.org/cache_enabled: "true"
annotations:
elyra/node-file-name: insurance-claim-processing/lab-materials/03/test_security.py
elyra/node-file-name: insurance-claim-processing/lab-materials/03/06/test_security.py
elyra/pipeline-source: sanity_check.pipeline
pipelines.kubeflow.org/task_display_name: test_security
pipelines.kubeflow.org/component_spec_digest: '{"name": "Run a file",
"outputs": [], "version": "Run a file@sha256=a523a1dc72737c6cfaf07342df759a61f68c9b45bf34ab33d3f20cb496ae8083"}'
"outputs": [], "version": "Run a file@sha256=51651082dff4a766733eb7e70c27fa7c33370a3c14f3af453c03a463d0a64dac"}'
- name: run-a-file-4
taskSpec:
steps:
@@ -259,7 +214,7 @@ spec:
sh -c "mkdir -p ./jupyter-work-dir && cd ./jupyter-work-dir"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/bootstrapper.py' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/bootstrapper.py --output bootstrapper.py"
sh -c "echo 'Downloading file:///opt/app-root/bin/utils/requirements-elyra.txt' && curl --fail -H 'Cache-Control: no-cache' -L file:///opt/app-root/bin/utils/requirements-elyra.txt --output requirements-elyra.txt"
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-1215145442' --cos-dependencies-archive 'summarize_results-6b99ceae-d124-4758-904a-03e1a49fe56d.tar.gz' --file 'insurance-claim-processing/lab-materials/03/summarize_results.py' --inputs 'security_result.json;responsetime_result.json;quality_result.json' --outputs 'results.json' "
sh -c "python3 -m pip install packaging && python3 -m pip freeze > requirements-current.txt && python3 bootstrapper.py --pipeline-name 'sanity_check' --cos-endpoint 'https://minio-api-robert-serving-test.apps.rhods-internal.61tk.p1.openshiftapps.com' --cos-bucket 'pipeline-bucket' --cos-directory 'sanity_check-0109084010' --cos-dependencies-archive 'summarize_results-6b99ceae-d124-4758-904a-03e1a49fe56d.tar.gz' --file 'insurance-claim-processing/lab-materials/03/06/summarize_results.py' --inputs 'responsetime_result.json;security_result.json;quality_result.json' --outputs 'results.json' "
command:
- sh
- -c
@@ -284,21 +239,6 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.annotations['pipelines.kubeflow.org/run_name']
- name: LLM_ENDPOINT
valueFrom:
secretKeyRef:
key: LLM_ENDPOINT
name: llm-info
- name: MODEL_SHA
valueFrom:
secretKeyRef:
key: MODEL_SHA
name: llm-info
- name: SERVING_SHA
valueFrom:
secretKeyRef:
key: SERVING_SHA
name: llm-info
image: quay.io/modh/runtime-images@sha256:7dd23e58291cad7a0ab4a8e04bda06492f2c027eb33b226358380db58dcdd60b
stepTemplate:
volumeMounts:
@@ -316,11 +256,11 @@ spec:
elyra/node-name: summarize_results
pipelines.kubeflow.org/cache_enabled: "true"
annotations:
elyra/node-file-name: insurance-claim-processing/lab-materials/03/summarize_results.py
elyra/node-file-name: insurance-claim-processing/lab-materials/03/06/summarize_results.py
elyra/pipeline-source: sanity_check.pipeline
pipelines.kubeflow.org/task_display_name: summarize_results
pipelines.kubeflow.org/component_spec_digest: '{"name": "Run a file",
"outputs": [], "version": "Run a file@sha256=9e2cfcee5f1d1f9eb84f47713866930314f31eb4ea3e7f564394dce005843176"}'
"outputs": [], "version": "Run a file@sha256=bfe6fa070a3ebd821fe5801ef8beb2f710d88a1e58380eb85b169a1b44a5b753"}'
runAfter:
- run-a-file
- run-a-file-2
10 changes: 10 additions & 0 deletions lab-materials/05/.pipeline-envs
@@ -0,0 +1,10 @@
LLM_ENDPOINT=http://llm.ic-shared-llm.svc.cluster.local:3000
POSTGRES_HOST=claimdb.ic-shared-db.svc.cluster.local
POSTGRES_DB=claimdb
POSTGRES_USER=claimdb
POSTGRES_PASSWORD=claimdb
POSTGRES_PORT=5432
DB_S3_ENDPOINT_URL=http://minio.ic-shared-minio.svc.cluster.local:9000
DB_AWS_ACCESS_KEY_ID=JGH5CgQbxkzTAo5JVWCC
DB_AWS_SECRET_ACCESS_KEY=M9Eb6iILjxTvqh3pGMuD7wIO7kCtiIkQioVX24Xd
IMAGES_BUCKET=claim-images
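The lab scripts (see `detect_objects.py` and the other `lab-materials/05` files below) build their `config` by unpacking several sources into one dict; later sources win, so `.pipeline-envs`, merged last, overrides even variables already set in the process environment. A minimal sketch of that precedence with plain dicts, using made-up values:

```python
# Illustration of Python dict-unpacking precedence, mirroring the config
# merge in the lab scripts; the variable values here are assumptions.
env_file      = {"POSTGRES_DB": "devdb", "LLM_ENDPOINT": "http://localhost:3000"}
process_env   = {"POSTGRES_DB": "claimdb"}  # stands in for os.environ
pipeline_envs = {"LLM_ENDPOINT": "http://llm.ic-shared-llm.svc.cluster.local:3000"}

config = {
    **env_file,       # shared development variables (.env)
    **process_env,    # environment variables override the files...
    **pipeline_envs,  # ...and .pipeline-envs, merged last, overrides both
}
```

This ordering is what makes it safe to ship cluster-internal endpoints in `.pipeline-envs` instead of Kubernetes secrets, which is exactly what this commit does.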
1 change: 1 addition & 0 deletions lab-materials/05/detect_objects.py
@@ -19,6 +19,7 @@
**dotenv_values(".env"), # load shared development variables
**dotenv_values(".env.secret"), # load sensitive variables
**os.environ, # override loaded values with environment variables
**dotenv_values(".pipeline-envs"), # load pipeline-specific vars
}

db = db_utils.Database(config, logger)
1 change: 1 addition & 0 deletions lab-materials/05/get_accident_time.py
@@ -16,6 +16,7 @@
**dotenv_values(".env"), # load shared development variables
**dotenv_values(".env.secret"), # load sensitive variables
**os.environ, # override loaded values with environment variables
**dotenv_values(".pipeline-envs"), # load pipeline-specific vars
}

db = db_utils.Database(config, logger)
1 change: 1 addition & 0 deletions lab-materials/05/get_claims.py
@@ -15,6 +15,7 @@
**dotenv_values(".env"), # load shared development variables
**dotenv_values(".env.secret"), # load sensitive variables
**os.environ, # override loaded values with environment variables
**dotenv_values(".pipeline-envs"), # load pipeline-specific vars
}

db = db_utils.Database(config, logger)
1 change: 1 addition & 0 deletions lab-materials/05/get_location.py
@@ -16,6 +16,7 @@
**dotenv_values(".env"), # load shared development variables
**dotenv_values(".env.secret"), # load sensitive variables
**os.environ, # override loaded values with environment variables
**dotenv_values(".pipeline-envs"), # load pipeline-specific vars
}

db = db_utils.Database(config, logger)
1 change: 1 addition & 0 deletions lab-materials/05/get_sentiment.py
@@ -16,6 +16,7 @@
**dotenv_values(".env"), # load shared development variables
**dotenv_values(".env.secret"), # load sensitive variables
**os.environ, # override loaded values with environment variables
**dotenv_values(".pipeline-envs"), # load pipeline-specific vars
}

db = db_utils.Database(config, logger)
2 changes: 1 addition & 1 deletion lab-materials/05/llm_usage.py
@@ -7,7 +7,7 @@
from langchain.evaluation import load_evaluator
from langchain.embeddings import HuggingFaceEmbeddings

INFERENCE_SERVER_URL = os.environ.get("LLM_ENDPOINT")
INFERENCE_SERVER_URL = "http://llm.ic-shared-llm.svc.cluster.local:3000"
MAX_NEW_TOKENS = 512
TOP_K = 10
TOP_P = 0.95