From b387467a197330a732f28886167aaac06654bebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Hern=C3=A1ndez?= <23639005+israel-hdez@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:04:02 -0600 Subject: [PATCH 1/2] Trust OpenShift service CA in kserve-router MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This changes kserve-controller to mount the OpenShift Service CA bundle into kserve-router container and a configures it to trust the bundle. This affects InferenceGraph deployed in Serverless mode. With these changes, InferenceGraphs will work correctly when deployed without an Istio sidecar. These changes are needed because in ODH the InferenceServices are secured with TLS. The internal endpoints (which are the ones InferenceGraph uses) are using OpenShift service serving certificates. Related to: https://issues.redhat.com/browse/RHOAIENG-13448 Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com> --- pkg/constants/constants.go | 5 ++ .../inferencegraph/controller_test.go | 66 +++++++++++++++++++ .../inferencegraph/knative_reconciler.go | 34 ++++++++-- 3 files changed, 100 insertions(+), 5 deletions(-) diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 97543756fa7..0992bdff19c 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -496,6 +496,11 @@ var ( MultiNodeHead = "head" ) +// OpenShift constants +const ( + OpenShiftServiceCaConfigMapName = "openshift-service-ca.crt" +) + // GetRawServiceLabel generate native service label func GetRawServiceLabel(service string) string { return "isvc." + service diff --git a/pkg/controller/v1alpha1/inferencegraph/controller_test.go b/pkg/controller/v1alpha1/inferencegraph/controller_test.go index 5ee18d38dbd..a2634dda740 100644 --- a/pkg/controller/v1alpha1/inferencegraph/controller_test.go +++ b/pkg/controller/v1alpha1/inferencegraph/controller_test.go @@ -147,6 +147,10 @@ var _ = Describe("Inference Graph controller test", func() { { Image: "kserve/router:v0.10.0", Env: []v1.EnvVar{ + { + Name: "SSL_CERT_FILE", + Value: "/etc/odh/openshift-service-ca-bundle/service-ca.crt", + }, { Name: "PROPAGATE_HEADERS", Value: "Authorization,Intuit_tid", @@ -175,9 +179,27 @@ var _ = Describe("Inference Graph controller test", func() { Drop: []v1.Capability{v1.Capability("ALL")}, }, }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "openshift-service-ca-bundle", + MountPath: "/etc/odh/openshift-service-ca-bundle", + }, + }, }, }, AutomountServiceAccountToken: proto.Bool(false), + Volumes: []v1.Volume{ + { + Name: "openshift-service-ca-bundle", + VolumeSource: v1.VolumeSource{ + ConfigMap: &v1.ConfigMapVolumeSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: constants.OpenShiftServiceCaConfigMapName, + }, + }, + }, + }, + }, }, }, }, @@ -283,6 +305,10 @@ var _ = Describe("Inference Graph controller test", func() { { Image: "kserve/router:v0.10.0", Env: []v1.EnvVar{ + { + Name: "SSL_CERT_FILE", + Value: "/etc/odh/openshift-service-ca-bundle/service-ca.crt", + }, { Name: "PROPAGATE_HEADERS", Value: "Authorization,Intuit_tid", @@ -311,9 +337,27 @@ var _ = Describe("Inference Graph controller test", func() { Drop: []v1.Capability{v1.Capability("ALL")}, }, }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "openshift-service-ca-bundle", + MountPath: "/etc/odh/openshift-service-ca-bundle", + }, + }, }, }, AutomountServiceAccountToken: proto.Bool(false), + Volumes: []v1.Volume{ + { + Name: "openshift-service-ca-bundle", + VolumeSource: v1.VolumeSource{ + ConfigMap: &v1.ConfigMapVolumeSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: constants.OpenShiftServiceCaConfigMapName, + }, + }, + }, + }, + }, }, }, }, @@ -433,6 +477,10 @@ var _ = Describe("Inference Graph controller test", func() { { Image: "kserve/router:v0.10.0", Env: []v1.EnvVar{ + { + Name: "SSL_CERT_FILE", + Value: "/etc/odh/openshift-service-ca-bundle/service-ca.crt", + }, { Name: "PROPAGATE_HEADERS", Value: "Authorization,Intuit_tid", @@ -461,6 +509,12 @@ var _ = Describe("Inference Graph controller test", func() { Drop: []v1.Capability{v1.Capability("ALL")}, }, }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "openshift-service-ca-bundle", + MountPath: "/etc/odh/openshift-service-ca-bundle", + }, + }, }, }, Affinity: &v1.Affinity{ @@ -487,6 +541,18 @@ var _ = Describe("Inference Graph controller test", func() { }, }, AutomountServiceAccountToken: proto.Bool(false), + Volumes: []v1.Volume{ + { + Name: "openshift-service-ca-bundle", + VolumeSource: v1.VolumeSource{ + ConfigMap: &v1.ConfigMapVolumeSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: constants.OpenShiftServiceCaConfigMapName, + }, + }, + }, + }, + }, }, }, }, diff --git a/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go b/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go index b61790bbd1b..2caf8dac1a4 100644 --- a/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go +++ b/pkg/controller/v1alpha1/inferencegraph/knative_reconciler.go @@ -203,6 +203,30 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In Drop: []v1.Capability{v1.Capability("ALL")}, }, }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "openshift-service-ca-bundle", + MountPath: "/etc/odh/openshift-service-ca-bundle", + }, + }, + Env: []v1.EnvVar{ + { + Name: "SSL_CERT_FILE", + Value: "/etc/odh/openshift-service-ca-bundle/service-ca.crt", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: "openshift-service-ca-bundle", + VolumeSource: v1.VolumeSource{ + ConfigMap: &v1.ConfigMapVolumeSource{ + LocalObjectReference: v1.LocalObjectReference{ + Name: constants.OpenShiftServiceCaConfigMapName, + }, + }, + }, }, }, Affinity: graph.Spec.Affinity, @@ -217,12 +241,12 @@ func createKnativeService(componentMeta metav1.ObjectMeta, graph *v1alpha1api.In // Only adding this env variable "PROPAGATE_HEADERS" if router's headers config has the key "propagate" value, exists := config.Headers["propagate"] if exists { - service.Spec.ConfigurationSpec.Template.Spec.PodSpec.Containers[0].Env = []v1.EnvVar{ - { - Name: constants.RouterHeadersPropagateEnvVar, - Value: strings.Join(value, ","), - }, + propagateEnv := v1.EnvVar{ + Name: constants.RouterHeadersPropagateEnvVar, + Value: strings.Join(value, ","), } + + service.Spec.ConfigurationSpec.Template.Spec.PodSpec.Containers[0].Env = append(service.Spec.ConfigurationSpec.Template.Spec.PodSpec.Containers[0].Env, propagateEnv) } return service } From 1ce708da5369346571bc278a0f8b478018b127b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Hern=C3=A1ndez?= <23639005+israel-hdez@users.noreply.github.com> Date: Sun, 3 Nov 2024 15:21:00 -0600 Subject: [PATCH 2/2] Inference Graph: use plain text HTTP when part of Istio Mesh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In topologies using Istio mesh, applications can use plain-text HTTP to send traffic to other mesh-member workloads. Handling of TLS is delegated to Istio. Thus, when KServe workloads have the Istio sidecar (e.g. by using auto-injection), the Inference Graph router should send its traffic without TLS even if the schema of the service URL is specified as HTTPS. Istio would originate TLS when needed, and the double TLS is prevented (see https://istio.io/latest/docs/ops/common-problems/network-issues/#double-tls). These changes implement a detection of the Istio sidecar by querying a well known port that the sidecar is using. If the sidecar is found, inference requests are sent using plain-text HTTP. Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com> --- cmd/router/main.go | 75 +++++++++++++++++++ .../v1alpha1/inferencegraph/raw_ig.go | 2 +- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/cmd/router/main.go b/cmd/router/main.go index 01f5e7a4604..dc50d061ff3 100644 --- a/cmd/router/main.go +++ b/cmd/router/main.go @@ -23,10 +23,12 @@ import ( "fmt" "io" "net/http" + "net/url" "os" "regexp" "strconv" "strings" + "syscall" "time" "github.com/kserve/kserve/pkg/constants" @@ -45,9 +47,82 @@ import ( var log = logf.Log.WithName("InferenceGraphRouter") +// _isInMesh is an auxiliary global variable for isInIstioMesh function. +var _isInMesh *bool + +// isInIstioMesh checks if the InferenceGraph pod belongs to the mesh by +// checking the presence of the sidecar. It is known that when the sidecar +// is present, Envoy will be using port 15000 with standard HTTP. Thus, the +// presence of the sidecar is assumed if this port responds with an HTTP 200 status +// when doing a "GET /" request. +// +// The result of the check is cached in the _isInMesh global variable. Since a +// pod cannot be modified, a single check is enough for the whole life of the pod. +// The check cannot be done at start-up, because there is the possibility of +// false negatives, since there is no guarantee that the Istio sidecar has already +// started. So, the isInIstioMesh func should be used after the first inference +// request is received when it is guaranteed that the Istio sidecar is in ready state. +// +// Reference: +// - https://istio.io/latest/docs/ops/deployment/application-requirements/#ports-used-by-istio) +func isInIstioMesh() (bool, error) { + if _isInMesh != nil { + return *_isInMesh, nil + } + + isInMesh := false + client := http.Client{ + Timeout: time.Second * 3, + } + response, err := client.Get("http://localhost:15000") + if err == nil { + if response.StatusCode == http.StatusOK { + isInMesh = true + } + } else if errors.Is(err, syscall.ECONNREFUSED) { + // Assume no Istio sidecar. Thus, this pod is not + // part of the mesh. + err = nil + } + + if response != nil && response.Body != nil { + err = response.Body.Close() + } + + _isInMesh = &isInMesh + return *_isInMesh, err +} + func callService(serviceUrl string, input []byte, headers http.Header) ([]byte, int, error) { defer timeTrack(time.Now(), "step", serviceUrl) log.Info("Entering callService", "url", serviceUrl) + + parsedServiceUrl, parseServiceUrlErr := url.Parse(serviceUrl) + if parseServiceUrlErr != nil { + return nil, 500, parseServiceUrlErr + } + if parsedServiceUrl.Scheme == "https" { + if isInMesh, isInMeshErr := isInIstioMesh(); isInMeshErr != nil { + return nil, 500, isInMeshErr + } else if isInMesh { + // In this branch, it has been resolved that the Inference Graph is + // part of the Istio mesh. In this case, even if the target service + // is using HTTPS, it is better to use plain-text HTTP: + // * If the target service is also part of the mesh, Istio will take + // care of properly applying TLS policies (e.g. mutual TLS). + // * If the target service is _not_ part of the mesh, it still is better + // to let Istio manage TLS by configuring the sidecar to do TLS + // origination and prevent double TLS (see: https://istio.io/latest/docs/ops/common-problems/network-issues/#double-tls) + // + // If the Inference Graph is not part of the mesh, the indicated + // schema is used. + parsedServiceUrl.Scheme = "http" + serviceUrl = parsedServiceUrl.String() + + log.Info("Using plain-text schema to let Istio manage TLS termination", "url", serviceUrl) + } + } + req, err := http.NewRequest("POST", serviceUrl, bytes.NewBuffer(input)) if err != nil { log.Error(err, "An error occurred while preparing request object with serviceUrl.", "serviceUrl", serviceUrl) diff --git a/pkg/controller/v1alpha1/inferencegraph/raw_ig.go b/pkg/controller/v1alpha1/inferencegraph/raw_ig.go index 49321f0e536..f29248f76f5 100644 --- a/pkg/controller/v1alpha1/inferencegraph/raw_ig.go +++ b/pkg/controller/v1alpha1/inferencegraph/raw_ig.go @@ -148,7 +148,7 @@ func handleInferenceGraphRawDeployment(cl client.Client, clientset kubernetes.In reconciler, err := raw.NewRawKubeReconciler(cl, clientset, scheme, objectMeta, metav1.ObjectMeta{}, &componentExtSpec, desiredSvc, nil) if err != nil { - return nil, reconciler.URL, errors.Wrapf(err, "fails to create NewRawKubeReconciler for inference graph") + return nil, nil, errors.Wrapf(err, "fails to create NewRawKubeReconciler for inference graph") } // set Deployment Controller for _, deployments := range reconciler.Deployment.DeploymentList {