diff --git a/charts/mlrun-ce/Chart.yaml b/charts/mlrun-ce/Chart.yaml index 90a197d0..665f75fa 100644 --- a/charts/mlrun-ce/Chart.yaml +++ b/charts/mlrun-ce/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v1 -version: 0.6.0-rc2 +version: 0.6.0-rc3 name: mlrun-ce description: MLRUn Open Source Stack home: https://iguazio.com diff --git a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml index c75101fa..23b17a2b 100644 --- a/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml +++ b/charts/mlrun-ce/templates/config/mlrun-env-configmap.yaml @@ -17,3 +17,4 @@ data: MLRUN_CE__VERSION: {{ .Chart.Version }} MLRUN_DEFAULT_TENSORBOARD_LOGS_PATH: /home/jovyan/data/tensorboard/{{ `{{project}} `}} MLRUN_FEATURE_STORE__DEFAULT_TARGETS: parquet + MLRUN_MODEL_ENDPOINT_MONITORING__STORE_TYPE: sql diff --git a/charts/mlrun-ce/templates/config/model-monitoring-details.yml b/charts/mlrun-ce/templates/config/model-monitoring-details.yml new file mode 100644 index 00000000..93db4e71 --- /dev/null +++ b/charts/mlrun-ce/templates/config/model-monitoring-details.yml @@ -0,0 +1,985 @@ +apiVersion: v1 +data: + model-monitoring-details.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 113, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [], + "title": "Model Monitoring - Overview", + "type": "link", + "url": "d/g0M4uh0Mz/model-monitoring-overview" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": "iguazio", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "First Request" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeFromNow" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Last Request" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeFromNow" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Endpoint ID" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Function URI" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model Class" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "custom.width" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Predictions/s (5 minute avg)" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Average Latency (1 hour)" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "unit", + "value": "µs" + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 23, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT uid,model,function_uri,model_class,metrics->\"$.generic.predictions_per_second\",metrics->\"$.generic.latency_avg_1h\",first_request,last_request FROM mlrun.model_endpoints where project='$PROJECT' and uid='$MODELENDPOINT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "indexByName": { + "first_request": 5, + "last_request": 6, + "function_uri": 2, + "metrics->\"$.generic.latency_avg_1h\"": 7, + "metrics->\"$.generic.predictions_per_second\"": 8, + "model": 3, + "model_class": 4, + "uid": 1 + }, + "renameByName": { + "first_request": "First Request", + "last_request": "Last Request", + "function_uri": "Function URI", + "metrics->\"$.generic.latency_avg_1h\"": "Average Latency (1 hour)", + "metrics->\"$.generic.predictions_per_second\"": "Predictions/s (5 minute avg)", + "model": "Model", + "model_class": "Model Class", + "uid": "Endpoint ID" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": "iguazio", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "center", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "tvd_sum" + }, + "properties": [ + { + "id": "displayName", + "value": "TVD (sum)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tvd_mean" + }, + "properties": [ + { + "id": "displayName", + "value": "TVD (mean)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "hellinger_sum" + }, + "properties": [ + { + "id": "displayName", + "value": "Hellinger (sum)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "hellinger_mean" + }, + "properties": [ + { + "id": "displayName", + "value": "Hellinger (mean)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "kld_sum" + }, + "properties": [ + { + "id": "displayName", + "value": "KLD (sum)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "kld_mean" + }, + "properties": [ + { + "id": "displayName", + "value": "KLD (mean)" + }, + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 24, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "name" + } + ] + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT drift_measures->\"$.tvd_sum\" as \"TVD (sum)\", drift_measures->\"$.tvd_mean\" as \"TVD (mean)\", drift_measures->\"$.hellinger_sum\" as \"Hellinger (sum)\", drift_measures->\"$.hellinger_mean\" as \"Hellinger (mean)\", drift_measures->\"$.kld_sum\" as \"KLD (sum)\", drift_measures->\"$.kld_mean\" as \"KLD (mean)\" FROM mlrun.model_endpoints where project='$PROJECT' and uid='$MODELENDPOINT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Overall Drift Analysis", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "endpoint_id": "Endpoint ID", + "first_request": "First Request", + "function": "Function", + "last_request": "Last Request", + "latency_avg_1s": "Average Latency", + "model": "Model", + "model_class": "Model Class", + "predictions_per_second_count_1s": "Predictions/sec", + "tag": "Tag" + } + } + } + ], + "transparent": true, + "type": "table" + }, + { + "datasource": "iguazio", + "description": "Feature analysis of the latest batch", + "fieldConfig": { + "defaults": { + "custom": { + "align": "center", + "displayMode": "auto", + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Expected Min" + }, + "properties": [ + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Expected Mean" + }, + "properties": [ + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Expected Max" + }, + "properties": [ + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "tvd" + }, + "properties": [ + { + "id": "displayName", + "value": "TVD" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "hellinger" + }, + "properties": [ + { + "id": "displayName", + "value": "Hellinger" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "kld" + }, + "properties": [ + { + "id": "displayName", + "value": "KLD" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 14, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Feature" + } + ] + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "dataset": "mlrun", + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT feature_stats FROM mlrun.model_endpoints where project='$PROJECT' and uid='$MODELENDPOINT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "feature_stats", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "model_endpoints" + }, + { + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "hide": false, + "rawQuery": true, + "rawSql": "SELECT current_stats FROM mlrun.model_endpoints where project='$PROJECT' and uid='$MODELENDPOINT'", + "refId": "B", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + }, + { + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "hide": false, + "rawQuery": true, + "rawSql": "SELECT drift_measures FROM mlrun.model_endpoints where project='$PROJECT' and uid='$MODELENDPOINT'", + "refId": "C", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Features Analysis", + "transformations": [ + { + "id": "extractFields", + "options": { + "format": "json", + "source": "feature_stats" + } + }, + { + "id": "extractFields", + "options": { + "format": "json", + "source": "current_stats" + } + }, + { + "id": "extractFields", + "options": { + "format": "json", + "source": "drift_measures" + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "reduce", + "options": { + "reducers": [ + "allValues" + ] + } + }, + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "equal", + "options": { + "value": "timestamp" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "feature_stats" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "current_stats" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "tvd_sum" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "tvd_mean" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "hellinger_sum" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "hellinger_mean" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "kld_sum" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "kld_mean" + } + }, + "fieldName": "Field" + }, + { + "config": { + "id": "equal", + "options": { + "value": "drift_measures" + } + }, + "fieldName": "Field" + } + ], + "match": "any", + "type": "exclude" + } + }, + { + "id": "extractFields", + "options": { + "format": "json", + "source": "All values" + } + }, + { + "id": "filterByValue", + "options": { + "filters": [ + { + "config": { + "id": "isNull", + "options": {} + }, + "fieldName": "0" + } + ], + "match": "any", + "type": "exclude" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Field", + "0", + "1", + "2" + ] + } + } + }, + { + "id": "extractFields", + "options": { + "replace": false, + "source": "0" + } + }, + { + "id": "extractFields", + "options": { + "source": "1" + } + }, + { + "id": "extractFields", + "options": { + "source": "2" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "Field", + "mean 1", + "min 1", + "max 1", + "mean 2", + "min 2", + "max 2", + "tvd", + "hellinger", + "kld" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Field": 0, + "hellinger": 8, + "kld": 9, + "max 1": 5, + "max 2": 6, + "mean 1": 1, + "mean 2": 2, + "min 1": 3, + "min 2": 4, + "tvd": 7 + }, + "renameByName": { + "Field": "Feature", + "max 1": "Expected Max", + "max 2": "Actual Max", + "mean 1": "Expected Mean", + "mean 2": "Actual Mean", + "min 1": "Expected Min", + "min 2": "Actual Min" + } + } + } + ], + "transparent": true, + "type": "table" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": "iguazio", + "definition": "SELECT DISTINCT project FROM mlrun.model_endpoints", + "hide": 0, + "includeAll": false, + "label": "Project", + "multi": false, + "name": "PROJECT", + "options": [], + "query": "SELECT DISTINCT project FROM mlrun.model_endpoints", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "isNone": true, + "selected": false, + "text": "None", + "value": "" + }, + "datasource": "iguazio", + "definition": "SELECT uid FROM mlrun.model_endpoints where project='$PROJECT';", + "hide": 0, + "includeAll": false, + "label": "Model Endpoint", + "multi": false, + "name": "MODELENDPOINT", + "options": [], + "query": "SELECT uid FROM mlrun.model_endpoints where project='$PROJECT';", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Model Monitoring - Details", + "uid": "AohIXhAMk", + "version": 6, + "weekStart": "" + } +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: model-monitoring-details \ No newline at end of file diff --git a/charts/mlrun-ce/templates/config/model-monitoring-overview.yml b/charts/mlrun-ce/templates/config/model-monitoring-overview.yml new file mode 100644 index 00000000..785d26d2 --- /dev/null +++ b/charts/mlrun-ce/templates/config/model-monitoring-overview.yml @@ -0,0 +1,780 @@ +apiVersion: v1 +data: + model-monitoring-overview.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [ + { + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [], + "targetBlank": false, + "title": "Model Monitoring - Details", + "type": "link", + "url": "d/AohIXhAMk/model-monitoring-details" + } + ], + "liveNow": false, + "panels": [ + { + "datasource": "iguazio", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "dataset": "mlrun", + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT COUNT(uid) FROM mlrun.model_endpoints where project='$PROJECT'", + "refId": "A", + "sql": { + "columns": [ + { + "name": "COUNT", + "parameters": [ + { + "name": "uid", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "model_endpoints" + } + ], + "title": "Endpoints", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "datasource": "iguazio", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 5, + "y": 0 + }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/^metrics\\-\\>\"\\$\\.generic\\.predictions_per_second\"$/", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "dataset": "mlrun", + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "hide": false, + "rawQuery": true, + "rawSql": "SELECT metrics->\"$.generic.predictions_per_second\" FROM mlrun.model_endpoints where project='$PROJECT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "metrics", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "model_endpoints" + } + ], + "title": "Predictions/s (5 Minute Average)", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "datasource": "iguazio", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "µs" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 11, + "y": 0 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "/^metrics\\-\\>\"\\$\\.generic\\.latency_avg_1h\"$/", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT metrics->\"$.generic.latency_avg_1h\" FROM mlrun.model_endpoints where project='$PROJECT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + } + } + ], + "title": "Average Latency (Last Hour)", + "transformations": [], + "transparent": true, + "type": "stat" + }, + { + "datasource": "iguazio", + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 16, + "y": 0 + }, + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "dataset": "mlrun", + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT sum(error_count) FROM mlrun.model_endpoints where project='$PROJECT' ", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "error_count", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "model_endpoints" + } + ], + "title": "Errors", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "error_count" + ] + } + } + }, + { + "id": "reduce", + "options": { + "reducers": [ + "sum" + ] + } + } + ], + "transparent": true, + "type": "stat" + }, + { + "datasource": "iguazio", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "center", + "displayMode": "auto", + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Function" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Model Class" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "First Request" + }, + "properties": [ + { + "id": "unit", + "value": "dateTimeFromNow" + }, + { + "id": "custom.align", + "value": "center" + }, + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Last Request" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "unit", + "value": "dateTimeFromNow" + }, + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Accuracy" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Error Count" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "noValue", + "value": "N/A" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Drift Status" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + }, + { + "id": "noValue", + "value": "N/A" + }, + { + "id": "mappings", + "value": [ + { + "options": { + "DRIFT_DETECTED": { + "color": "red", + "index": 1, + "text": "2" + }, + "NO_DRIFT": { + "color": "green", + "index": 3, + "text": "0" + }, + "N\\A": { + "index": 0, + "text": "-1" + }, + "POSSIBLE_DRIFT": { + "color": "yellow", + "index": 2, + "text": "1" + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.displayMode", + "value": "color-background" + }, + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(255, 255, 255, 0)", + "value": null + }, + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Endpoint ID" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/AohIXhAMk/model-monitoring-details?orgId=1&refresh=1m&var-PROJECT=$PROJECT&var-MODEL=${__value.text}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Average Latency (Last Hour)" + }, + "properties": [ + { + "id": "unit", + "value": "µs" + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 22, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "current_stats" + } + ] + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "dataset": "mlrun", + "datasource": "iguazio", + "editorMode": "code", + "format": "table", + "rawQuery": true, + "rawSql": "SELECT uid, function_uri, model, model_class, first_request, last_request, error_count, drift_status FROM mlrun.model_endpoints where project='$PROJECT'", + "refId": "A", + "sql": { + "columns": [ + { + "parameters": [ + { + "name": "current_stats", + "type": "functionParameter" + } + ], + "type": "function" + } + ], + "groupBy": [ + { + "property": { + "type": "string" + }, + "type": "groupBy" + } + ], + "limit": 50 + }, + "table": "model_endpoints" + } + ], + "title": "Models", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "model_hash": false + }, + "indexByName": { + "drift_status": 7, + "error_count": 6, + "first_request": 4, + "function_uri": 1, + "last_request": 5, + "model": 2, + "model_class": 3, + "uid": 0 + }, + "renameByName": { + "accuracy": "Accuracy", + "drift_status": "Drift Status", + "endpoint_function": "Function", + "endpoint_id": "Endpoint ID", + "endpoint_model": "Model", + "endpoint_model_class": "Model Class", + "endpoint_tag": "Tag", + "error_count": "Error Count", + "first_request": "First Request", + "function": "Function", + "function_uri": "Function", + "last_request": "Last Request", + "latency_avg_1h": "Average Latency (Last Hour)", + "latency_avg_1s": "Average Latency", + "model": "Model", + "model_class": "Class", + "predictions_per_second": "Predictions/s (5 Minute Average)", + "predictions_per_second_count_1s": "Predictions/1s", + "tag": "Tag", + "uid": "Endpoint ID" + } + } + } + ], + "type": "table" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": "iguazio", + "definition": "SELECT DISTINCT project FROM mlrun.model_endpoints", + "hide": 0, + "includeAll": false, + "label": "Project", + "multi": false, + "name": "PROJECT", + "options": [], + "query": "SELECT DISTINCT project FROM mlrun.model_endpoints", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Model Monitoring - Overview", + "uid": "g0M4uh0Mz", + "version": 1, + "weekStart": "" + } +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: model-monitoring-overview \ No newline at end of file diff --git a/charts/mlrun-ce/values.yaml b/charts/mlrun-ce/values.yaml index c496e2f2..435224bf 100644 --- a/charts/mlrun-ce/values.yaml +++ b/charts/mlrun-ce/values.yaml @@ -331,16 +331,27 @@ kube-prometheus-stack: alertmanager: enabled: false grafana: + adminUser: admin + adminPassword: admin + additionalDataSources: + - name: iguazio + type: mysql + url: + user: + password: + database: + editable: true + maxOpenConns: 100 + maxIdleConns: 100 + maxIdleConnsAuto: true persistence: type: pvc enabled: true - size: 10Gi + size: 10Gi grafana.ini: auth.anonymous: enabled: true org_role: Editor - security: - disable_initial_admin_creation: true fullnameOverride: grafana enabled: true service: @@ -350,7 +361,7 @@ kube-prometheus-stack: enabled: true service: type: NodePort - nodePort: 30120 + nodePort: 30120 kube-state-metrics: fullnameOverride: state-metrics prometheus-node-exporter: