{
  "__inputs": [
    {
      "name": "DS_PROMETHEUS",
      "label": "Prometheus",
      "description": "",
      "type": "datasource",
      "pluginId": "prometheus",
      "pluginName": "Prometheus"
    }
  ],
  "__elements": {},
  "__requires": [
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "10.3.1"
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    },
    {
      "type": "panel",
      "id": "table",
      "name": "Table",
      "version": ""
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 50
              },
              "type": "range"
            },
            {
              "options": {
                "from": 51,
                "result": {
                  "color": "green",
                  "index": 1
                },
                "to": 70
              },
              "type": "range"
            },
            {
              "options": {
                "from": 71,
                "result": {
                  "color": "yellow",
                  "index": 2
                },
                "to": 80
              },
              "type": "range"
            },
            {
              "options": {
                "from": 81,
                "result": {
                  "color": "red",
                  "index": 3
                },
                "to": 100
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "celsius",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 2,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"4\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_GPU_TEMP{gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU Temps",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true
            },
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "Value #E": 41,
              "Value #F": 42,
              "Value #G": 43,
              "Value #H": 44,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 50
              },
              "type": "range"
            },
            {
              "options": {
                "from": 51,
                "result": {
                  "color": "green",
                  "index": 1
                },
                "to": 70
              },
              "type": "range"
            },
            {
              "options": {
                "from": 71,
                "result": {
                  "color": "yellow",
                  "index": 2
                },
                "to": 80
              },
              "type": "range"
            },
            {
              "options": {
                "from": 81,
                "result": {
                  "color": "red",
                  "index": 3
                },
                "to": 100
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "celsius",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 5,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"4\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_VRAM_TEMP{gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU VRAM Temps",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true
            },
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "Value #E": 41,
              "Value #F": 42,
              "Value #G": 43,
              "Value #H": 44,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 50
              },
              "type": "range"
            },
            {
              "options": {
                "from": 50,
                "result": {
                  "color": "green",
                  "index": 1
                },
                "to": 200
              },
              "type": "range"
            },
            {
              "options": {
                "from": 200,
                "result": {
                  "color": "yellow",
                  "index": 2
                },
                "to": 300
              },
              "type": "range"
            },
            {
              "options": {
                "from": 300,
                "result": {
                  "color": "red",
                  "index": 3
                },
                "to": 500
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "watt",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 6,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"4\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_POWER_USAGE{gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU Power",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true
            },
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "Value #E": 41,
              "Value #F": 42,
              "Value #G": 43,
              "Value #H": 44,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 50
              },
              "type": "range"
            },
            {
              "options": {
                "from": 51,
                "result": {
                  "color": "green",
                  "index": 1
                },
                "to": 70
              },
              "type": "range"
            },
            {
              "options": {
                "from": 71,
                "result": {
                  "color": "yellow",
                  "index": 2
                },
                "to": 80
              },
              "type": "range"
            },
            {
              "options": {
                "from": 81,
                "result": {
                  "color": "orange",
                  "index": 3
                },
                "to": 90
              },
              "type": "range"
            },
            {
              "options": {
                "from": 91,
                "result": {
                  "color": "red",
                  "index": 4
                },
                "to": 200
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "celsius",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 8
      },
      "id": 8,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"4\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_HOT_SPOT_TEMP{gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU Core Hot Spot",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true
            },
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 50
              },
              "type": "range"
            },
            {
              "options": {
                "from": 51,
                "result": {
                  "color": "green",
                  "index": 1
                },
                "to": 70
              },
              "type": "range"
            },
            {
              "options": {
                "from": 71,
                "result": {
                  "color": "yellow",
                  "index": 2
                },
                "to": 80
              },
              "type": "range"
            },
            {
              "options": {
                "from": 81,
                "result": {
                  "color": "red",
                  "index": 3
                },
                "to": 100
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "%",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 15
      },
      "id": 7,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"4\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_FAN_SPEED{gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU FAN Speeds",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true
            },
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "description": "",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "mode": "gradient",
              "type": "color-background"
            },
            "filterable": false,
            "inspect": false,
            "minWidth": 65
          },
          "decimals": 0,
          "mappings": [
            {
              "options": {
                "from": 0,
                "result": {
                  "color": "blue",
                  "index": 0
                },
                "to": 0
              },
              "type": "range"
            },
            {
              "options": {
                "from": 1,
                "result": {
                  "color": "red",
                  "index": 1
                },
                "to": 100
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "transparent",
                "value": null
              }
            ]
          },
          "unit": "bool",
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 16
      },
      "id": 9,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 0,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"0\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"1\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"2\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"3\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"4\"} ",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"5\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"6\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "DCGM_FI_DEV_CLOCKS_THROTTLE_REASON{reason=\"SwThermalSlowdown\", gpu=\"7\"}",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        }
      ],
      "title": "Machine GPU Thermal Throttle",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "job",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "DCGM_FI_DRIVER_VERSION 1": true,
              "DCGM_FI_DRIVER_VERSION 2": true,
              "DCGM_FI_DRIVER_VERSION 3": true,
              "DCGM_FI_DRIVER_VERSION 4": true,
              "Hostname 1": true,
              "Hostname 2": true,
              "Hostname 3": true,
              "Hostname 4": true,
              "Hostname 5": true,
              "Hostname 6": true,
              "Hostname 7": true,
              "Hostname 8": true,
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "UUID 1": true,
              "UUID 2": true,
              "UUID 3": true,
              "UUID 4": true,
              "UUID 5": true,
              "UUID 6": true,
              "UUID 7": true,
              "UUID 8": true,
              "Value #D": false,
              "__name__ 1": true,
              "__name__ 2": true,
              "__name__ 3": true,
              "__name__ 4": true,
              "__name__ 5": true,
              "__name__ 6": true,
              "__name__ 7": true,
              "__name__ 8": true,
              "device 1": true,
              "device 2": true,
              "device 3": true,
              "device 4": true,
              "device 5": true,
              "device 6": true,
              "device 7": true,
              "device 8": true,
              "gpu 1": true,
              "gpu 2": true,
              "gpu 3": true,
              "gpu 4": true,
              "gpu 5": true,
              "gpu 6": true,
              "gpu 7": true,
              "gpu 8": true,
              "instance 1": true,
              "instance 2": true,
              "instance 3": true,
              "instance 4": true,
              "instance 5": true,
              "instance 6": true,
              "instance 7": true,
              "instance 8": true,
              "modelName 1": true,
              "modelName 2": true,
              "modelName 3": true,
              "modelName 4": true,
              "modelName 5": true,
              "modelName 6": true,
              "modelName 7": true,
              "modelName 8": true,
              "reason": true
            },
            "includeByName": {},
            "indexByName": {
              "DCGM_FI_DRIVER_VERSION 1": 1,
              "DCGM_FI_DRIVER_VERSION 2": 13,
              "DCGM_FI_DRIVER_VERSION 3": 22,
              "DCGM_FI_DRIVER_VERSION 4": 32,
              "Hostname 1": 5,
              "Hostname 2": 14,
              "Hostname 3": 23,
              "Hostname 4": 33,
              "Time 1": 4,
              "Time 2": 12,
              "Time 3": 21,
              "Time 4": 31,
              "UUID 1": 6,
              "UUID 2": 15,
              "UUID 3": 24,
              "UUID 4": 34,
              "Value #A": 2,
              "Value #B": 3,
              "Value #C": 30,
              "Value #D": 40,
              "__name__ 1": 7,
              "__name__ 2": 16,
              "__name__ 3": 25,
              "__name__ 4": 35,
              "device 1": 8,
              "device 2": 17,
              "device 3": 26,
              "device 4": 36,
              "gpu 1": 9,
              "gpu 2": 18,
              "gpu 3": 27,
              "gpu 4": 37,
              "instance 1": 10,
              "instance 2": 19,
              "instance 3": 28,
              "instance 4": 38,
              "job": 0,
              "modelName 1": 11,
              "modelName 2": 20,
              "modelName 3": 29,
              "modelName 4": 39
            },
            "renameByName": {
              "Hostname 1": "",
              "Value #A": "GPU 0",
              "Value #B": "GPU 1",
              "Value #C": "GPU 2",
              "Value #D": "GPU 3",
              "Value #E": "GPU 4",
              "Value #F": "GPU 5",
              "Value #G": "GPU 6",
              "Value #H": "GPU 7",
              "job": "Machine"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "custom": {
            "align": "auto",
            "cellOptions": {
              "type": "auto"
            },
            "inspect": false,
            "minWidth": 50
          },
          "decimals": 1,
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unitScale": true
        },
        "overrides": []
      },
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 23
      },
      "id": 1,
      "options": {
        "cellHeight": "sm",
        "footer": {
          "countRows": false,
          "fields": "",
          "reducer": [
            "sum"
          ],
          "show": false
        },
        "frameIndex": 4,
        "showHeader": true
      },
      "pluginVersion": "10.3.1",
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "(sum by(instance) (irate(node_cpu_seconds_total{mode!=\"idle\"}[$__rate_interval])) / on(instance) group_left sum by (instance) (irate(node_cpu_seconds_total[$__rate_interval]))) * 100\r\n",
          "format": "table",
          "legendFormat": "__auto",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "node_uname_info",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "(sum by(instance) (node_memory_MemTotal_bytes{} - node_memory_MemFree_bytes{} - node_memory_Buffers_bytes{} - node_memory_Cached_bytes{})) / sum by(instance) (node_memory_MemTotal_bytes{}) * 100\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "(node_time_seconds - on(instance) node_boot_time_seconds ) / 60 /60",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "D"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "round(max by(instance)(rate(node_network_transmit_bytes_total[1m])*8/1000000), 0.1)\r\n\r\n\r\n\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "E"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "round(max by(instance)(rate(node_network_receive_bytes_total[1m])*8/1000000), 0.1)\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "F"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "(1 - avg by(instance)(node_filesystem_avail_bytes{mountpoint=\"/\"}/node_filesystem_size_bytes{mountpoint=\"/\"})) * 100\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "G"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "(1 - avg by(instance)(node_filesystem_avail_bytes{mountpoint=\"/var/lib/docker\"}/node_filesystem_size_bytes{mountpoint=\"/var/lib/docker\"})) * 100\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "H"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "editorMode": "code",
          "expr": "avg by(instance)(node_cpu_temperature{})\r\n",
          "format": "table",
          "hide": false,
          "legendFormat": "__auto",
          "range": true,
          "refId": "I"
        }
      ],
      "title": "Machine Overview",
      "transformations": [
        {
          "id": "joinByField",
          "options": {
            "byField": "instance",
            "mode": "outer"
          }
        },
        {
          "id": "organize",
          "options": {
            "excludeByName": {
              "Time 1": true,
              "Time 2": true,
              "Time 3": true,
              "Time 4": true,
              "Time 5": true,
              "Time 6": true,
              "Time 7": true,
              "Time 8": true,
              "Time 9": true,
              "Value #B": true,
              "__name__": true,
              "domainname": true,
              "instance": true,
              "job": true,
              "machine": true,
              "sysname": true,
              "version": true
            },
            "indexByName": {
              "Time 1": 4,
              "Time 2": 7,
              "Time 3": 14,
              "Time 4": 16,
              "Time 5": 18,
              "Time 6": 20,
              "Time 7": 22,
              "Time 8": 24,
              "Time 9": 26,
              "Value #A": 5,
              "Value #B": 13,
              "Value #C": 15,
              "Value #D": 17,
              "Value #E": 19,
              "Value #F": 21,
              "Value #G": 23,
              "Value #H": 25,
              "Value #I": 6,
              "__name__": 8,
              "domainname": 9,
              "instance": 3,
              "job": 10,
              "machine": 2,
              "nodename": 0,
              "release": 1,
              "sysname": 11,
              "version": 12
            },
            "renameByName": {
              "Time 2": "",
              "Time 5": "",
              "Value #A": "CPU %",
              "Value #C": "Memory Used %",
              "Value #D": "Uptime(h)",
              "Value #E": "Network_transmit Mbps",
              "Value #F": "Network_receive Mbps ",
              "Value #G": "Root FS Disk Usage %",
              "Value #H": "Docker Disk Usage %",
              "Value #I": "CPU Temp C",
              "nodename": "Machine",
              "release": "Kernel"
            }
          }
        },
        {
          "id": "sortBy",
          "options": {
            "fields": {},
            "sort": [
              {
                "field": "Machine"
              }
            ]
          }
        }
      ],
      "type": "table"
    }
  ],
  "refresh": "",
  "schemaVersion": 39,
  "tags": [
    "Prometheuse",
    "Nvidia",
    "Linux"
  ],
  "templating": {
    "list": []
  },
  "time": {
    "from": "now-5m",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "DC OverView",
  "uid": "b7573857-fb4d-4389-8159-edbad8b65e5b",
  "version": 23,
  "weekStart": ""
}