Merge branch 'master' into scheduler_compatibility
DanielZhangQD authored Sep 24, 2021
2 parents 6f8dadd + aa37593 commit 4c02820
Showing 17 changed files with 1,562 additions and 293 deletions.
36 changes: 36 additions & 0 deletions docs/api-references/docs.md
@@ -2484,6 +2484,24 @@ Defaults to 1.</p>
</tr>
<tr>
<td>
<code>shards</code></br>
<em>
int32
</em>
</td>
<td>
<p>EXPERIMENTAL: Number of shards to distribute targets onto. The number of
replicas multiplied by the number of shards is the total number of Pods
created. Note that scaling down shards will not reshard data onto the
remaining instances; it must be moved manually. Increasing shards will not
reshard data either, but it will continue to be available from the same
instances. To query globally, use the Thanos sidecar and Thanos Querier, or
remote-write the data to a central location. Sharding is done on the content
of the <code>__address__</code> target meta-label.</p>
</td>
</tr>
<tr>
<td>
<code>additionalVolumes</code></br>
<em>
<a href="https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volume-v1-core">
@@ -21263,6 +21281,24 @@ Defaults to 1.</p>
</tr>
<tr>
<td>
<code>shards</code></br>
<em>
int32
</em>
</td>
<td>
<p>EXPERIMENTAL: Number of shards to distribute targets onto. The number of
replicas multiplied by the number of shards is the total number of Pods
created. Note that scaling down shards will not reshard data onto the
remaining instances; it must be moved manually. Increasing shards will not
reshard data either, but it will continue to be available from the same
instances. To query globally, use the Thanos sidecar and Thanos Querier, or
remote-write the data to a central location. Sharding is done on the content
of the <code>__address__</code> target meta-label.</p>
</td>
</tr>
<tr>
<td>
<code>additionalVolumes</code></br>
<em>
<a href="https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#volume-v1-core">
36 changes: 36 additions & 0 deletions examples/monitor-shards/README.md
@@ -0,0 +1,36 @@
# Shards and Replicas

If a single Prometheus instance cannot handle the metrics of all current scrape targets, you can shard the targets across multiple Prometheus instances.
Sharding uses the Prometheus `modulus` relabeling configuration, which hashes the value of the `__address__` source label and splits the scrape targets across the configured number of shards, roughly as sketched below.
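
Conceptually, each shard's generated scrape configuration keeps only the targets whose hash matches its shard index, along the lines of the following relabeling rules (illustrative only; the actual rules are generated by tidb-operator and may differ):

```yaml
# Illustrative hashmod sharding for shard 0 of 2; the generated rules may differ.
relabel_configs:
  - source_labels: [__address__]
    modulus: 2                 # total number of shards
    target_label: __tmp_hash
    action: hashmod
  - source_labels: [__tmp_hash]
    regex: "0"                 # keep only targets hashed to this shard's index
    action: keep
```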

Note that decreasing the number of shards will not reshard data onto the remaining instances; the data must be moved manually. Increasing the number of shards will not reshard data either, but the existing data remains available from the original instances.

Configuring `spec.prometheus.ingress` and `spec.grafana` in the TidbMonitor CR is not recommended when using multiple shards. To query metrics globally, we recommend using Thanos.
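
As a rough sketch (the `thanos` field values below are examples; check the TidbMonitor API reference for your tidb-operator version for the exact fields and supported Thanos versions), a Thanos sidecar can be enabled by adding a `thanos` section to the TidbMonitor spec:

```yaml
# Sketch only: enable a Thanos sidecar on the sharded TidbMonitor.
# Image and version are example values; verify them for your release.
spec:
  replicas: 1
  shards: 2
  thanos:
    baseImage: thanosio/thanos
    version: v0.17.2
```

A separately deployed Thanos Query component can then aggregate the metrics from all shards.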

## Install Example

Install TiDB:

```bash
kubectl apply -f tidb-cluster.yaml -n ${namespace}
```

Wait for the Pods to be ready:

```bash
watch kubectl -n ${namespace} get pod
```

Install TidbMonitor with two shards:

```bash
kubectl apply -f tidb-monitor.yaml -n ${namespace}
```

Wait for the Pods to be ready:

```bash
watch kubectl -n ${namespace} get pod
```

You should see that the scrape targets are distributed across the two shards.
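
One way to verify this (a sketch; the Pod name below is a placeholder, look it up with `kubectl -n ${namespace} get pod` first) is to port-forward the Prometheus port of each shard and compare their active targets:

```bash
# Replace <prometheus-pod-of-shard> with the Prometheus Pod name of each shard.
kubectl -n ${namespace} port-forward pod/<prometheus-pod-of-shard> 9090:9090 &
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets | length'
```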
45 changes: 45 additions & 0 deletions examples/monitor-shards/tidb-cluster.yaml
@@ -0,0 +1,45 @@
# IT IS NOT SUITABLE FOR PRODUCTION USE.
# This YAML describes a basic TiDB cluster with minimum resource requirements,
# which should be able to run in any Kubernetes cluster with storage support.
apiVersion: pingcap.com/v1alpha1
kind: TidbCluster
metadata:
name: basic
spec:
version: v5.2.1
timezone: UTC
pvReclaimPolicy: Retain
enableDynamicConfiguration: true
configUpdateStrategy: RollingUpdate
discovery: {}
pd:
baseImage: pingcap/pd
replicas: 1
# if storageClassName is not set, the default Storage Class of the Kubernetes cluster will be used
# storageClassName: local-storage
requests:
storage: "1Gi"
config: {}
tikv:
baseImage: pingcap/tikv
replicas: 1
# if storageClassName is not set, the default Storage Class of the Kubernetes cluster will be used
# storageClassName: local-storage
requests:
storage: "1Gi"
config:
storage:
# In basic examples, we set this to avoid using too much storage.
reserve-space: "0MB"
rocksdb:
# In basic examples, we set this to avoid the following error in some Kubernetes clusters:
# "the maximum number of open file descriptors is too small, got 1024, expect greater or equal to 82920"
max-open-files: 256
raftdb:
max-open-files: 256
tidb:
baseImage: pingcap/tidb
replicas: 1
service:
type: ClusterIP
config: {}
19 changes: 19 additions & 0 deletions examples/monitor-shards/tidb-monitor.yaml
@@ -0,0 +1,19 @@
apiVersion: pingcap.com/v1alpha1
kind: TidbMonitor
metadata:
name: basic
spec:
replicas: 1
shards: 2
clusters:
- name: basic
prometheus:
baseImage: prom/prometheus
version: v2.18.1
initializer:
baseImage: pingcap/tidb-monitor-initializer
version: v5.2.1
reloader:
baseImage: pingcap/tidb-monitor-reloader
version: v1.0.1
imagePullPolicy: IfNotPresent
3 changes: 3 additions & 0 deletions manifests/crd.yaml
@@ -22048,6 +22048,9 @@ spec:
replicas:
format: int32
type: integer
shards:
format: int32
type: integer
storage:
type: string
storageClassName:
7 changes: 7 additions & 0 deletions pkg/apis/pingcap/v1alpha1/openapi_generated.go

Some generated files are not rendered by default.

18 changes: 18 additions & 0 deletions pkg/apis/pingcap/v1alpha1/tidbmonitor_types.go
@@ -114,6 +114,16 @@ type TidbMonitorSpec struct {
// +optional
Replicas *int32 `json:"replicas,omitempty"`

// EXPERIMENTAL: Number of shards to distribute targets onto. The number of
// replicas multiplied by the number of shards is the total number of Pods
// created. Note that scaling down shards will not reshard data onto the
// remaining instances; it must be moved manually. Increasing shards will not
// reshard data either, but it will continue to be available from the same
// instances. To query globally, use the Thanos sidecar and Thanos Querier, or
// remote-write the data to a central location. Sharding is done on the content
// of the `__address__` target meta-label.
Shards *int32 `json:"shards,omitempty"`

// Additional volumes of component pod.
// +optional
AdditionalVolumes []corev1.Volume `json:"additionalVolumes,omitempty"`
@@ -449,3 +459,11 @@ type QueueConfig struct {
MinBackoff time.Duration `json:"minBackoff,omitempty"`
MaxBackoff time.Duration `json:"maxBackoff,omitempty"`
}

// GetShards returns the effective number of Prometheus shards for this TidbMonitor; an unset value or a value below 2 is treated as a single shard.
func (tm *TidbMonitor) GetShards() int32 {
shards := int32(1)
if tm.Spec.Shards != nil && *tm.Spec.Shards > 1 {
shards = *tm.Spec.Shards
}
return shards
}
5 changes: 5 additions & 0 deletions pkg/apis/pingcap/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

53 changes: 33 additions & 20 deletions pkg/monitor/monitor/monitor_manager.go
@@ -233,30 +233,42 @@ func (m *MonitorManager) syncTidbMonitorStatefulset(tc *v1alpha1.TidbCluster, dc
klog.Infof("Wait for the smooth migration to be done successfully for tm [%s/%s]", ns, name)
return nil
}

newMonitorSts, err := getMonitorStatefulSet(sa, secret, monitor, tc, dc)
if err != nil {
klog.Errorf("Fail to generate statefulset for tm [%s/%s], err: %v", ns, name, err)
return err
}

oldMonitorSetTmp, err := m.deps.StatefulSetLister.StatefulSets(ns).Get(GetMonitorObjectName(monitor))
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncTidbMonitorStatefulset: fail to get sts %s for cluster %s/%s, error: %s", GetMonitorObjectName(monitor), ns, name, err)
}
setNotExist := errors.IsNotFound(err)
if setNotExist {
err = member.SetStatefulSetLastAppliedConfigAnnotation(newMonitorSts)
shards := monitor.GetShards()
var isAllCreated = true
for shard := int32(0); shard < shards; shard++ {
newMonitorSts, err := getMonitorStatefulSet(sa, secret, monitor, tc, dc, shard)
if err != nil {
klog.Errorf("Fail to generate statefulset for tm [%s/%s], err: %v", ns, name, err)
return err
}
if err := m.deps.StatefulSetControl.CreateStatefulSet(monitor, newMonitorSts); err != nil {
stsName := newMonitorSts.Name
oldMonitorSetTmp, err := m.deps.StatefulSetLister.StatefulSets(ns).Get(stsName)
if err != nil && !errors.IsNotFound(err) {
return fmt.Errorf("syncTidbMonitorStatefulset: fail to get sts %s for cluster %s/%s, error: %s", stsName, ns, name, err)
}
setNotExist := errors.IsNotFound(err)
if setNotExist {
err = member.SetStatefulSetLastAppliedConfigAnnotation(newMonitorSts)
if err != nil {
return err
}
if err := m.deps.StatefulSetControl.CreateStatefulSet(monitor, newMonitorSts); err != nil {
return err
}
isAllCreated = false
continue
}
err = member.UpdateStatefulSet(m.deps.StatefulSetControl, monitor, newMonitorSts, oldMonitorSetTmp)
if err != nil {
klog.Errorf("Fail to update statefulset[%s/%s] for tm [%s/%s], err: %v", ns, stsName, ns, name, err)
return err
}
}
if !isAllCreated {
return controller.RequeueErrorf("TidbMonitor: [%s/%s], waiting for tidbmonitor running", ns, name)
} else {
return nil
}

return member.UpdateStatefulSet(m.deps.StatefulSetControl, monitor, newMonitorSts, oldMonitorSetTmp)
}

func (m *MonitorManager) syncTidbMonitorSecret(monitor *v1alpha1.TidbMonitor) (*corev1.Secret, error) {
@@ -349,7 +361,8 @@ func (m *MonitorManager) syncTidbMonitorConfig(monitor *v1alpha1.TidbMonitor) er
}
}

promCM, err := getPromConfigMap(monitor, monitorClusterInfos, dmClusterInfos)
shards := monitor.GetShards()
promCM, err := getPromConfigMap(monitor, monitorClusterInfos, dmClusterInfos, shards)
if err != nil {
return err
}
@@ -461,7 +474,7 @@ func (m *MonitorManager) syncIngress(monitor *v1alpha1.TidbMonitor) error {

func (m *MonitorManager) syncPrometheusIngress(monitor *v1alpha1.TidbMonitor) error {
if monitor.Spec.Prometheus.Ingress == nil {
return m.removeIngressIfExist(monitor, prometheusName(monitor))
return m.removeIngressIfExist(monitor, PrometheusName(monitor.Name, 0))
}

ingress := getPrometheusIngress(monitor)
@@ -471,7 +484,7 @@ func (m *MonitorManager) syncGrafanaIngress(monitor *v1alpha1.TidbMonitor) er

func (m *MonitorManager) syncGrafanaIngress(monitor *v1alpha1.TidbMonitor) error {
if monitor.Spec.Grafana == nil || monitor.Spec.Grafana.Ingress == nil {
return m.removeIngressIfExist(monitor, grafanaName(monitor))
return m.removeIngressIfExist(monitor, GrafanaName(monitor.Name, 0))
}
ingress := getGrafanaIngress(monitor)
_, err := m.deps.TypedControl.CreateOrUpdateIngress(monitor, ingress)