metric: run multiple export/import processes at the same time (#39)
AstroProfundis authored and LinuxGit committed Sep 7, 2018
1 parent 2237eec commit d1fb78f
Showing 5 changed files with 80 additions and 31 deletions.
46 changes: 31 additions & 15 deletions metric/importer/prometheus.py
@@ -11,6 +11,8 @@
import random
import string

import multiprocessing as mp

from utils import fileopt
from utils import util

@@ -24,6 +26,8 @@ def __init__(self, args):
self.db_name = args.db if args.db else self.unique_dbname()
self.user = args.user
self.passwd = args.passwd
self.proc_num = args.proc_num if args.proc_num else int(
mp.cpu_count() + 1)

# unique_dbname() generates a unique database name for importing, to prevent
# overwriting of previously imported data
@@ -55,6 +59,22 @@ def exec_importer(self, file=None, chunk_size=2000):
logging.debug("Running cmd: %s" % ' '.join(cmd))
return util.run_cmd(cmd)

def importer_worker(self, filename):
# all dumped files are in 'prometheus' sub-directory
if not filename or not filename.endswith('.json') or 'prometheus' not in filename:
return
stderr = self.exec_importer(filename)[1]
if stderr and "Request Entity Too Large" in stderr.decode('utf-8'):
logging.info("Write to DB failed, retry for once...")
retry_stderr = self.exec_importer(filename, chunk_size=100)[1]
if not retry_stderr:
logging.info("Retry succeeded.")
else:
logging.warning("Retry failed, stderr is: '%s'" %
retry_stderr)
elif stderr:
logging.warning(stderr)

def run_importing(self):
logging.info("Metrics will be imported to database '%s'." %
self.db_name)
@@ -68,18 +88,14 @@ def file_list(dir=None):
f_list.append(file)
return f_list

for file in file_list(self.datadir):
# all dumped files are in 'prometheus' sub-directory
if not file or not file.endswith('.json') or 'prometheus' not in file:
continue
stderr = self.exec_importer(file)[1]
if stderr and "Request Entity Too Large" in stderr.decode('utf-8'):
logging.info("Write to DB failed, retry for once...")
retry_stderr = self.exec_importer(file, chunk_size=100)[1]
if not retry_stderr:
logging.info("Retry succeeded.")
else:
logging.warning("Retry failed, stderr is: '%s'" %
retry_stderr)
elif stderr:
logging.warning(stderr)
pool = mp.Pool(self.proc_num)
files = file_list(self.datadir)
pool.map_async(unwrap_self_f, zip([self] * len(files), files))
pool.close()
pool.join()


# a trick to use multiprocessing.Pool inside a class
# see http://www.rueckstiess.net/research/snippets/show/ca1d7d90 for details
def unwrap_self_f(arg, **kwarg):
return PromDump.importer_worker(*arg, **kwarg)
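
The module-level unwrap_self_f helper is needed because Pool.map_async must pickle whatever it sends to the worker processes: on older Python versions a bound method such as self.importer_worker cannot be pickled, whereas a plain module-level function plus an (instance, argument) tuple can. Below is a minimal, self-contained sketch of the same pattern; the Worker, double and run_all names are hypothetical and not part of this repository.

import multiprocessing as mp


def _unwrap(arg):
    # arg is an (instance, item) tuple built with zip([self] * n, items)
    obj, item = arg
    return obj.double(item)


class Worker(object):
    def double(self, item):
        return item * 2

    def run_all(self, items):
        # the pool pickles the module-level _unwrap and each tuple,
        # never the bound method itself
        pool = mp.Pool(2)
        results = pool.map(_unwrap, zip([self] * len(items), items))
        pool.close()
        pool.join()
        return results


if __name__ == '__main__':
    print(Worker().run_all([1, 2, 3]))  # -> [2, 4, 6]
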
44 changes: 31 additions & 13 deletions metric/prometheus.py
@@ -7,6 +7,8 @@
import logging
import os

import multiprocessing as mp

from metric.base import MetricBase
from utils import fileopt
from utils import util
@@ -19,6 +21,9 @@ def __init__(self, args, basedir=None, subdir=None):

self.host = args.host if args.host else 'localhost'
self.port = args.port if args.port else 9090
self.proc_num = args.proc_num if args.proc_num else int(
mp.cpu_count() / 2 + 1)

self.api_uri = '/api/v1'
self.url_base = 'http://%s:%s%s' % (self.host, self.port, self.api_uri)

@@ -33,20 +38,33 @@ def get_label_names(self):
logging.debug("Found %s available metric keys..." % len(result))
return result

def query_worker(self, metric):
url = '%s/query_range?query=%s&start=%s&end=%s&step=%s' % (
self.url_base, metric, self.start_time, self.end_time, self.resolution)
response = util.read_url(url)[0]
if 'success' not in response[:20].decode('utf-8'):
logging.error("Error querying for key '%s'." % metric)
logging.debug("Output is:\n%s" % response)
return
metric_filename = '%s_%s_to_%s_%ss.json' % (
metric, self.start_time, self.end_time, self.resolution)
fileopt.write_file(os.path.join(
self.outdir, metric_filename), response)
logging.debug("Saved data for key '%s'." % metric)

def run_collecting(self):
if self.resolution < 15.0:
logging.warning(
"Sampling resolution < 15s don't increase accuracy but data size.")
for metric in self.get_label_names():
url = '%s/query_range?query=%s&start=%s&end=%s&step=%s' % (
self.url_base, metric, self.start_time, self.end_time, self.resolution)
matrix = json.loads(util.read_url(url)[0])
if not matrix['status'] == 'success':
logging.info("Error querying for key '%s'." % metric)
logging.debug("Output is:\n%s" % matrix)
continue
metric_filename = '%s_%s_to_%s_%ss.json' % (
metric, self.start_time, self.end_time, self.resolution)
fileopt.write_file(os.path.join(
self.outdir, metric_filename), json.dumps(matrix['data']['result']))
logging.debug("Saved data for key '%s'." % metric)
pool = mp.Pool(self.proc_num)
metric_names = self.get_label_names()
pool.map_async(unwrap_self_f, zip(
[self] * len(metric_names), metric_names))
pool.close()
pool.join()


# a trick to use multiprocessing.Pool inside a class
# see http://www.rueckstiess.net/research/snippets/show/ca1d7d90 for details
def unwrap_self_f(arg, **kwarg):
return PromMetrics.query_worker(*arg, **kwarg)
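
One subtlety of the map_async call used in both files: it returns an AsyncResult, and an exception raised inside a worker is stored on that object rather than propagated, so with only close() and join() a crashed worker passes silently (the workers above log their own errors, which mitigates this). A possible variant, not what this commit does, that would also re-raise an unexpected worker exception:

result = pool.map_async(unwrap_self_f, zip(
    [self] * len(metric_names), metric_names))
pool.close()
pool.join()
result.get()  # returns immediately after join(); re-raises a worker exception if one occurred
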
16 changes: 13 additions & 3 deletions tools/prom2influx.go
@@ -24,6 +24,16 @@ type options struct {
Chunk int
}

type promResult struct {
ResultType string
Result model.Matrix
}

type promDump struct {
Status string
Data promResult
}

func parseOpts() options {
influxHost := flag.String("host", "localhost", "The host of influxdb.")
influxPort := flag.String("port", "8086", "The port of influxdb.")
@@ -114,8 +124,8 @@ func buildPoints(series *model.SampleStream, client influx.Client,
return ptList, nil
}

func writeBatchPoints(data model.Matrix, opts options) error {
for _, series := range data {
func writeBatchPoints(data promDump, opts options) error {
for _, series := range data.Data.Result {
client := newClient(opts)
ptList, err := buildPoints(series, client, opts)
if err != nil {
@@ -153,7 +163,7 @@ func main() {
}

// decode JSON
var data model.Matrix
var data promDump
if err = json.Unmarshal(input, &data); err != nil {
log.Fatal(err)
}
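
The type change mirrors the Python side: query_worker now writes the raw query_range response to disk instead of re-serializing only data.result, so the Go importer has to unmarshal the full envelope. A Prometheus query_range response has roughly this shape (sample data, values abbreviated):

{
  "status": "success",
  "data": {
    "resultType": "matrix",
    "result": [
      {
        "metric": {"__name__": "up", "instance": "localhost:9090"},
        "values": [[1536300000, "1"], [1536300015, "1"]]
      }
    ]
  }
}

encoding/json matches these lower-case keys to the exported promDump and promResult fields case-insensitively, so no struct tags are required.
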
1 change: 1 addition & 0 deletions utils/fileopt.py
@@ -21,6 +21,7 @@ def read_file(filename, mode='r'):
# write data to file
def write_file(filename, data, mode='w'):
with open(filename, mode) as f:
logging.debug("Writting %s of data to %s" % (len(data), filename))
try:
f.write(str(data, 'utf-8'))
except TypeError:
4 changes: 4 additions & 0 deletions utils/util.py
@@ -201,6 +201,8 @@ def parse_insight_opts():
help="End time point of time range, format: '%%Y-%%m-%%d %%H:%%M:%%S' (local time).")
parser_prom.add_argument("--resolution", type=float, default=None,
help="Query resolution step width of Prometheus in seconds, 15.0 by default.")
parser_prom.add_argument("--proc-num", type=int, action="store", default=None,
help="Number of parallel queries to run, 'CPU count / 2 + 1' by default.")

parser_load = subparser_metric.add_parser(
"load", help="Load dumped metrics to local influxdb.")
@@ -216,6 +218,8 @@ def parse_insight_opts():
help="The user with priviledge to create database, empty (no authentication needed) by default.")
parser_load.add_argument("--passwd", action="store", default=None,
help="The password of user, empty (no authentication needed) by default.")
parser_load.add_argument("--proc-num", type=int, action="store", default=None,
help="Number of parallel importer processes to run, 'CPU count + 1' by default.")
####

return parser.parse_args()
