perf tests: use metrics for memory usage charts (#5565)
algorandskiy authored Jul 13, 2023
1 parent 8db3d7a commit d316914
Showing 3 changed files with 144 additions and 38 deletions.
20 changes: 11 additions & 9 deletions shared/pingpong/pingpong.go
@@ -295,17 +295,19 @@ func (pps *WorkerState) scheduleAction() bool {
}
pps.refreshPos = 0
}
-addr := pps.refreshAddrs[pps.refreshPos]
-ai, err := pps.client.AccountInformation(addr, true)
-if err == nil {
-ppa := pps.accounts[addr]
+if pps.cfg.NumApp > 0 || pps.cfg.NumAsset > 0 {
+addr := pps.refreshAddrs[pps.refreshPos]
+ai, err := pps.client.AccountInformation(addr, true)
+if err == nil {
+ppa := pps.accounts[addr]

-pps.integrateAccountInfo(addr, ppa, ai)
-} else {
-if !pps.cfg.Quiet {
-fmt.Printf("background refresh err: %v\n", err)
+pps.integrateAccountInfo(addr, ppa, ai)
+} else {
+if !pps.cfg.Quiet {
+fmt.Printf("background refresh err: %v\n", err)
+}
+return false
}
-return false
}
pps.refreshPos++
return true
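In effect, the background account refresh in scheduleAction now only issues the AccountInformation call when the run actually has an app or asset workload configured (cfg.NumApp or cfg.NumAsset greater than zero); for plain payment runs the refresh position still advances, but no per-round account lookup is performed.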
91 changes: 73 additions & 18 deletions test/heapwatch/client_ram_report.py
@@ -47,16 +47,28 @@
'TB': 1024*1024*1024*1024,
}

-# d = {k:[v,...]}
-def dapp(d, k, v):
+# d = {k: {t: v},...}
+def dapp(d, k, t, v):
l = d.get(k)
if l is None:
-d[k] = [v]
+d[k] = {t: v}
else:
-l.append(v)
+l[t] = v

+# d = {k: {t: {m: v},...},...}
+def dapp_metric(d, k, t, m, v):
+l = d.get(k)
+if l is None:
+d[k] = {t: {m: v}}
+else:
+l2 = l.get(t)
+if l2 is None:
+l[t] = {m: v}
+else:
+l2[m] = v
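As an illustration only (nicknames and timestamps invented), these two helpers build nested dicts keyed by nickname, then timestamp, then — for dapp_metric — metric name:

    by_nick = {}
    dapp(by_nick, 'relay1', '20230713_010203', 123456.0)
    dapp(by_nick, 'relay1', '20230713_011203', 130000.0)
    # by_nick == {'relay1': {'20230713_010203': 123456.0, '20230713_011203': 130000.0}}

    by_metric = {}
    dapp_metric(by_metric, 'relay1', '20230713_010203', 'inuse', 123456.0)
    dapp_metric(by_metric, 'relay1', '20230713_010203', 'total', 210000.0)
    # by_metric == {'relay1': {'20230713_010203': {'inuse': 123456.0, 'total': 210000.0}}}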

def get_heap_inuse_totals(dirpath):
'''return {"node nickname":[(YYYYmmdd_HHMMSS, bytes), ...], ...}'''
'''return {"node nickname": {"YYYYmmdd_HHMMSS": bytes}, ...}'''
cache_mtime = 0
cache_path = os.path.join(dirpath, 'heap_inuse_totals.json')
if os.path.exists(cache_path):
@@ -88,21 +100,50 @@ def get_heap_inuse_totals(dirpath):
logger.error('could not find total in output: %s', text)
raise Exception('could not find total in output of: %s', ' '.join([repr(x) for x in cmd]))
bytesinuse = float(m.group(1)) * multipliers[m.group(2).upper()]
-dapp(bynick, nick, (timestamp, bytesinuse))
+dapp(bynick, nick, timestamp, bytesinuse)
logger.debug('%s ok, %s %f', path, timestamp, bytesinuse)

logger.debug('%d skipped older than cache', skipcount)
for nick, recs in bynick.items():
old = cached.get(nick)
if old is None:
-cached[nick] = sorted(recs)
+cached[nick] = recs
else:
-cached[nick] = sorted(old + recs)
+cached[nick].update(recs)
if cached and bynick:
with open(cache_path, 'wt') as fout:
json.dump(cached, fout)
return cached

+def get_heap_metrics(dirpath):
+'''return {"node nickname": {"YYYYmmdd_HHMMSS": {"metric": value}, ...}, ...}'''
+metrics_name_re = re.compile(r'(.*)\.(.*).metrics')
+bynick = {}
+for path in glob.glob(os.path.join(dirpath, '*.*.metrics')):
+fname = os.path.basename(path)
+m = metrics_name_re.match(fname)
+if not m:
+logger.warning('could not parse heap filename: %r', path)
+continue
+nick = m.group(1)
+timestamp = m.group(2)
+with open(path, 'rt') as fin:
+for line in fin.readlines():
+if line.startswith('#'):
+continue
+elif line.startswith('algod_go_memory_classes_heap_objects_bytes'):
+inuse = float(line.split()[1])
+dapp_metric(bynick, nick, timestamp, 'inuse', inuse)
+elif line.startswith('algod_go_memory_classes_total_bytes'):
+total = float(line.split()[1])
+dapp_metric(bynick, nick, timestamp, 'total', total)
+elif line.startswith('algod_go_memory_classes_heap_free_bytes'):
+free = float(line.split()[1])
+dapp_metric(bynick, nick, timestamp, 'free', free)
+elif line.startswith('algod_go_memory_classes_heap_released_bytes'):
+released = float(line.split()[1])
+dapp_metric(bynick, nick, timestamp, 'released', released)
+return bynick
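A rough sketch of the input this expects (file name and numbers are made up): get_heap_metrics scans <nick>.<timestamp>.metrics files in Prometheus text format and keeps only the four algod_go_memory_classes_* gauges matched above. For a file named relay1.20230713_010203.metrics containing:

    # HELP / # TYPE comment lines are skipped by the startswith('#') check
    algod_go_memory_classes_heap_objects_bytes 123456789
    algod_go_memory_classes_total_bytes 345678901
    algod_go_memory_classes_heap_free_bytes 2345678
    algod_go_memory_classes_heap_released_bytes 1234567

the function would return {'relay1': {'20230713_010203': {'inuse': 123456789.0, 'total': 345678901.0, 'free': 2345678.0, 'released': 1234567.0}}}.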

def maybe_load_tf_nicks(args):
tf_inventory_path = os.path.join(args.dir, 'terraform-inventory.host')
@@ -121,9 +162,11 @@ def maybe_load_tf_nicks(args):
return ip_to_name


-def hostports_to_nicks(args, hostports):
+def hostports_to_nicks(args, hostports, metrics=None):
ip_to_nick = maybe_load_tf_nicks(args)
if not ip_to_nick:
+if metrics:
+return ['{}#{}'.format(hp, m) for hp in hostports for m in metrics]
return hostports
out = []
for hp in hostports:
@@ -138,6 +181,8 @@ def hostports_to_nicks(args, hostports):
if not hit:
hit = hp
out.append(hit)
+if metrics:
+return ['{}#{}'.format(hp, m) for hp in hostports for m in metrics]
return out
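A quick sketch of the column labels this produces when a metrics list is passed (host names invented); plot_crr_csv.py later splits these labels on '#':

    hostports = ['relay1', 'node2']
    metrics = ['pprof_inuse_space', 'inuse', 'total']
    ['{}#{}'.format(hp, m) for hp in hostports for m in metrics]
    # ['relay1#pprof_inuse_space', 'relay1#inuse', 'relay1#total',
    #  'node2#pprof_inuse_space', 'node2#inuse', 'node2#total']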


@@ -154,6 +199,7 @@ def main():
logging.basicConfig(level=logging.INFO)

heap_totals = get_heap_inuse_totals(args.dir)
+heap_details = get_heap_metrics(args.dir)

if args.csv:
if args.csv == '-':
@@ -162,12 +208,18 @@ def main():
csvf = open(args.csv, 'wt')
writer = csv.writer(csvf)
whens = set()
-for nick, recs in heap_totals.items():
-for ts, n in recs:
+col_names_target = heap_totals if heap_totals else heap_details
+for nick, recs in col_names_target.items():
+# {k: {t: v}}
+for ts in recs.keys():
whens.add(ts)
whens = sorted(whens)
-nodes = sorted(heap_totals.keys())
-writer.writerow(['when','dt','round'] + hostports_to_nicks(args, nodes))
+nodes = sorted(col_names_target.keys())
+metrics = list(heap_details[nodes[0]].values())[0]
+writer.writerow(
+['when','dt','round'] +
+hostports_to_nicks(args, nodes, metrics=['pprof_inuse_space'] + list(metrics.keys()))
+)
first = None
for ts in whens:
tv = time.mktime(time.strptime(ts, '%Y%m%d_%H%M%S'))
@@ -179,13 +231,16 @@ def main():
bi = json.load(open(bipath))
rnd = str(bi['block']['rnd'])
except:
-rnd = ''
+rnd = '0'
row = [ts, tv-first, rnd]
for nick in nodes:
-for rec in heap_totals[nick]:
-if rec[0] == ts:
-row.append(rec[1])
-break
+# {k: {t: v}}
+val = heap_totals.get(nick, {}).get(ts)
+row.append(val if val else 0)
+vals = heap_details[nick].get(ts)
+# {k: {t: {m: v}}}
+if vals:
+row.extend(vals.values())
writer.writerow(row)

return 0
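Purely illustrative (values invented, and the per-node metric column order follows whatever order the metrics appear in the .metrics files), the resulting CSV now looks roughly like:

    when,dt,round,relay1#pprof_inuse_space,relay1#inuse,relay1#total,relay1#free,relay1#released
    20230713_010203,0.0,1000,123456789,118000000,345678901,2345678,1234567
    20230713_011203,600.0,1180,130000000,121000000,350000000,2400000,1300000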
71 changes: 60 additions & 11 deletions test/heapwatch/plot_crr_csv.py
@@ -6,8 +6,18 @@
import random

from matplotlib import pyplot as plt
+from matplotlib.ticker import MaxNLocator, FuncFormatter

_meta_cols = {'when', 'dt', 'round'}
+_metrics_cols = {'free', 'inuse', 'released', 'total'}

+# see https://matplotlib.org/stable/gallery/lines_bars_and_markers/linestyles.html
+plt_line_styles = [
+'solid', 'dotted', 'dashed', 'dashdot',
+(5, (10, 3)), # long dash with offset
+(0, (3, 5, 1, 5)), # dashdotted
+(0, (3, 10, 1, 10, 1, 10)), # loosely dashdotted
+]

def smin(a,b):
if a is None:
@@ -22,6 +32,27 @@ def smax(a,b):
return a
return max(a,b)

+def add_metric(d, k, m, x, y):
+"""d: {k: {m: [(x,y)]}}"""
+mt = d.get(k)
+if mt is None:
+d[k] = {m: [(x,y)]}
+else:
+klist = mt.get(m)
+if klist is None:
+mt[m] = [(x,y)]
+else:
+klist.append((x, y))


+def format_mem(x, _):
+if x<0:
+return ""
+for unit in ['bytes', 'KB', 'MB', 'GB']:
+if x < 1024:
+return "%3.1f %s" % (x, unit)
+x /= 1024
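For reference, a few example values from format_mem as defined above (the second argument is the tick position matplotlib passes to a FuncFormatter; it is ignored):

    format_mem(512, None)          # '512.0 bytes'
    format_mem(2048, None)         # '2.0 KB'
    format_mem(3 * 1024**3, None)  # '3.0 GB'
    format_mem(-1, None)           # '' (negative ticks are blanked)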

def main():
import argparse
ap = argparse.ArgumentParser()
@@ -36,29 +67,42 @@ def main():
reader = csv.DictReader(fin)
for rec in reader:
xround = int(rec['round'])
+row_nick = None
for k,v in rec.items():
if k in _meta_cols:
continue
-klist = fvals.get(k)
-if klist is None:
-klist = []
-fvals[k] = klist
v = float(v)
-klist.append((xround, v))
+parts = k.split('#')
+if len(parts) == 2:
+row_nick = parts[0]
+metric = parts[1]
+else :
+print(f"unknown column {k}")
+row_nick = k
+metric = k
+add_metric(fvals, row_nick, metric, xround, v)

minv = smin(minv, v)
maxv = smax(maxv, v)
if not fvals:
print(f"{fname} empty")
continue
print("{} found series {}".format(fname, sorted(fvals.keys())))
nodes = sorted(fvals.keys())
print("{} found series {}".format(fname, nodes))
fig, ax = plt.subplots()
+ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+ax.yaxis.set_major_formatter(FuncFormatter(format_mem))
ax.set_ylabel('bytes')
ax.set_xlabel('round')
ax.set_ylim(minv,maxv)
-for k in sorted(fvals.keys()):
-xy = fvals[k]
-#for k, xy in fvals.items():
-lc = None

+max_val_color = max(map(len, nodes)) * ord('z')
+for k in nodes:
+lc = None # let matplotlib pick a color when there is no standard node name pattern, e.g. a single local run
+if len(nodes) > 1:
+# if there are multiple nodes, choose a color based on the node name
+s = sum(map(ord, k))
+lc = (s/max_val_color, s/max_val_color, s/max_val_color)
if k.startswith('r'):
# blueish
lc = (0.3*random.random(), 0.3*random.random(), 0.7+(0.3*random.random()))
@@ -68,7 +112,12 @@ def main():
elif k.startswith('n'):
# reddish
lc = (0.7+(0.3*random.random()), 0.3*random.random(), 0.3*random.random())
-ax.plot([p[0] for p in xy], [p[1] for p in xy], label=k, color=lc)

+metrics = fvals[k]
+for i, metric in enumerate(metrics.keys()):
+xy = metrics[metric]

+ax.plot([p[0] for p in xy], [p[1] for p in xy], label=f'{k}/{metric}', color=lc, linestyle=plt_line_styles[i%len(plt_line_styles)])
ax.legend(loc='upper left', ncol=2)
plt.savefig(fname + '.svg', format='svg')
plt.savefig(fname + '.png', format='png')
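With these changes each plotted series is a node/metric pair: the node nickname still selects the line colour (bluish for names starting with 'r', reddish for 'n', otherwise a name-derived grey when several nodes are present, or a matplotlib default for a single unnamed node), while the metric index picks the line style from plt_line_styles, so the different memory metrics of one node share a colour but differ in dash pattern.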
